Hourly Energy Consumption Forecasting with ARIMA-LSTM-CNN Hybrid Model¶

Introduction to the ARIMA-LSTM-CNN Hybrid Model
The ARIMA-LSTM-CNN hybrid model is a sophisticated forecasting approach designed to leverage the complementary strengths of statistical and deep learning techniques. Here’s a breakdown of its components and workflow logic:

  • ARIMA (AutoRegressive Integrated Moving Average):
    A classical time series model that captures linear patterns (trends, seasonality, autocorrelation) in the data. It serves as the "first layer" to model predictable linear relationships, leaving residuals (errors) that may contain nonlinear or complex patterns.

  • LSTM (Long Short-Term Memory):
    A type of recurrent neural network (RNN) adept at modeling temporal dependencies in sequential data. It processes the residuals from ARIMA to learn long-term nonlinear relationships in the time series.

  • CNN (Convolutional Neural Network):
    Used for local feature extraction from sequential data (e.g., detecting short-term patterns or anomalies in residuals). When combined with LSTM, it enhances the model’s ability to identify hierarchical spatial-temporal structures.

Why Hybridize?
By decomposing the forecasting task into linear (ARIMA) and nonlinear (LSTM-CNN) components, the model addresses the limitations of standalone methods. ARIMA handles baseline trends, while the LSTM-CNN duo refines the residuals, capturing intricate patterns that linear models miss. The final forecast is a sum of ARIMA’s prediction and the LSTM-CNN residual correction, often yielding superior accuracy compared to individual models.
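In equation form, the final forecast is ŷ = ŷ_ARIMA + ŷ_residual. A minimal numeric sketch of the recombination (the arrays here are made-up illustrations, not model output):

In [ ]:
import numpy as np

arima_pred    = np.array([30_000.0, 31_500.0])  # ARIMA's linear forecast (MW)
residual_pred = np.array([  -420.0,    310.0])  # LSTM-CNN's nonlinear correction
final_forecast = arima_pred + residual_pred     # the hybrid's combined forecast
print(final_forecast)  # [29580. 31810.]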

This approach is particularly effective for complex time series (e.g., energy demand) where both linear trends and nonlinear interactions (e.g., weather effects, holiday anomalies) coexist. The workflow below details its implementation steps.

Workflow Overview

  1. Data Loading & Preprocessing: Load PJME hourly energy data and weather data, perform feature engineering (calendar features, holidays, weather averaging).
  2. Train/Validation/Test Split: Split data chronologically.
  3. ARIMA Modeling: Fit an ARIMA model to capture linear patterns in the time series (or its residuals).
  4. Residual Analysis: Obtain residuals from the ARIMA model (Actual - ARIMA Prediction).
  5. LSTM-CNN Data Preparation: Prepare sequences from ARIMA residuals and relevant features for the deep learning model.
  6. LSTM-CNN Model Definition: Define a hybrid model combining LSTM (for temporal dependencies) and CNN (for feature extraction from sequences).
  7. LSTM-CNN Training: Train the LSTM-CNN model on the prepared sequences to predict the ARIMA residuals.
  8. Combined Forecasting: Generate final forecasts by adding ARIMA predictions and LSTM-CNN predictions (of residuals).
  9. Evaluation: Evaluate the combined forecast using RMSE, MAE, and MAPE on the test set.
  10. Visualization: Plot actual vs. predicted values.
In [ ]:
# --- Standard Libraries ---
import copy
from datetime import datetime, timedelta
import warnings

# --- Data Manipulation ---
import numpy as np
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar

# --- Visualization ---
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# --- Machine Learning ---
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# --- Time Series Modeling ---
import statsmodels.api as sm  # used for sm.qqplot in the residual diagnostics
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

# --- PyTorch ---
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# --- Weather Data ---
from meteostat import Stations, Hourly

# --- Configure Warnings and Plot Style ---
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')

# --- Check for GPU Availability ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

1. Load and Preprocess Data (Energy + Weather)¶

In [2]:
# Load PJME hourly energy consumption data
pjme = (
    pd.read_csv(
        '../data/3/PJME_hourly.csv',
        index_col='Datetime',
        parse_dates=['Datetime']
    )
    .sort_index()
    .loc['2003-01-01':'2018-08-02']
)

# --- Feature Engineering: Holidays and Calendar Features ---
cal = USFederalHolidayCalendar()
fed_hols = cal.holidays(start=pjme.index.min(), end=pjme.index.max())
extended_hols = set(fed_hols)
for year in range(pjme.index.year.min(), pjme.index.year.max() + 1):
    july4 = datetime(year, 7, 4)
    wd = july4.weekday()  # Monday=0 ... Sunday=6
    if wd == 1:    # Tuesday: bridge the preceding Monday
        extended_hols.add(july4 - timedelta(days=1))
    elif wd == 2:  # Wednesday: bridge the following Thursday and Friday
        extended_hols.add(july4 + timedelta(days=1))
        extended_hols.add(july4 + timedelta(days=2))
    elif wd == 3:  # Thursday: bridge the following Friday
        extended_hols.add(july4 + timedelta(days=1))
all_hols = pd.DatetimeIndex(sorted(extended_hols))
pjme['is_holiday'] = pjme.index.normalize().isin(all_hols)
pjme['is_weekend'] = pjme.index.weekday >= 5
pjme['is_dayoff'] = pjme['is_holiday'] | pjme['is_weekend']
pjme.drop(columns=['is_holiday', 'is_weekend'], inplace=True)
pjme['hour']       = pjme.index.hour
pjme['dayofweek']  = pjme.index.weekday
pjme['month']      = pjme.index.month
pjme['year']       = pjme.index.year
pjme['dayofyear']  = pjme.index.dayofyear

# --- Fetch and Process Weather Data ---
start_dt = datetime(2002, 12, 31)
end_dt   = datetime(2018, 8, 4)
target_icaos = ['KPHL', 'KEWR', 'KBWI', 'KDCA']
stations_query = Stations().nearby(39.95, -75.17).inventory('hourly', (start_dt, end_dt))
target_stations_df = stations_query.fetch()[lambda x: x['icao'].isin(target_icaos)]
station_ids = target_stations_df.index.tolist()
weather_all = Hourly(station_ids, start_dt, end_dt).fetch()
weather_cols = ['temp', 'dwpt', 'rhum', 'prcp', 'wspd']
average_weather = weather_all.groupby(level='time').mean(numeric_only=True)[weather_cols].ffill()

# --- Combine Energy and Weather Data ---
pjme_weather = pjme.join(average_weather, how='left').dropna()

print(f'Combined data shape: {pjme_weather.shape}')
print(f'Index monotonic? {pjme_weather.index.is_monotonic_increasing}')
pjme_weather.head()
Combined data shape: (136608, 12)
Index monotonic? True
Out[2]:
PJME_MW is_dayoff hour dayofweek month year dayofyear temp dwpt rhum prcp wspd
2003-01-01 00:00:00 27008.0 True 0 2 1 2003 1 9.450 4.675 73.25 0.0 7.950
2003-01-01 01:00:00 25591.0 True 1 2 1 2003 1 9.300 4.750 74.00 0.0 7.400
2003-01-01 02:00:00 24235.0 True 2 2 1 2003 1 7.800 4.400 79.25 0.0 8.825
2003-01-01 03:00:00 23121.0 True 3 2 1 2003 1 7.775 4.200 79.00 0.0 6.950
2003-01-01 04:00:00 22445.0 True 4 2 1 2003 1 7.350 4.050 80.50 0.0 6.050
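One optional sanity check worth running here (not part of the original pipeline) is whether the hourly index still has gaps after the join and dropna, since date-indexed models downstream assume a regular frequency:

In [ ]:
# Optional check: the weather join + dropna can leave gaps in the hourly index
deltas = pjme_weather.index.to_series().diff().dropna()
print(deltas.value_counts().head())  # mostly 1 hour; anything larger is a gap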

2. Train/Validation/Test Split¶

In [3]:
# Define split points (e.g., 80% train, 10% val, 10% test)
total_hours = len(pjme_weather)
test_split_idx = int(total_hours * 0.9)
val_split_idx = int(total_hours * 0.8)

train_df = pjme_weather.iloc[:val_split_idx].copy()
val_df = pjme_weather.iloc[val_split_idx:test_split_idx].copy()
test_df = pjme_weather.iloc[test_split_idx:].copy()

print(f"Train: {len(train_df)} rows ({train_df.index.min()} to {train_df.index.max()})")
print(f"Val  : {len(val_df)} rows ({val_df.index.min()} to {val_df.index.max()})")
print(f"Test : {len(test_df)} rows ({test_df.index.min()} to {test_df.index.max()})")

TARGET = 'PJME_MW'
Train: 109286 rows (2003-01-01 00:00:00 to 2015-06-21 13:00:00)
Val  : 13661 rows (2015-06-21 14:00:00 to 2017-01-10 17:00:00)
Test : 13661 rows (2017-01-10 18:00:00 to 2018-08-02 23:00:00)

3. ARIMA Modeling¶

ARIMA stands for AutoRegressive Integrated Moving Average. It’s a class of time-series models that combines:

  • AutoRegressive (AR) terms: regression on its own past values,
  • Integrated (I) term: differencing the series to make it stationary,
  • Moving Average (MA) terms: modeling the error as a linear combination of past forecast errors.

Fit an ARIMA model to the training data to capture the linear relationships. Exogenous variables (weather, calendar) could also be included via ARIMAX, as sketched below. First, we analyze the Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to determine a suitable order (p, d, q) for the model.
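As a hedged sketch of that ARIMAX variant (not used in the fit below; the exog_cols choice is illustrative):

In [ ]:
# Sketch only: ARIMA with exogenous regressors (ARIMAX)
exog_cols = ['temp', 'is_dayoff']  # illustrative feature choice
arimax = ARIMA(
    train_df[TARGET],
    exog=train_df[exog_cols].astype(float),
    order=(2, 0, 1)
)
# Forecasting then requires future values of the exogenous variables:
# arimax.fit().forecast(steps=24, exog=future_exog)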

Stationarity Check and Differencing (if needed)¶

ARIMA models require the time series to be stationary. We can check for stationarity using tests like the Augmented Dickey-Fuller (ADF) test or by visual inspection. If non-stationary, we'll difference the series (typically d=1 is sufficient for level non-stationarity).

In [ ]:
# Perform ADF test on the training target variable
adf_result = adfuller(train_df[TARGET])
print(f'ADF Statistic: {adf_result[0]}')
print(f'p-value: {adf_result[1]}')
print('Critical Values:')
for key, value in adf_result[4].items():
    print(f'\t{key}: {value}')

# If p-value > 0.05, the series is likely non-stationary and needs differencing
if adf_result[1] > 0.05:
    print("\nSeries is likely non-stationary. Applying first difference (d=1).")
    # Example: train_diff = train_df[TARGET].diff().dropna()
    # You would then analyze the ACF/PACF of the differenced series
    # and set d=1 in the ARIMA order below.
else:
    print("\nSeries is likely stationary. We can proceed without differencing.")
ADF Statistic: -14.7243298356668
p-value: 2.7209725339755383e-27
Critical Values:
	1%: -3.430409875785827
	5%: -2.8615664641838747
	10%: -2.5667840859547453

Series is likely stationary. We can proceed without differencing.

ACF and PACF Plots¶

Plot the ACF and PACF to identify potential AR (p) and MA (q) orders.

  • ACF: Helps identify the MA order (q). Look for a sharp cutoff after lag q.
  • PACF: Helps identify the AR order (p). Look for a sharp cutoff after lag p.
In [ ]:
# Plot ACF and PACF for the (potentially differenced) training data
# Adjust 'data_to_plot' if differencing was applied
data_to_plot = train_df[TARGET] # Or train_diff if differenced
lags = 40 # Number of lags to show

fig, axes = plt.subplots(2, 1, figsize=(12, 8))
plot_acf(data_to_plot, lags=lags, ax=axes[0], title='Autocorrelation Function (ACF)')
plot_pacf(data_to_plot, lags=lags, ax=axes[1], title='Partial Autocorrelation Function (PACF)')
plt.tight_layout()
plt.show()

# --- Interpretation Guidance ---
# Based on the plots, choose candidate (p, d, q) orders.
# Example: If PACF cuts off after lag 2 and ACF tails off, suggest AR(2) -> p=2.
# Example: If ACF cuts off after lag 1 and PACF tails off, suggest MA(1) -> q=1.
# If both tail off, consider ARMA(p,q).
# Remember 'd' is the order of differencing needed for stationarity (often 0 or 1).
[Figure: ACF and PACF of the training series (40 lags)]
In [ ]:
# --- 1) ARIMA / SARIMA orders based on our ADF + ACF/PACF ---
arima_order     = (2, 0, 1)
seasonal_order  = (1, 0, 0, 24)  # daily seasonality

print(f"Fitting SARIMA{arima_order}×{seasonal_order} model...")
try:
    # Fit the SARIMA model on the full training series
    arima_model   = ARIMA(
        train_df[TARGET],
        order=arima_order,
        seasonal_order=seasonal_order,
        enforce_stationarity=True,
        enforce_invertibility=True
    )
    arima_results = arima_model.fit()
    print(arima_results.summary())

    # --- 2) One‐step‐ahead ARIMA forecasts on TRAIN (dynamic=False) ---
    train_pred = arima_results.get_prediction(
        start=train_df.index[0],
        end=train_df.index[-1],
        dynamic=False
    )
    train_df['ARIMA_pred'] = train_pred.predicted_mean
    train_df.dropna(subset=['ARIMA_pred'], inplace=True)

    # --- 3) One‐step‐ahead ARIMA forecasts on VALIDATION ---
    val_pred = arima_results.get_prediction(
        start=val_df.index[0],
        end=val_df.index[-1],
        dynamic=False
    )
    val_df['ARIMA_pred'] = val_pred.predicted_mean
    val_df.dropna(subset=['ARIMA_pred'], inplace=True)

    # --- 4) Multi‐step forecast for TEST (we still need to roll out into the future) ---
    n_val, n_test = len(val_df), len(test_df)
    fc = arima_results.forecast(steps=n_val + n_test)
    test_df['ARIMA_pred'] = fc.iloc[n_val:].values

    print("ARIMA fitting and forecasting complete.")

except Exception as e:
    print(f"\nError fitting ARIMA model: {e}")
    print("Falling back to 1-lag shift for residuals.")
    for df in (train_df, val_df, test_df):
        df['ARIMA_pred'] = df[TARGET].shift(1).bfill()  # persistence fallback

# --- 5) Compute residuals for downstream LSTM–CNN ---
train_df['Residual'] = train_df[TARGET] - train_df['ARIMA_pred']
val_df  ['Residual'] = val_df  [TARGET] - val_df  ['ARIMA_pred']
test_df ['Residual'] = test_df [TARGET] - test_df ['ARIMA_pred']
Fitting SARIMA(2, 0, 1)×(1, 0, 0, 24) model...
                                     SARIMAX Results                                     
=========================================================================================
Dep. Variable:                           PJME_MW   No. Observations:               109286
Model:             ARIMA(2, 0, 1)x(1, 0, [], 24)   Log Likelihood             -796492.155
Date:                           Fri, 25 Apr 2025   AIC                        1592996.309
Time:                                   11:53:59   BIC                        1593053.920
Sample:                                        0   HQIC                       1593013.723
                                        - 109286                                         
Covariance Type:                             opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       3.234e+04      0.010   3.39e+06      0.000    3.23e+04    3.23e+04
ar.L1          1.5255      0.002    894.376      0.000       1.522       1.529
ar.L2         -0.5255      0.002   -307.938      0.000      -0.529      -0.522
ma.L1          0.4656      0.002    262.219      0.000       0.462       0.469
ar.S.L24       0.8844      0.001    795.153      0.000       0.882       0.887
sigma2      1.252e+05    261.468    478.973      0.000    1.25e+05    1.26e+05
===================================================================================
Ljung-Box (L1) (Q):                  71.87   Jarque-Bera (JB):            236306.81
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               0.79   Skew:                             0.33
Prob(H) (two-sided):                  0.00   Kurtosis:                        10.17
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 1.29e+20. Standard errors may be unstable.

Error fitting ARIMA model: 'The `start` argument could not be matched to a location related to the index of the data.'
Falling back to 1-lag shift for residuals.
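The `start` lookup fails because the series' DatetimeIndex carries no frequency (the weather join and dropna left gaps), so statsmodels cannot map timestamps to positions. A possible fix, sketched here but not re-run in this notebook, is to regularize the index before fitting:

In [ ]:
# Sketch of a fix for the `start` matching error (not re-run here):
# give the series an explicit hourly frequency, then fill the gaps dropna left.
train_y = train_df[TARGET].asfreq('h').interpolate(limit_direction='both')
arima_model = ARIMA(train_y, order=arima_order, seasonal_order=seasonal_order)
# get_prediction(start=..., end=...) with timestamps should then resolve.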
In [ ]:
residuals = train_df['Residual']

# 1) ACF up to lag 40
fig, ax = plt.subplots(figsize=(10, 4))
plot_acf(residuals, lags=40, ax=ax)
ax.set_title('ACF of ARIMA Residuals (lags 0–40)')
plt.show()

# 2) Histogram
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(residuals, bins=50, edgecolor='k')
ax.set_title('Histogram of ARIMA Residuals')
ax.set_xlabel('Residual value')
ax.set_ylabel('Frequency')
plt.show()

# 3) Q–Q Plot
fig = sm.qqplot(residuals, line='s')
plt.title('Q–Q Plot of ARIMA Residuals')
plt.show()
[Figure: ACF of ARIMA residuals (lags 0–40)]
[Figure: histogram of ARIMA residuals]
[Figure: Q–Q plot of ARIMA residuals]

ARIMA Residual Analysis¶

After fitting the ARIMA model, it's crucial to analyze the residuals (the difference between the actual values and the ARIMA predictions on the training set). Ideally, the residuals should resemble white noise (zero mean, constant variance, no autocorrelation). Plotting the residuals and their ACF/PACF helps verify this.
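Beyond the plots, a portmanteau test puts a number on any remaining autocorrelation. A short sketch using statsmodels' Ljung–Box test (the lag choices are illustrative):

In [ ]:
from statsmodels.stats.diagnostic import acorr_ljungbox

# H0: no autocorrelation in the residuals up to the given lag
lb = acorr_ljungbox(train_df['Residual'], lags=[1, 24, 48], return_df=True)
print(lb)  # small p-values mean autocorrelation remains at that lag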

ARIMA Backbone: SARIMA(2,0,1)×(1,0,0,24) on PJM East Hourly Load


1. Model Specification¶

  • Non-seasonal part (p, d, q): (2, 0, 1)
    • AR(2): two autoregressive lags to capture the immediate inertia of load.
    • MA(1): one moving-average term to soak up the remaining 1-hour error correlation.
    • d = 0: no differencing needed, as the series was already stationary by ADF test.
  • Seasonal part (P, D, Q, s): (1, 0, 0, 24)
    • Seasonal AR(24): one daily‐lag term to model the 24-hour cyclical pattern.
    • D = 0, Q = 0: no additional seasonal differencing or MA required.

2. Fit Statistics¶

  • Log Likelihood: –796,492.16
  • AIC: 1,592,996.31
  • BIC: 1,593,053.92
  • HQIC: 1,593,013.72

The inclusion of MA(1) reduced AIC by roughly 16,000 relative to an earlier ARIMA(2,0,0)×(1,0,0,24) fit, indicating a substantially better trade-off between fit and parsimony.
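A small AIC sweep makes that comparison explicit. A sketch (the candidate list is illustrative, and each fit is slow on ~109k hourly observations):

In [ ]:
# Illustrative AIC comparison across a few candidate non-seasonal orders (slow)
for order in [(2, 0, 0), (2, 0, 1), (3, 0, 1)]:
    res = ARIMA(train_df[TARGET], order=order,
                seasonal_order=(1, 0, 0, 24)).fit()
    print(order, f"AIC = {res.aic:,.0f}")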


3. Parameter Estimates¶

| Term    | Estimate | Std. Err. | z-stat   | P-value | 95% CI             | Interpretation                        |
|---------|----------|-----------|----------|---------|--------------------|---------------------------------------|
| const   | 32,340   | 0.010     | 3.39×10⁶ | <0.001  | [32,340, 32,340]   | Baseline load level.                  |
| AR L1   | 1.5255   | 0.002     | 894.38   | <0.001  | [1.522, 1.529]     | Strong 1-hour autoregression.         |
| AR L2   | –0.5255  | 0.002     | –307.94  | <0.001  | [–0.529, –0.522]   | Negative weight at the 2-hour lag.    |
| MA L1   | 0.4656   | 0.002     | 262.22   | <0.001  | [0.462, 0.469]     | Captures residual 1-hour correlation. |
| SAR L24 | 0.8844   | 0.001     | 795.15   | <0.001  | [0.882, 0.887]     | Daily (24-hour) seasonal effect.      |
| σ²      | 125,200  | 261.47    | 478.97   | <0.001  | [124,700, 125,700] | Variance of model residuals.          |

All coefficients are highly significant (p < 0.001), confirming that each term contributes meaningfully to capturing load dynamics.


4. Residual Diagnostics¶

  • Ljung–Box Q (lag 1): 71.87 (p < 0.001) → the lag-1 residual autocorrelation is greatly reduced by the MA(1) term (it was ~7,370 before), though with ~109k observations it remains statistically significant.
  • Residual ACF (lags 0–40): spikes are small and mostly fall within the ±1.96/√N bounds, consistent with near-white-noise behavior.
  • Histogram & Q–Q Plot: residuals are tightly centered on zero, with only mild heavy-tail deviations—well suited for a downstream deep-learning model to capture infrequent extreme errors.
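For reference, the white-noise band quoted above is easy to compute directly (a one-liner, assuming the train residuals computed earlier):

In [ ]:
# Approximate 95% white-noise band for the residual ACF: ±1.96/sqrt(N)
N = len(train_df['Residual'])
print(f"±{1.96 / np.sqrt(N):.4f}")  # with N ≈ 109k this is roughly ±0.006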

5. Conclusion & Next Steps¶

This SARIMA(2,0,1)×(1,0,0,24) model provides a robust linear and seasonal “backbone”:

  1. Linear inertia via two AR lags.
  2. Short-term shock absorption via MA(1).
  3. Daily cycle via seasonal AR(24).

Its residuals now behave as near‐white noise, free from obvious autocorrelation or cyclical structure. We can therefore freeze these ARIMA forecasts and hand off the residuals to your CNN–LSTM hybrid, which will learn to correct the remaining non-linear, weather- and holiday-driven patterns.

4. Residual Calculation (for LSTM-CNN Input)¶

Now, calculate the residuals for all sets (train, validation, test) from the generated ARIMA predictions. This repeats the final step of Section 3 so that this section runs standalone; the residuals become the target variable for the subsequent LSTM-CNN model.

In [9]:
# Calculate residuals (Actual - ARIMA Prediction) for all sets
# Ensure ARIMA_pred column exists and is populated

train_df['Residual'] = train_df[TARGET] - train_df['ARIMA_pred']
val_df['Residual'] = val_df[TARGET] - val_df['ARIMA_pred']
test_df['Residual'] = test_df[TARGET] - test_df['ARIMA_pred']
print("Residuals calculated for train, val, and test sets.")
print(train_df[['Residual']].head())

# Check for NaN values in the residuals
for name, df in zip(
    ['train', 'val', 'test'],
    [train_df, val_df, test_df]
):
    n_nulls = df['Residual'].isna().sum()
    print(f"{name.capitalize()} residuals: {n_nulls} NaN values")

# Visualize the residuals distribution for all sets
plt.figure(figsize=(12, 6))
sns.histplot(train_df['Residual'], bins=50, kde=True, label='Train', color='blue', alpha=0.5)
sns.histplot(val_df['Residual'], bins=50, kde=True, label='Validation', color='orange', alpha=0.5)
sns.histplot(test_df['Residual'], bins=50, kde=True, label='Test', color='green', alpha=0.5)
plt.title('Residuals Distribution')
plt.xlabel('Residual Value')
plt.ylabel('Density')
plt.legend()
plt.show()
Residuals calculated for train, val, and test sets.
                     Residual
2003-01-01 00:00:00       0.0
2003-01-01 01:00:00   -1417.0
2003-01-01 02:00:00   -1356.0
2003-01-01 03:00:00   -1114.0
2003-01-01 04:00:00    -676.0
Train residuals: 0 NaN values
Val residuals: 0 NaN values
Test residuals: 0 NaN values
[Figure: overlaid residual histograms for the train, validation, and test sets]

5. LSTM-CNN Data Preparation¶

Scale features and create sequences (X, y) for the LSTM-CNN model. The target y will be the ARIMA residual.

In [ ]:
# 1) Choose exogenous input features ('Residual' is the target; 'year' is dropped because its val/test values lie outside the training range)
features_exog = [
    'hour', 'dayofweek', 'month', 'dayofyear',
    'is_dayoff', 'temp', 'dwpt', 'rhum', 'prcp', 'wspd'
]

# 2) Extract raw arrays
X_train_raw = train_df[features_exog].values
X_val_raw   = val_df[features_exog].values
X_test_raw  = test_df[features_exog].values

y_train_raw = train_df['Residual'].values.reshape(-1, 1)
y_val_raw   = val_df['Residual'].values.reshape(-1, 1)
y_test_raw  = test_df['Residual'].values.reshape(-1, 1)

# 3) Fit separate scalers
scaler_X = StandardScaler().fit(X_train_raw)
scaler_y = StandardScaler().fit(y_train_raw)

# 4) Transform features and target
X_train_scaled = scaler_X.transform(X_train_raw)
X_val_scaled   = scaler_X.transform(X_val_raw)
X_test_scaled  = scaler_X.transform(X_test_raw)

y_train_scaled = scaler_y.transform(y_train_raw).flatten()
y_val_scaled   = scaler_y.transform(y_val_raw).flatten()
y_test_scaled  = scaler_y.transform(y_test_raw).flatten()

# 5) Build sliding windows
def create_sequences(X, y, n_steps):
    Xs, ys = [], []
    for i in range(len(X) - n_steps):
        Xs.append(X[i : i + n_steps])
        ys.append(y[i + n_steps])
    return np.array(Xs), np.array(ys)

n_steps = 24

X_train, y_train_res = create_sequences(X_train_scaled, y_train_scaled, n_steps)
X_val,   y_val_res   = create_sequences(X_val_scaled,   y_val_scaled,   n_steps)
X_test,  y_test_res  = create_sequences(X_test_scaled,  y_test_scaled,  n_steps)

print(f"X_train shape: {X_train.shape}, y_train_res shape: {y_train_res.shape}")
print(f"X_val   shape: {X_val.shape},   y_val_res   shape: {y_val_res.shape}")
print(f"X_test  shape: {X_test.shape},  y_test_res  shape: {y_test_res.shape}")
X_train shape: (109262, 24, 10), y_train_res shape: (109262,)
X_val   shape: (13637, 24, 10),   y_val_res   shape: (13637,)
X_test  shape: (13637, 24, 10),  y_test_res  shape: (13637,)
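To make the windowing concrete, here is a toy run of create_sequences (purely illustrative values):

In [ ]:
# Toy illustration: 5 timesteps, 2 features, window length 2
_toy_X = np.arange(10).reshape(5, 2)
_toy_y = np.array([10, 11, 12, 13, 14])
_Xs, _ys = create_sequences(_toy_X, _toy_y, n_steps=2)
print(_Xs.shape, _ys)  # (3, 2, 2) [12 13 14]: each window predicts the next step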
In [ ]:
# 1) Convert numpy arrays to torch tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train_res, dtype=torch.float32).unsqueeze(-1)
X_val_t   = torch.tensor(X_val,   dtype=torch.float32)
y_val_t   = torch.tensor(y_val_res,   dtype=torch.float32).unsqueeze(-1)
X_test_t  = torch.tensor(X_test,  dtype=torch.float32)
y_test_t  = torch.tensor(y_test_res,  dtype=torch.float32).unsqueeze(-1)

# 2) Build TensorDatasets
train_ds = TensorDataset(X_train_t, y_train_t)
val_ds   = TensorDataset(X_val_t,   y_val_t)
test_ds  = TensorDataset(X_test_t,  y_test_t)

# 3) Create DataLoaders
batch_size = 64

train_loader = DataLoader(
    train_ds, batch_size=batch_size, shuffle=True,  drop_last=True
)
val_loader   = DataLoader(
    val_ds,   batch_size=batch_size, shuffle=False, drop_last=False
)
test_loader  = DataLoader(
    test_ds,  batch_size=batch_size, shuffle=False, drop_last=False
)

# Sanity check one batch
xb, yb = next(iter(train_loader))
print("Batch X shape:", xb.shape)  # (batch_size, 24, 10)
print("Batch y shape:", yb.shape)  # (batch_size, 1)
Batch X shape: torch.Size([64, 24, 10])
Batch y shape: torch.Size([64, 1])

6. LSTM-CNN Model Definition (PyTorch)¶

In [12]:
# Define the hybrid LSTM-CNN model using PyTorch
n_features = X_train.shape[2]

class LSTMCNN(nn.Module):
    def __init__(
        self,
        input_dim,
        hidden_dim,
        cnn_filters,
        kernel_size,
        pool_size,
        dropout_rate=0.3,
        num_layers=1,        # number of stacked LSTM layers
        output_dim=1
    ):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Two Conv1d blocks extract local (short-term) patterns from the sequence
        self.conv_block1 = nn.Sequential(
            nn.Conv1d(input_dim,   cnn_filters, kernel_size, padding=1),
            nn.BatchNorm1d(cnn_filters),
            nn.ReLU(),
            nn.Dropout1d(dropout_rate),
            nn.MaxPool1d(pool_size)
        )
        self.conv_block2 = nn.Sequential(
            nn.Conv1d(cnn_filters, cnn_filters, kernel_size, padding=1),
            nn.BatchNorm1d(cnn_filters),
            nn.ReLU(),
            nn.Dropout1d(dropout_rate),
            nn.MaxPool1d(pool_size)
        )

        # (After two poolings the sequence length shrinks from n_steps to
        #  n_steps // pool_size**2, i.e. 24 -> 6 here; since we only use the
        #  LSTM's final hidden state, the exact length is not needed.)

        # LSTM over the pooled CNN feature sequence
        self.lstm = nn.LSTM(
            input_size=cnn_filters,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True
        )

        self.layer_norm = nn.LayerNorm(hidden_dim)
        self.dropout    = nn.Dropout(dropout_rate)
        self.fc         = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch, seq_len, features) -> (batch, features, seq_len) for Conv1d
        x = x.permute(0, 2, 1)
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x.permute(0, 2, 1)  # back to (batch, seq_len, channels) for the LSTM

        # hn has shape (num_layers, batch, hidden_dim); take the top layer's state
        _, (hn, _) = self.lstm(x)
        last_hidden = hn[-1]
        last_hidden = self.layer_norm(last_hidden)
        last_hidden = self.dropout(last_hidden)
        return self.fc(last_hidden)
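A quick smoke test (illustrative hyperparameters, untrained weights) confirms the tensor plumbing end to end:

In [ ]:
# Smoke test: push a dummy batch through an untrained model and check shapes
_m = LSTMCNN(input_dim=n_features, hidden_dim=32, cnn_filters=16,
             kernel_size=3, pool_size=2)
_out = _m(torch.randn(8, 24, n_features))  # (batch, n_steps, n_features)
print(_out.shape)  # expected: torch.Size([8, 1])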
In [13]:
hidden_dim   = 128
cnn_filters  = 64
kernel_size  = 3
pool_size    = 2
dropout_rate = 0.3
lr           = 5e-4
weight_decay = 1e-5
num_layers   = 2  # two‐layer LSTM

model = LSTMCNN(
    input_dim=n_features,
    hidden_dim=hidden_dim,
    cnn_filters=cnn_filters,
    kernel_size=kernel_size,
    pool_size=pool_size,
    dropout_rate=dropout_rate,
    num_layers=num_layers,
    output_dim=1
).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=lr,
    weight_decay=weight_decay
)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

7. LSTM-CNN Training (PyTorch)¶

In [ ]:
# 1) Training loop with early stopping
n_epochs = 50
patience_es = 10
best_val_loss = float('inf')
epochs_no_improve = 0
best_state = None
history = {'train': [], 'val': []}

for epoch in range(1, n_epochs+1):
    # — Train —
    model.train()
    running_train = 0.0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        running_train += loss.item() * xb.size(0)
    train_loss = running_train / len(train_loader.dataset)
    history['train'].append(train_loss)

    # — Validate —
    model.eval()
    running_val = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb)
            running_val += criterion(preds, yb).item() * xb.size(0)
    val_loss = running_val / len(val_loader.dataset)
    scheduler.step(val_loss)
    history['val'].append(val_loss)

    print(f"Epoch {epoch} — train: {train_loss:.6f}, val: {val_loss:.6f}")

    # Early stopping (the LR scheduler was already stepped above)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        best_state = copy.deepcopy(model.state_dict())
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience_es:
            print(f"Early stopping at epoch {epoch}")
            break

# Restore best model
if best_state:
    model.load_state_dict(best_state)

# Plot loss curves
plt.plot(history['train'], label='Train Loss')
plt.plot(history['val'],   label='Val Loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('MSE')
plt.show()

# 2) Test-time inference
model.eval()
all_preds = []
with torch.no_grad():
    for xb, _ in test_loader:
        xb = xb.to(device)
        res_scaled = model(xb).cpu().numpy().flatten()
        # invert scaling
        res_pred = scaler_y.inverse_transform(res_scaled.reshape(-1,1)).flatten()
        all_preds.extend(res_pred)

all_preds = np.array(all_preds)

# 3) Add back the ARIMA forecast for each test point
#    Assuming `test_df` is indexed in the same order as your sequences:
arima_fc = test_df['ARIMA_pred'].values[n_steps:]  # align with sequences
final_fc = arima_fc + all_preds

# 4) Compute RMSE on the test set
true = test_df[TARGET].values[n_steps:]
rmse = np.sqrt(mean_squared_error(true, final_fc))
print(f"Hybrid Test RMSE: {rmse:.2f} MW")
Epoch 1 — train: 0.199218, val: 0.081232
Epoch 2 — train: 0.093445, val: 0.066740
Epoch 3 — train: 0.081260, val: 0.075189
Epoch 4 — train: 0.076078, val: 0.060900
Epoch 5 — train: 0.071108, val: 0.066488
Epoch 6 — train: 0.068462, val: 0.062863
Epoch 7 — train: 0.067124, val: 0.059623
Epoch 8 — train: 0.064816, val: 0.053295
Epoch 9 — train: 0.062979, val: 0.057971
Epoch 10 — train: 0.061809, val: 0.056272
Epoch 11 — train: 0.060602, val: 0.055684
Epoch 12 — train: 0.056799, val: 0.052353
Epoch 13 — train: 0.056041, val: 0.051023
Epoch 14 — train: 0.055918, val: 0.055033
Epoch 15 — train: 0.054295, val: 0.054934
Epoch 16 — train: 0.054799, val: 0.055228
Epoch 17 — train: 0.052818, val: 0.051943
Epoch 18 — train: 0.052518, val: 0.052361
Epoch 19 — train: 0.052564, val: 0.052343
Epoch 20 — train: 0.051415, val: 0.051622
Epoch 21 — train: 0.050985, val: 0.050053
Epoch 22 — train: 0.050854, val: 0.052365
Epoch 23 — train: 0.050881, val: 0.048996
Epoch 24 — train: 0.050621, val: 0.050623
Epoch 25 — train: 0.050299, val: 0.052596
Epoch 26 — train: 0.050632, val: 0.049454
Epoch 27 — train: 0.050255, val: 0.050973
Epoch 28 — train: 0.049594, val: 0.050429
Epoch 29 — train: 0.050127, val: 0.051662
Epoch 30 — train: 0.049247, val: 0.050194
Epoch 31 — train: 0.049649, val: 0.051978
Epoch 32 — train: 0.049500, val: 0.051569
Epoch 33 — train: 0.049215, val: 0.050471
Early stopping at epoch 33
[Figure: training vs. validation loss curves]
Hybrid Test RMSE: 374.63 MW

8. Evaluation¶

In [ ]:
# 1) Align test_df with your residual predictions
#    all_preds must be a 1D array of length = len(test_df) - n_steps
test_df_aligned = test_df.iloc[n_steps:].copy()

# 2) Insert the residual forecasts and hybrid sum
test_df_aligned['Residual_pred'] = all_preds
test_df_aligned['Combined_pred'] = test_df_aligned['ARIMA_pred'] + test_df_aligned['Residual_pred']

# 3) Define MAPE
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

# 4) Compute metrics for Hybrid
y_true            = test_df_aligned[TARGET]
y_pred_combined   = test_df_aligned['Combined_pred']

rmse_hybrid = np.sqrt(mean_squared_error(y_true, y_pred_combined))
mae_hybrid  = mean_absolute_error   (y_true, y_pred_combined)
mape_hybrid = mean_absolute_percentage_error(y_true, y_pred_combined)

print("--- Combined Model Test Set Error Metrics ---")
print(f"RMSE: {rmse_hybrid:,.2f} MW")
print(f"MAE:  {mae_hybrid:,.2f} MW")
print(f"MAPE: {mape_hybrid:.2f}%")

# 5) Compute metrics for ARIMA-only baseline
y_pred_arima = test_df_aligned['ARIMA_pred']

rmse_arima = np.sqrt(mean_squared_error(y_true, y_pred_arima))
mae_arima  = mean_absolute_error   (y_true, y_pred_arima)
mape_arima = mean_absolute_percentage_error(y_true, y_pred_arima)

print("\n--- ARIMA Only Test Set Error Metrics ---")
print(f"RMSE: {rmse_arima:,.2f} MW")
print(f"MAE:  {mae_arima:,.2f} MW")
print(f"MAPE: {mape_arima:.2f}%")
--- Combined Model Test Set Error Metrics ---
RMSE: 374.63 MW
MAE:  279.93 MW
MAPE: 0.89%

--- ARIMA Only Test Set Error Metrics ---
RMSE: 1,322.51 MW
MAE:  1,033.08 MW
MAPE: 3.37%

9. Visualization¶

In [16]:
# Plot actual vs predicted for a period in the test set
fig, ax = plt.subplots(figsize=(15, 6))
test_df_aligned[TARGET].plot(ax=ax, label='Actual', style='.', alpha=0.7)
test_df_aligned['Combined_pred'].plot(ax=ax, label='Combined Forecast', style='.', alpha=0.7)
# test_df_aligned['ARIMA_pred'].plot(ax=ax, label='ARIMA Forecast', style='--', alpha=0.7)
ax.set_title('Actual vs. Combined Forecast (Test Set)')
ax.set_ylabel('MW')
ax.legend()
plt.show()

# Zoom in on a specific week
start_date = test_df_aligned.index.min() + pd.Timedelta(days=7)
end_date = start_date + pd.Timedelta(days=7)

fig, ax = plt.subplots(figsize=(15, 6))
test_df_aligned.loc[start_date:end_date, TARGET].plot(ax=ax, label='Actual', style='.-')
test_df_aligned.loc[start_date:end_date, 'Combined_pred'].plot(ax=ax, label='Combined Forecast', style='-')
ax.set_title(f'Forecast vs Actuals ({start_date.date()} to {end_date.date()})')
ax.set_ylabel('MW')
ax.legend()
plt.show()
[Figure: actual vs. combined forecast over the full test set]
[Figure: one-week zoom of forecast vs. actuals]

10. Error Analysis: Worst/Best Predicted Days¶

In [17]:
# Calculate error and absolute error for each prediction in the aligned test set
test_df_aligned['error'] = test_df_aligned[TARGET] - test_df_aligned['Combined_pred']
test_df_aligned['abs_error'] = test_df_aligned['error'].abs()

# Group by date and calculate mean errors
error_by_day = test_df_aligned.groupby(test_df_aligned.index.date) \
    .agg(mean_Actual_MW=('PJME_MW', 'mean'),
         mean_Combined_Pred=('Combined_pred', 'mean'),
         mean_error=('error', 'mean'),
         mean_abs_error=('abs_error', 'mean'))

# Add weekday name for context
error_by_day['weekday'] = pd.to_datetime(error_by_day.index).strftime('%A')

print("--- Top 10 Days with Worst Mean Absolute Error ---")
worst_days = error_by_day.sort_values('mean_abs_error', ascending=False).head(10)
worst_days
--- Top 10 Days with Worst Mean Absolute Error ---
Out[17]:
mean_Actual_MW mean_Combined_Pred mean_error mean_abs_error weekday
2018-07-06 39145.250000 39537.814828 -392.564828 684.310561 Friday
2018-07-15 34204.083333 34291.422499 -87.339166 596.290528 Sunday
2018-05-15 31481.666667 31356.620700 125.045966 562.619369 Tuesday
2018-07-05 42402.375000 42251.683552 150.691448 552.766572 Thursday
2017-07-07 35483.208333 35437.087204 46.121130 533.779015 Friday
2017-10-09 34175.541667 34086.293276 89.248391 524.901558 Monday
2017-11-10 29836.583333 29621.852033 214.731300 521.421449 Friday
2018-02-17 30211.708333 30061.659929 150.048404 500.833010 Saturday
2018-06-14 33066.708333 33075.758594 -9.050261 483.301696 Thursday
2017-06-23 38230.958333 38204.146827 26.811506 473.559655 Friday
In [18]:
print("\n--- Top 10 Days with Best Mean Absolute Error ---")
best_days = error_by_day.sort_values('mean_abs_error', ascending=True).head(10)
best_days
--- Top 10 Days with Best Mean Absolute Error ---
Out[18]:
mean_Actual_MW mean_Combined_Pred mean_error mean_abs_error weekday
2018-01-28 27356.541667 27344.954915 11.586751 103.469479 Sunday
2017-03-31 30181.041667 30156.837344 24.204323 116.733533 Friday
2017-04-22 24219.083333 24226.024503 -6.941170 117.236554 Saturday
2018-02-25 27246.041667 27272.519083 -26.477416 123.437602 Sunday
2017-01-17 32217.291667 32235.228902 -17.937235 124.836737 Tuesday
2017-04-26 26909.041667 26915.778799 -6.737132 127.218532 Wednesday
2018-05-13 24001.500000 24072.088477 -70.588477 130.376383 Sunday
2017-01-21 28135.125000 28165.115846 -29.990846 142.306732 Saturday
2017-10-15 25363.250000 25342.007214 21.242786 142.480159 Sunday
2017-03-28 28118.666667 28107.719961 10.946705 143.369604 Tuesday

Worst Predicted Day Visualization¶

In [19]:
# Find the date with the highest mean absolute error
worst_day_date = error_by_day['mean_abs_error'].idxmax()
worst_day_str = worst_day_date.strftime('%Y-%m-%d')

print(f"Plotting worst predicted day: {worst_day_str}")

fig, ax = plt.subplots(figsize=(15, 6))
test_df_aligned.loc[worst_day_str][TARGET].plot(ax=ax, label='Actual', style='.-')
test_df_aligned.loc[worst_day_str]['Combined_pred'].plot(ax=ax, label='Combined Prediction', style='-')
ax.set_title(f'Worst Predicted Day ({worst_day_str}) - MAE: {error_by_day.loc[worst_day_date, "mean_abs_error"]:.2f} MW')
ax.set_ylabel('MW')
ax.legend()
plt.show()
Plotting worst predicted day: 2018-07-06
[Figure: hourly actual vs. combined prediction on the worst day, 2018-07-06]

Best Predicted Day Visualization¶

In [20]:
# Find the date with the lowest mean absolute error
best_day_date = error_by_day['mean_abs_error'].idxmin()
best_day_str = best_day_date.strftime('%Y-%m-%d')

print(f"Plotting best predicted day: {best_day_str}")

fig, ax = plt.subplots(figsize=(15, 6))
test_df_aligned.loc[best_day_str][TARGET].plot(ax=ax, label='Actual', style='.-')
test_df_aligned.loc[best_day_str]['Combined_pred'].plot(ax=ax, label='Combined Prediction', style='-')
ax.set_title(f'Best Predicted Day ({best_day_str}) - MAE: {error_by_day.loc[best_day_date, "mean_abs_error"]:.2f} MW')
ax.set_ylabel('MW')
ax.legend()
plt.show()
Plotting best predicted day: 2018-01-28
[Figure: hourly actual vs. combined prediction on the best day, 2018-01-28]

📉 Worst‐predicted days¶

| Date       | Mean Load | Mean Forecast | Mean Abs Error | Weekday  | Notes                                                                   |
|------------|-----------|---------------|----------------|----------|-------------------------------------------------------------------------|
| 2018-07-06 | 39,145 MW | 39,538 MW     | 684 MW         | Friday   | Bridge Friday after the Wednesday July 4th; atypical holiday-week ramp. |
| 2018-07-15 | 34,204 MW | 34,291 MW     | 596 MW         | Sunday   | Midsummer weekend; higher residential AC load.                          |
| 2018-05-15 | 31,482 MW | 31,357 MW     | 563 MW         | Tuesday  | Early-summer heat spike.                                                |
| 2018-07-05 | 42,402 MW | 42,252 MW     | 553 MW         | Thursday | Bridge Thursday immediately after July 4th.                             |

Two of the top four misses straddle July 4th, clear evidence that the holiday-and-weekend flag isn't fully capturing the day-before/day-after effects. Likewise, several early-summer heat days (May/June) slip through.

📈 Best‐predicted days¶

| Date       | Mean Load | Mean Forecast | Mean Abs Error | Weekday  |
|------------|-----------|---------------|----------------|----------|
| 2018-01-28 | 27,357 MW | 27,345 MW     | 103 MW         | Sunday   |
| 2017-03-31 | 30,181 MW | 30,157 MW     | 117 MW         | Friday   |
| 2017-04-22 | 24,219 MW | 24,226 MW     | 117 MW         | Saturday |

These are steady, mild, off-peak days (winter Sundays and spring shoulder dates) where both ARIMA and your CNN–LSTM residual learner face no surprises: loads follow their usual daily cycle.


🚀 Next steps¶

  1. Enhance your holiday feature

    • Add “day + 1” and “day − 1” flags around federal holidays so the network sees those rebound effects explicitly (see the sketch after this list).
  2. Bake in extreme-temperature signals

    • Rather than raw temp, include features like “Cooling Degree Hours above 75°F” or quadratic terms to better capture AC load surges on hot days.
  3. Special‐case gating

    • For the handful of days with MAE > 500 MW, consider a tiny “holiday‐heat” submodel: e.g. train a small decision‐tree on top of your residuals to correct whenever (is_holiday_adjacent & temp > 85°F).
  4. Granular error analysis

    • Drill into those worst days hour by hour. Perhaps the network misses morning ramp vs. evening ramp differently—if so, you could add “hour×dayoff” interaction features.
  5. Re-validate after tweaks

    • Recompute your day-level MAE table and confirm that the July 5–6 errors shrink from the ~550–690 MW range toward the test-set average of ~280 MW.
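As a sketch of the first two ideas (the new column names and the 75°F threshold are illustrative; Meteostat temperatures are in °C):

In [ ]:
# Sketch: holiday-adjacency flags and a cooling-degree-hours feature
day = pjme_weather.index.normalize()
pjme_weather['is_hol_minus1'] = day.isin(all_hols - pd.Timedelta(days=1))  # day before a holiday
pjme_weather['is_hol_plus1']  = day.isin(all_hols + pd.Timedelta(days=1))  # day after a holiday

# Cooling degree hours above 75°F (about 23.9°C); `temp` is in °C from Meteostat
pjme_weather['cdh_75F'] = (pjme_weather['temp'] - 23.9).clip(lower=0)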

By encoding a bit more domain knowledge around holiday adjacency and temperature extremes, you’ll likely eliminate those “spikes” in your worst‐case errors—pushing your hybrid MAPE even lower than 0.9%.