Hourly Energy Consumption Forecasting with XGBoost (Calendar Features Only)

Introduction to XGBoost for Time-Series Forecasting

XGBoost (Extreme Gradient Boosting) is a powerful, scalable machine learning algorithm renowned for its speed, accuracy, and interpretability in structured data tasks. While not inherently time-aware, it excels at time-series forecasting when paired with thoughtful feature engineering to encode temporal patterns.

Key Advantages for Time Series

  1. Nonlinear Pattern Capture:
    XGBoost’s tree-based structure models complex interactions (e.g., how hour-of-day and day-of-week jointly influence energy demand) that linear approaches like ARIMA struggle to capture.

  2. Feature Importance Insights:
    Built-in metrics and tools like SHAP (SHapley Additive exPlanations) quantify how each engineered feature (e.g., hour, month) contributes to predictions, aiding model debugging and trust.

  3. Robustness & Efficiency:
    Handles large datasets (e.g., hourly data over decades) gracefully, resists overfitting via regularization, and supports early stopping for optimal training.

Why Calendar Features?

In this workflow, XGBoost relies solely on calendar-derived features (no weather/exogenous data) to forecast energy demand. This approach:

  • Extracts cyclical patterns (e.g., daily, weekly, seasonal cycles) from timestamps (see the encoding aside after this list).
  • Avoids dependency on external data sources, making it lightweight and broadly applicable.
  • Demonstrates how even simple temporal features can encode rich, actionable signals for tree-based models.
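
An aside on encoding: tree ensembles can split raw integer features like hour (0–23) directly, so this workflow keeps them as plain integers. Models that assume smooth or linear structure often use a sine/cosine encoding instead, which places 23:00 and 00:00 next to each other on a circle. A minimal sketch, not part of this notebook’s pipeline:

import numpy as np
import pandas as pd

idx = pd.date_range('2024-01-01', periods=24, freq='h')  # illustrative index
hours = idx.hour.to_numpy()

# Map the 24-hour cycle onto a circle so the encoding is continuous at midnight.
hour_sin = np.sin(2 * np.pi * hours / 24)
hour_cos = np.cos(2 * np.pi * hours / 24)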

Workflow Logic

By treating time series as a supervised regression problem (predicting future values from engineered past features), XGBoost bypasses traditional time-series assumptions (e.g., stationarity) and leverages its strength in tabular data modeling. The final forecast combines learned hierarchical relationships (e.g., β€œ3 PM on summer weekends” correlates with high AC usage) into a single prediction.
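
To make the supervised framing concrete, here is a minimal sketch (toy values, illustrative names) of the tabular design matrix a tree model consumes:

import pandas as pd

# Toy hourly series (illustrative values only).
ts = pd.Series(
    range(48),
    index=pd.date_range('2024-01-01', periods=48, freq='h'),
    name='load',
)

# One row per timestamp: calendar features as inputs, observed load as the target.
frame = pd.DataFrame(
    {'hour': ts.index.hour, 'dayofweek': ts.index.dayofweek, 'load': ts.values},
    index=ts.index,
)
print(frame.head())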

Workflow Overview

  1. Data Preparation

    • Load PJM East hourly load data (2002–2018) and parse the timestamp into a DatetimeIndex.
    • Drop the final incomplete day (2018-08-03) and ensure the series is sorted chronologically.
  2. Chronological Train/Val/Test Split

    • Split into 80% train, 10% validation, and 10% test by time (no shuffling), to mimic a real forecasting setup and prevent data leakage.
  3. Feature Engineering

    • Derive eight purely calendar-based features from the timestamp:
      hour, dayofweek, quarter, month, year, dayofyear, dayofmonth, and weekofyear.
  4. Model Training

    • Fit an XGBRegressor (1,000 trees, learning rate 0.01) using the train set, with early stopping (50 rounds) monitored on the validation set.
  5. Interpretability with SHAP

    • Compute SHAP values on the validation set to rank and visualize feature importance via both beeswarm and bar charts.
  6. Forecasting and Evaluation

    • Predict on the held-out test set and compute RMSE, MAE, and MAPE.
    • Plot the full series (actual vs. predicted) and zoom in on representative weeks to visually inspect performance.
  7. Error Analysis

    • Aggregate errors by day and identify the top 10 worst- and best-predicted dates.
    • Hypothesize root causes (e.g., extreme cold/heat, holiday effects) for the largest errors.

Key Highlights

  • Baseline Accuracy

    • RMSE: ~3,973 MW
    • MAE: ~3,137 MW
    • MAPE: 10.24%
  • Most Influential Features

    • Day of week and hour of day dominate, followed by day of year, reflecting strong weekly and daily cycles in electricity demand.
  • Systematic Error Patterns

    • Largest misses occur on deep-freeze weekends and holiday-week days (e.g., New Year’s Eve, Presidents’ Day weekend), when calendar alone can’t capture weather spikes or industrial closures.
    • Best performance aligns with β€œnormal” weekdays in stable seasons (e.g., Dec 30, 2016).

Next Steps

  • Integrate Weather Variables (temperature, humidity, wind) to capture extreme-temperature spikes.
  • Add Holiday Flags (and surrounding days) to account for anomalous load patterns.
  • Introduce Lagged Load Features so the model can learn autoregressive patterns.
  • Expand to More Advanced Models (e.g., hybrid XGBoost + LSTM, or purely deep-learning approaches) for further accuracy gains.
In [68]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
import shap
import warnings
warnings.filterwarnings("ignore")

plt.style.use('fivethirtyeight')

Load and Prepare Data

Load the PJM East hourly energy consumption data. Ensure the index is a DatetimeIndex, sort it chronologically, and remove the last day which has incomplete data.

In [69]:
pjme = pd.read_csv('../data/3/PJME_hourly.csv', index_col='Datetime', parse_dates=['Datetime'])

# Drop 2018-08-03, the final day of the dataset, which has incomplete data
pjme = pjme[pjme.index < '2018-08-03']

# Ensure DatetimeIndex and sort
pjme.index = pd.to_datetime(pjme.index)
pjme = pjme.sort_index()

# Verify
print(f'Data shape: {pjme.shape}')
print(f'Index monotonic? {pjme.index.is_monotonic_increasing}')
pjme.head()
Data shape: (145365, 1)
Index monotonic? True
Out[69]:
                     PJME_MW
Datetime
2002-01-01 01:00:00  30393.0
2002-01-01 02:00:00  29265.0
2002-01-01 03:00:00  28357.0
2002-01-01 04:00:00  27899.0
2002-01-01 05:00:00  28057.0
In [70]:
color_pal = sns.color_palette() # Use seaborn's default palette
pjme.plot(style='.', figsize=(15,5), color=color_pal[0], title='PJM East Hourly Energy Consumption (MW)')
plt.ylabel('MW')
plt.show()
[Figure: PJM East hourly energy consumption (MW), full-series scatter plot]

Train/Validation/Test Split

Split the data chronologically to simulate a real-world forecasting scenario:

  • Training Set: First ~80% of the data.
  • Validation Set: Next ~10% of the data (used for early stopping).
  • Test Set: Final ~10% of the data (used for final model evaluation).
In [71]:
# Define split points based on time
total_hours = len(pjme)
test_split_point = int(total_hours * 0.9)
val_split_point = int(total_hours * 0.8)

test_split_date = pjme.index[test_split_point]
val_split_date = pjme.index[val_split_point]

# Perform the split using the calculated dates
test_df = pjme.loc[pjme.index >= test_split_date].copy()
val_df = pjme.loc[(pjme.index >= val_split_date) & (pjme.index < test_split_date)].copy()
train_df = pjme.loc[pjme.index < val_split_date].copy()

# Check proportions and sizes
total_len = len(pjme)
train_len = len(train_df)
val_len = len(val_df)
test_len = len(test_df)
print(f"Total: {total_len} rows")
print(f"Train: {train_len} rows ({train_len/total_len:.1%})")
print(f"Val  : {val_len} rows ({val_len/total_len:.1%})")
print(f"Test : {test_len} rows ({test_len/total_len:.1%})")
Total: 145365 rows
Train: 116292 rows (80.0%)
Val  : 14536 rows (10.0%)
Test : 14537 rows (10.0%)
In [72]:
# Verify the chronological split
print("\n--- Split Verification ---")
print(f"Original sorted?    {pjme.index.is_monotonic_increasing}")
print(f"Train ends at:    {train_df.index.max()}")
print(f"Val starts at:    {val_df.index.min()}")
print(f"Val ends at:      {val_df.index.max()}")
print(f"Test starts at:   {test_df.index.min()}")

# Confirm no overlap and chronological order within splits
assert train_df.index.max() < val_df.index.min()
assert val_df.index.max() < test_df.index.min()
assert train_df.index.is_monotonic_increasing
assert val_df.index.is_monotonic_increasing
assert test_df.index.is_monotonic_increasing
print("Split boundaries and ordering confirmed.")
--- Split Verification ---
Original sorted?    True
Train ends at:    2015-04-09 14:00:00
Val starts at:    2015-04-09 15:00:00
Val ends at:      2016-12-05 05:00:00
Test starts at:   2016-12-05 06:00:00
Split boundaries and ordering confirmed.
In [73]:
# Plot the splits
fig, ax = plt.subplots(figsize=(15, 5))
train_df['PJME_MW'].plot(ax=ax, label='Training Set', title='Data Train/Validation/Test Split', style='.')
val_df['PJME_MW'].plot(ax=ax, label='Validation Set', style='.')
test_df['PJME_MW'].plot(ax=ax, label='Test Set', style='.')
ax.axvline(val_split_date, color='black', ls='--')
ax.axvline(test_split_date, color='black', ls='--')
ax.legend()
plt.ylabel('MW')
plt.show()
[Figure: train/validation/test split over the full series, with dashed lines at the split dates]

Create Time Series Features

Define a function to extract calendar-based features from the DatetimeIndex.

In [74]:
def create_features(df, label=None):
    """
    Creates time series features from a datetime index.
    
    Args:
        df (pd.DataFrame): DataFrame with a DatetimeIndex.
        label (str, optional): Name of the target column. Defaults to None.
        
    Returns:
        pd.DataFrame or tuple(pd.DataFrame, pd.Series): 
            If label is None, returns DataFrame with features.
            If label is provided, returns tuple (feature DataFrame X, target Series y).
    """
    df = df.copy()
    df['hour'] = df.index.hour
    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['dayofmonth'] = df.index.day
    df['weekofyear'] = df.index.isocalendar().week.astype(int)
    
    X = df[['hour', 'dayofweek', 'quarter', 'month', 'year',
            'dayofyear', 'dayofmonth', 'weekofyear']]
    if label:
        y = df[label]
        return X, y
    return X
In [75]:
TARGET = 'PJME_MW'
X_train, y_train = create_features(train_df, label=TARGET)
X_val, y_val = create_features(val_df, label=TARGET)
X_test, y_test = create_features(test_df, label=TARGET)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape:   {X_val.shape}, y_val shape:   {y_val.shape}")
print(f"X_test shape:  {X_test.shape}, y_test shape:  {y_test.shape}")
X_train shape: (116292, 8), y_train shape: (116292,)
X_val shape:   (14536, 8), y_val shape:   (14536,)
X_test shape:  (14537, 8), y_test shape:  (14537,)

Train XGBoost Model

Train an XGBoost Regressor model using only the calendar features. Use the validation set for early stopping to prevent overfitting and find the optimal number of boosting rounds.

In [76]:
# Instantiate the XGBoost regressor
reg = xgb.XGBRegressor(
    n_estimators=1000,
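    # Passing early_stopping_rounds to the constructor requires xgboost >= 1.6;
    # older versions accept it as a fit() argument instead.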
    early_stopping_rounds=50,
    learning_rate=0.01, # Lower learning rate often requires more estimators but can generalize better
    objective='reg:squarederror',
    n_jobs=-1,
    random_state=42
)

# Fit the model
print("Training XGBoost model...")
reg.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_val, y_val)],
    verbose=100 # Print evaluation results every 100 rounds
)
print("Model training complete.")
Training XGBoost model...
[0]	validation_0-rmse:6389.23892	validation_1-rmse:6966.29632
[100]	validation_0-rmse:3911.96295	validation_1-rmse:4455.85693
[200]	validation_0-rmse:3241.13734	validation_1-rmse:3897.31998
[300]	validation_0-rmse:2999.51806	validation_1-rmse:3792.87927
[397]	validation_0-rmse:2845.39505	validation_1-rmse:3783.11235
Model training complete.

Feature Importance Analysis (SHAP)

Use SHAP values to understand the contribution of each calendar feature to the model's predictions on the validation set.

In [77]:
# Explain model predictions using SHAP
print("Calculating SHAP values...")
explainer = shap.TreeExplainer(reg)
shap_values = explainer.shap_values(X_val)
print("SHAP values calculated.")

# Plot SHAP summary plot (beeswarm)
print("Generating SHAP summary plot (beeswarm)...")
shap.summary_plot(shap_values, X_val, feature_names=X_val.columns)
plt.show()

# Plot SHAP summary plot (bar)
print("Generating SHAP summary plot (bar)...")
shap.summary_plot(shap_values, X_val, feature_names=X_val.columns, plot_type="bar")
plt.show()
Calculating SHAP values...
SHAP values calculated.
Generating SHAP summary plot (beeswarm)...
[Figure: SHAP summary beeswarm plot for the eight calendar features]
Generating SHAP summary plot (bar)...
[Figure: SHAP feature-importance bar plot (mean |SHAP value| per feature)]

Forecast on Test Set and Evaluate

Use the trained model to make predictions on the unseen test set. Visualize the predictions against the actual values.

In [78]:
# Make predictions on the test set
test_df['MW_Prediction'] = reg.predict(X_test)

# Combine train/val/test (train_df and val_df already contain the target) so the
# full actual series can be plotted alongside the test-set predictions
all_df = pd.concat([train_df, val_df, test_df], sort=False)

# Plot actuals vs predictions for the entire dataset (test predictions highlighted)
fig, ax = plt.subplots(figsize=(15, 6))
all_df[TARGET].plot(ax=ax, label='Actual', style='.', alpha=0.5, ms=2)
test_df['MW_Prediction'].plot(ax=ax, label='Prediction (Test Set)', style='-')
ax.set_title('Actual vs. Predicted Energy Consumption (Calendar Features Only)')
ax.set_ylabel('MW')
ax.legend()
plt.show()
[Figure: actual consumption across the full series with test-set predictions overlaid]

Visualize Forecast for Specific Periods

Zoom in on specific weeks in the test set to better visualize the model's performance.

In [79]:
# Plot the forecast with the actuals for the first week of December 2016
fig, ax = plt.subplots(figsize=(15, 6))
start_date = '2016-12-05'
end_date = '2016-12-12'
test_df.loc[start_date:end_date][TARGET].plot(ax=ax, label='Actual', style='.-')
test_df.loc[start_date:end_date]['MW_Prediction'].plot(ax=ax, label='Prediction', style='-')
ax.set_title(f'Forecast vs Actuals ({start_date} to {end_date})')
ax.set_ylabel('MW')
ax.legend()
plt.show()
[Figure: forecast vs. actuals, 2016-12-05 to 2016-12-12]
In [80]:
# Plot the forecast with the actuals for the first week of July 2017
fig, ax = plt.subplots(figsize=(15, 6))
start_date = '2017-07-01'
end_date = '2017-07-08'
test_df.loc[start_date:end_date][TARGET].plot(ax=ax, label='Actual', style='.-')
test_df.loc[start_date:end_date]['MW_Prediction'].plot(ax=ax, label='Prediction', style='-')
ax.set_title(f'Forecast vs Actuals ({start_date} to {end_date})')
ax.set_ylabel('MW')
ax.legend()
plt.show()
[Figure: forecast vs. actuals, 2017-07-01 to 2017-07-08]

Test Set Error Metrics

Evaluate the model's performance on the held-out test set using standard regression metrics.

Define the MAPE function.

In [81]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Calculates MAPE given y_true and y_pred."""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = y_true != 0
    if not np.all(mask):
        print("Warning: Zero values found in y_true, MAPE calculation excludes these points.")
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100
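
# Quick sanity check (illustrative): y_true=[100, 200], y_pred=[110, 180]
# gives a 10% error at each point, so the function returns ~10.0.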

Calculate and display the error metrics.

In [82]:
y_true_test = test_df[TARGET]
y_pred_test = test_df['MW_Prediction']

rmse = np.sqrt(mean_squared_error(y_true=y_true_test, y_pred=y_pred_test))
mae = mean_absolute_error(y_true=y_true_test, y_pred=y_pred_test)
mape = mean_absolute_percentage_error(y_true=y_true_test, y_pred=y_pred_test)

print("--- Test Set Error Metrics (Calendar Features Only) ---")
print(f"RMSE: {rmse:,.2f} MW")
print(f"MAE:  {mae:,.2f} MW")
print(f"MAPE: {mape:.2f}%")
--- Test Set Error Metrics (Calendar Features Only) ---
RMSE: 3,973.63 MW
MAE:  3,137.22 MW
MAPE: 10.24%

Error Analysis: Worst and Best Predicted Days

Worst Predicted Days

Analyze the prediction errors by day to identify systematic issues.

In [83]:
# Calculate error and absolute error
test_df['error'] = test_df[TARGET] - test_df['MW_Prediction']
test_df['abs_error'] = test_df['error'].abs()

# Group by date and calculate mean errors
error_by_day = test_df.groupby(test_df.index.date) \
    .agg(mean_PJME_MW=(TARGET, 'mean'),
         mean_MW_Prediction=('MW_Prediction', 'mean'),
         mean_error=('error', 'mean'),
         mean_abs_error=('abs_error', 'mean'))

# Add weekday name for context
error_by_day['weekday'] = pd.to_datetime(error_by_day.index).strftime('%A')

print("Top 10 Days with Worst Mean Absolute Error:")
error_by_day.sort_values('mean_abs_error', ascending=False).head(10)
Top 10 Days with Worst Mean Absolute Error:
Out[83]:
            mean_PJME_MW  mean_MW_Prediction    mean_error  mean_abs_error    weekday
2018-01-06  43565.750000        33872.621094   9693.130371     9693.130371   Saturday
2017-02-24  26445.083333        36059.433594  -9614.351725     9614.351725     Friday
2018-01-07  42159.708333        32589.742188   9569.965658     9569.965658     Sunday
2017-02-25  24344.458333        33805.746094  -9461.285645     9461.285645   Saturday
2017-02-19  24555.500000        33521.769531  -8966.270996     8966.270996     Sunday
2017-12-31  39016.000000        30089.259766   8926.738200     8926.738200     Sunday
2017-05-19  38032.583333        29108.976562   8923.606120     8923.606120     Friday
2017-02-20  27070.583333        35970.457031  -8899.874837     8899.874837     Monday
2018-02-21  27572.500000        36334.296875  -8761.797201     8761.797201  Wednesday
2017-02-23  27663.416667        36159.445312  -8496.028239     8496.028239   Thursday

Here’s a row-by-row breakdown in table form, with a quick hypothesis for each large-error day:

| Date | Weekday | MAE (MW) | Observed Avg Load (MW) | Predicted Avg Load (MW) | Hypothesized Cause | Notes |
|------|---------|----------|------------------------|-------------------------|--------------------|-------|
| 2018-01-06 | Saturday | 9,693 | 43,565 | 33,873 | Mid-winter cold snap ↑ heating demand | Temperatures plunged below the seasonal norm; the Saturday baseline under-forecasts peak load. |
| 2017-02-24 | Friday | 9,614 | 26,445 | 36,059 | Holiday-week lull ↓ commercial load | Friday of Presidents’ Day week (holiday fell Mon Feb 20); the model expects a normal Friday. |
| 2018-01-07 | Sunday | 9,570 | 42,160 | 32,590 | Cold snap carries over ↑ heating | Another deep-freeze weekend day; the model expects a “normal Sunday,” but load spiked. |
| 2017-02-25 | Saturday | 9,461 | 24,344 | 33,806 | Holiday-week anomaly ↓ baseline load | Saturday after Presidents’ Day week; industrial/commercial load still depressed. |
| 2017-02-19 | Sunday | 8,966 | 24,556 | 33,522 | Presidents’ Day weekend ↓ load | Actual Sunday load depressed by the holiday weekend; the model sees a typical Sunday. |
| 2017-12-31 | Sunday | 8,927 | 39,016 | 30,089 | New Year’s Eve celebrations ↑ residential | Party lighting, cooking, and heating push load well above a “normal” Sunday forecast. |
| 2017-05-19 | Friday | 8,923 | 38,033 | 29,109 | Unusual weather or event ↑ demand | Possibly a late-spring heat wave or local event; no calendar feature can capture it. |
| 2017-02-20 | Monday | 8,900 | 27,071 | 35,970 | Presidents’ Day holiday ↓ load | Federal holiday; Monday load down, but the model treats it as a normal Monday. |
| 2018-02-21 | Wednesday | 8,762 | 27,572 | 36,334 | Holiday-week carryover ↓ load | Two days after Presidents’ Day (Feb 19); load still below the full weekday norm the model expects. |
| 2017-02-23 | Thursday | 8,496 | 27,663 | 36,159 | Holiday-week carryover ↓ load | Thursday after Presidents’ Day; an atypical load pattern persists through the week. |

Key takeaway:
Because this model only “knows” calendar position (hour, day of week, month, and so on), it can’t adapt to:

  • Weather extremes (deep-freeze or heat anomalies)
  • Holiday/holiday-weekend effects (industrial closures, party-time spikes)

Adding external regressorsβ€”temperature or heating-degree days, holiday flags, special-event indicatorsβ€”will give the model the context it needs to correct these large errors.
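
As a concrete taste of the holiday-flag idea, here is a hedged sketch using pandas’ built-in US federal holiday calendar (the helper and column names are illustrative, not part of this notebook’s pipeline):

import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar

# US federal holidays spanning the dataset.
cal = USFederalHolidayCalendar()
holidays = cal.holidays(start='2002-01-01', end='2018-08-02')

def add_holiday_flags(df):
    """Add holiday indicator columns to a DataFrame with a DatetimeIndex."""
    df = df.copy()
    dates = df.index.normalize()
    df['is_holiday'] = dates.isin(holidays).astype(int)
    # Days adjacent to a holiday often show transitional load patterns.
    df['near_holiday'] = (
        dates.isin(holidays - pd.Timedelta(days=1))
        | dates.isin(holidays + pd.Timedelta(days=1))
    ).astype(int)
    return df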

Best Predicted Days

In [84]:
print("\nTop 10 Days with Best Mean Absolute Error:")
error_by_day.sort_values('mean_abs_error', ascending=True).head(10)
Top 10 Days with Best Mean Absolute Error:
Out[84]:
            mean_PJME_MW  mean_MW_Prediction   mean_error  mean_abs_error   weekday
2016-12-30  32213.458333        32159.609375    53.847900      330.570068    Friday
2017-05-02  28546.916667        28805.884766  -258.968343      404.729899   Tuesday
2016-12-12  32757.208333        32902.617188  -145.408447      496.963460    Monday
2017-03-18  29677.083333        29711.373047   -34.288981      506.869222  Saturday
2017-04-01  27047.041667        26972.796875    74.244466      508.144043  Saturday
2017-12-08  32779.291667        32935.121094  -155.828532      562.034098    Friday
2016-12-08  32704.666667        33030.042969  -325.375651      577.592773  Thursday
2017-04-07  28840.250000        29130.056641  -289.808512      579.313721    Friday
2018-04-19  28949.958333        28955.976562    -6.018066      588.082194  Thursday
2017-10-24  28657.416667        29212.718750  -555.302165      620.493896   Tuesday

Plotting Example Days

Worst Predicted Day

Visualize the predictions versus actuals for the day with the highest average absolute error. This often highlights limitations of the feature set (e.g., inability to capture extreme weather or holiday effects).

In [85]:
# Find and plot the worst day
worst_day = error_by_day['mean_abs_error'].idxmax()
worst_day_str = worst_day.strftime('%Y-%m-%d')

fig, ax = plt.subplots(figsize=(15, 6))
test_df.loc[worst_day_str][TARGET].plot(ax=ax, label='Actual', style='.-')
test_df.loc[worst_day_str]['MW_Prediction'].plot(ax=ax, label='Prediction', style='-')
ax.set_title(f'Worst Predicted Day ({worst_day_str}) - MAE: {error_by_day.loc[worst_day, "mean_abs_error"]:.2f} MW')
ax.set_ylabel('MW')
ax.legend()
plt.show()
[Figure: hourly actual vs. prediction on the worst predicted day]

Best Predicted Day

Visualize the predictions versus actuals for the day with the lowest average absolute error. These days typically represent 'normal' conditions that the calendar features capture well.

In [86]:
# Find and plot the best day
best_day = error_by_day['mean_abs_error'].idxmin()
best_day_str = best_day.strftime('%Y-%m-%d')

fig, ax = plt.subplots(figsize=(15, 6))
test_df.loc[best_day_str][TARGET].plot(ax=ax, label='Actual', style='.-')
test_df.loc[best_day_str]['MW_Prediction'].plot(ax=ax, label='Prediction', style='-')
ax.set_title(f'Best Predicted Day ({best_day_str}) - MAE: {error_by_day.loc[best_day, "mean_abs_error"]:.2f} MW')
ax.set_ylabel('MW')
ax.legend()
plt.show()
[Figure: hourly actual vs. prediction on the best predicted day]

Conclusion & Next Steps

This model, using only calendar features, establishes a baseline performance. The significant errors on certain days (often related to weather extremes or holidays) highlight the need for additional features.

Next Steps:

  1. Incorporate Weather Data: Add features like temperature, humidity, wind speed, etc.
  2. Add Holiday Features: Explicitly flag holidays and potentially days surrounding them.
  3. Consider Lagged Features: Include past energy consumption values as features (see the sketch below).

These additions, explored in the subsequent notebook (time-series-forecasting-with-xgboost-2.ipynb), significantly improve model accuracy by providing context beyond simple calendar cycles.
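
For instance, the lagged-feature idea can start as simple shifted copies of the target. A minimal sketch, assuming an hourly index with no gaps (the helper name is illustrative):

def add_lag_features(df, target='PJME_MW'):
    df = df.copy()
    df['lag_24h'] = df[target].shift(24)    # same hour yesterday
    df['lag_168h'] = df[target].shift(168)  # same hour last week
    return df

# The earliest rows will have NaN lags; either drop them or rely on
# XGBoost's native handling of missing values.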