VI. White Noise Test of Residuals

Cumulative Periodogram of SARIMA¶

SARIMA(1,0,0)(1,0,0)[52] Cumulative Periodogram

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import periodogram
from statsmodels.tsa.statespace.sarimax import SARIMAX

# --- Fit SARIMA(1,0,0)(1,0,0,52) model ---
# The seasonal AR(1) term at lag 52 has to be requested explicitly: when
# seasonal_order is omitted, SARIMAX fits a non-seasonal model, i.e. a plain
# AR(1) here, which is not the model named in the heading and plot title.
model = SARIMAX(
    energy_consumption_data,
    order=(1, 0, 0),
    seasonal_order=(1, 0, 0, 52),
)
results = model.fit(disp=False)

# --- Get residuals  ---
residuals = results.resid

# --- Periodogram with Hann window ---
freqs, power = periodogram(residuals, window='hann')

# --- Remove near-zero frequencies ---
# Keep only frequencies strictly above 1/N so the zero-frequency (mean) term
# does not dominate the cumulative sum.
min_freq = 1 / len(residuals)
mask = freqs > min_freq
freqs = freqs[mask]
power = power[mask]

# --- Cumulative periodogram ---
# Normalised so the curve ends at 1; the reference for white noise is a
# straight line from 0 to 1 across the retained frequencies.
cumulative_power = np.cumsum(power) / np.sum(power)
expected = np.linspace(0, 1, len(cumulative_power))

# --- KS statistic (max deviation from ideal)
ks_stat = np.max(np.abs(cumulative_power - expected))
print(f"Max deviation (KS statistic): {ks_stat*100:.2f}%")

# --- Plot cumulative periodogram ---
plt.figure(figsize=(10, 5))
plt.step(freqs, cumulative_power, where='post', label="Cumulative Periodogram", color='blue', lw=2)
# Reference diagonal spanning the same frequency range as the step curve.
plt.plot([freqs[0], freqs[-1]], [0, 1], 'r--', label="Ideal White Noise (y=x)")
plt.xlabel("Frequency (cycles/sample)")
plt.ylabel("Cumulative Proportion of Power")
plt.title("Cumulative Periodogram of SARIMA Residuals")
plt.grid(True, linestyle='--', alpha=0.6)
plt.ylim(0, 1.05)
plt.legend()
plt.tight_layout()
plt.show()

Max deviation (KS statistic): 12.89%

No description has been provided for this image

KS results on training data¶

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import periodogram
from statsmodels.tsa.statespace.sarimax import SARIMAX

# --- Candidate models: label -> (order, seasonal_order) ---
model_configs = {
    "ARIMA(1,0,0)": ((1, 0, 0), (0, 0, 0, 0)),
    "ARIMA(1,1,0)": ((1, 1, 0), (0, 0, 0, 0)),
    "ARIMA(1,0,1)": ((1, 0, 1), (0, 0, 0, 0)),
    "SARIMA(1,0,0)(1,0,0,52)": ((1, 0, 0), (1, 0, 0, 52)),
    "SARIMAX(1,0,0)(1,0,0,52) + Temp": ((1, 0, 0), (1, 0, 0, 52)),
}

# KS statistic (in percent) per successfully evaluated model label.
ks_results = {}

# --- Fit every candidate and score its residuals ---
for label, (order, seasonal_order) in model_configs.items():
    print(f"Fitting model: {label}")

    # Only the labels containing "Temp" receive the exogenous regressor.
    sarimax_kwargs = {"order": order, "seasonal_order": seasonal_order}
    if "Temp" in label:
        sarimax_kwargs["exog"] = weekly_temp
    model = SARIMAX(energy_consumption_data, **sarimax_kwargs)

    results = model.fit(disp=False)
    residuals = results.resid

    n_resid = len(residuals)
    if n_resid < 60:
        print(f"Skipping {label}: too few residuals.")
        continue

    # Hann-windowed periodogram, keeping only frequencies strictly above 1/N.
    freqs, power = periodogram(residuals, window='hann')
    min_freq = 1 / n_resid
    mask = freqs > min_freq
    freqs, power = freqs[mask], power[mask]

    # Normalised cumulative periodogram against a straight 0 -> 1 reference.
    cumulative_power = np.cumsum(power) / np.sum(power)
    expected = np.linspace(0, 1, len(cumulative_power))
    ks_stat = np.abs(cumulative_power - expected).max()

    ks_results[label] = ks_stat * 100
    print(f"{label}: KS = {ks_stat*100:.2f}%")

# --- Horizontal bar chart, smallest KS statistic first ---
ranked = sorted(ks_results.items(), key=lambda pair: pair[1])
labels = [name for name, _ in ranked]
ks_values = [score for _, score in ranked]

plt.figure(figsize=(10, 6))
bars = plt.barh(labels, ks_values, color='darkcyan')

# Annotate each bar with its value, just to the right of the bar's end.
for rect in bars:
    bar_len = rect.get_width()
    y_mid = rect.get_y() + rect.get_height() / 2
    plt.text(bar_len + 0.5, y_mid, f"{bar_len:.2f}%", va='center', fontsize=10)

plt.xlabel("KS Statistic (%)")

# Vertical reference line at the 15% cut-off used in this analysis.
plt.axvline(15, color='black', linestyle='--')
plt.text(15.5, -0.5, 'White Noise Threshold (~15%)', ha='left', va='bottom', fontsize=9)

plt.title("KS Statistic Comparison: ARIMA vs SARIMA vs SARIMAX")
plt.grid(axis='x', linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

Fitting model: ARIMA(1,0,0)
ARIMA(1,0,0): KS = 12.89%
Fitting model: ARIMA(1,1,0)
ARIMA(1,1,0): KS = 10.87%
Fitting model: ARIMA(1,0,1)
ARIMA(1,0,1): KS = 15.91%
Fitting model: SARIMA(1,0,0)(1,0,0,52)
SARIMA(1,0,0)(1,0,0,52): KS = 12.11%
Fitting model: SARIMAX(1,0,0)(1,0,0,52) + Temp
SARIMAX(1,0,0)(1,0,0,52) + Temp: KS = 10.47%

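We conclude that ARIMA(1,0,0) (i.e. AR(1)), ARIMA(1,1,0), SARIMA(1,0,0)(1,0,0)[52], and SARIMAX(1,0,0)(1,0,0)[52] with temperature (Temp) as a future covariate all fall below the ~15% KS threshold, so their residuals are consistent with white noise when tested on the full training data. ARIMA(1,0,1) exceeds the threshold (15.91%). The four passing models might therefore be suitable.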

We can hypothesize that the SARIMAX model is the best fit, because its residuals have the lowest KS statistic (10.47%), i.e. they are the closest to white noise.