IV. Time Series Analysis¶
A. Mean Calculation¶
Calculating the mean of my time series (Total Power Consumption in the Grid)
the formula for the mean is: μ = (x₁ + x₂ + ... + xₙ) / n
def mean(data):
return sum(data) / len(data)
def sliced_mean_calculator(data, time_index,variance_window_size):
means = []
time_indices = []
for i in range(0, len(data) - variance_window_size + 1, variance_window_size):
window_slice = data[i:i + variance_window_size]
var = mean(window_slice)
means.append(var)
time_indices.append(time_index[i + variance_window_size - 1])
return means, time_indices
mean_window_size = len(energy_consumption_data) // 10
means, time_indices = sliced_mean_calculator(energy_consumption_data,time_index, mean_window_size)
plt.figure(figsize=(12, 5))
plt.plot(time_indices, means, color='green', label='Mean value')
plt.xlabel('Date')
plt.ylabel('Mean value [KwH]')
plt.title(f'Evolution of Mean Over Time(No Overlap) (Window Size = {mean_window_size})')
plt.grid(True)
plt.legend()
plt.show()
B. Variance Calculation¶
Calculating Variance:
Variance = 1/n sum(xi- mean)**2
def variance(data):
mean_value = mean(data)
variance = sum((x - mean_value)**2 for x in data) / len(data)
return variance
total_length = len(energy_consumption_data)
variance_window_size = total_length // 10
print(variance_window_size)
def sliced_variances_calculator(data, time_index,variance_window_size):
variances = []
time_indices = []
for i in range(0, len(data) - variance_window_size + 1, variance_window_size):
window_slice = data[i:i + variance_window_size]
var = variance(window_slice)
variances.append(var)
time_indices.append(time_index[i + variance_window_size - 1])
return variances, time_indices
variances, time_indices = sliced_variances_calculator(energy_consumption_data,time_index, variance_window_size)
print(variances, time_indices)
#Plotting the variances
plt.figure(figsize=(12, 5))
plt.plot(time_indices, variances, color='green', label='Rolling Variance')
plt.xlabel('Time Index')
plt.ylabel('Variance')
plt.title(f'Variance Over Time(No Overlap) (Window Size = {variance_window_size})')
plt.grid(True)
plt.legend()
plt.show()
16 [1.0960480879829592e+16, 1464521258618045.5, 7200982635238046.0, 8155543075894944.0, 2430623248220715.5, 5744999445593979.0, 4415194650633357.5, 3907984911992913.0, 2938256858112266.0, 8608203570732048.0] [Timestamp('2022-04-25 00:00:00'), Timestamp('2022-08-15 00:00:00'), Timestamp('2022-12-05 00:00:00'), Timestamp('2023-03-27 00:00:00'), Timestamp('2023-07-17 00:00:00'), Timestamp('2023-11-06 00:00:00'), Timestamp('2024-02-26 00:00:00'), Timestamp('2024-06-17 00:00:00'), Timestamp('2024-10-07 00:00:00'), Timestamp('2025-01-27 00:00:00')]
C. Autocorrelation Calculation¶
from statsmodels.tsa.stattools import acf
import numpy as np
autocorrelation function (ACF) at lag k, for k ≥ 0, of the time series is defined by ro k = ck/ c0 where ck is the autocovariance at lag k
alpha = 0.05
confint = 1 - alpha
nlags = 160
def check_autocorrelation(data, nlags=10, alpha=0.05):
data = np.asarray(data)
acf_vals, _ = acf(data, nlags=nlags, alpha=alpha)
return acf_vals
acf_vals = check_autocorrelation(energy_consumption_data, nlags=nlags)
print(f"High Autocorrelation defined as {1 - alpha}")
for lag in range(1, 5):
print(f"lag {lag:2d} → ACF = {acf_vals[lag]: .4f}")
High Autocorrelation defined as 0.95 lag 1 → ACF = 0.9028 lag 2 → ACF = 0.8092 lag 3 → ACF = 0.7166 lag 4 → ACF = 0.6537
Setting axis at 0
from darts import TimeSeries
acf_series = TimeSeries.from_values(acf_vals[1:])
plt.figure(figsize=(10, 6))
acf_series.plot(label="ACF")
plt.title("Autocorrelation Function (ACF)")
plt.xlabel("Lag")
plt.ylabel("ACF value between value ct and ct - lag")
plt.legend(loc="upper right")
plt.show()
from darts.utils.statistics import (
plot_acf,
)
D. Differencing¶
Differencing is a simple approach to removing trends. No need to estimate parameters.
Dt = yt - y(t-1)
fig, ax = plt.subplots(figsize=(12, 5))
first_order_diff = df['Total Energy Consumption (kWh)'].diff()
ax.plot(first_order_diff, label='Total Energy Consumption', color='gray')
ax.set_title('First Order Difference of Data')
ax.set_xlabel('Date')
ax.set_ylabel('Energy Consumption (kWh)')
ax.legend()
ax.grid(True)
plt.tight_layout()
plt.show()
We can use differencing to remove seasonality, we can then plot its ACF, which will give us more insights for our model selection
from darts.utils.statistics import (
stationarity_test_kpss,
plot_acf)
# --- Seasonal differencing (weekly seasonality, lag=52) ---
seasonal_diff = df['Total Energy Consumption (kWh)'].diff(52).dropna()
# --- Convert to Darts TimeSeries ---
seasonal_diff_series = TimeSeries.from_series(seasonal_diff)
# --- Plot ACF using Darts ---
plot_acf(seasonal_diff_series, max_lag=40)
plt.title("ACF of Deseasoned Series")
plt.tight_layout()
plt.show()
E. Testing for stationnarity¶
from darts.utils.statistics import (
stationarity_test_kpss,
stationarity_tests)
initial_series = TimeSeries.from_values(energy_consumption_data)
alpha=0.05
is_stationary = stationarity_test_kpss(initial_series)
stat, p_value, lags, crit_vals = stationarity_test_kpss(initial_series)
print(f"KPSS statistic: {stat}")
print(f"p-value: {p_value}")
is_stationary = alpha <= p_value
print(f"Is stationary: {is_stationary}")
KPSS statistic: 0.11391747420474804 p-value: 0.1 Is stationary: True
is_stationary = stationarity_tests(
initial_series,
p_value_threshold_kpss=0.05
)
# Print interpretation
print("Series is stationary" if is_stationary else "Series is not stationary")
Series is stationary
F. Periodogram and seasonality¶
There are many methods to test for white noise. One of which, documented in the Time Series Analysis book, is Cumulative Periodogram.
Periodogram
def compute_linear_periodogram(y):
y = np.asarray(y)
n = len(y)
m = (n - 1) // 2 if n % 2 else n // 2
freqs = np.arange(1, m + 1) / n
I_vals = []
for omega in freqs:
cos_part = np.sum(y * np.cos(2 * np.pi * omega * np.arange(1, n + 1)))
sin_part = np.sum(y * np.sin(2 * np.pi * omega * np.arange(1, n + 1)))
I = (1 / n) * (cos_part**2 + sin_part**2)
I_vals.append(I)
return freqs, np.array(I_vals)
valid_mask = time_index.year <= 2024
series_trimmed = energy_consumption_data
series_trimmed = energy_consumption_data[valid_mask]
freqs_original, I_vals_original = compute_linear_periodogram(series_trimmed)
plt.figure(figsize=(10, 5))
plt.plot(freqs_original, I_vals_original, marker='o', color = 'g')
plt.title("Linear Periodogram of original weekly series")
plt.xlabel('Frequency ω')
plt.ylabel("I(ω)")
plt.grid(True)
plt.tight_layout()
plt.show()
threshold=2*1e16
significant_frequencies = [(w, I) for w, I in zip(freqs_original, I_vals_original) if I > threshold]
for freq, power in significant_frequencies:
print(f"Frequency: {freq:.5f}, Power: {power:.2e}, Period ≈ {1/freq:.2f} weeks")
Frequency: 0.00641, Power: 4.57e+16, Period ≈ 156.00 weeks Frequency: 0.01923, Power: 9.48e+17, Period ≈ 52.00 weeks Frequency: 0.03846, Power: 6.99e+16, Period ≈ 26.00 weeks Frequency: 0.09615, Power: 3.82e+16, Period ≈ 10.40 weeks Frequency: 0.13462, Power: 2.34e+16, Period ≈ 7.43 weeks
# Extract periods and powers
periods = [round(1 / freq, 2) for freq, _ in significant_frequencies]
powers = [power for _, power in significant_frequencies]
# Define colors for each bar
colors = ['red', 'green', 'blue', 'orange', 'purple']
# Create the bar plot
plt.figure(figsize=(10, 5))
bars = plt.bar(range(len(periods)), powers, color=colors, edgecolor='black', width=0.6)
# Add period labels on x-axis
plt.xticks(range(len(periods)), [f"{p}w" for p in periods])
# Labeling
plt.xlabel("Period (weeks)")
plt.ylabel("Spectral Power")
plt.title("Significant Spectral Peaks")
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
threshold=2*1e16
significant_frequencies = [(w, I) for w, I in zip(freqs_original, I_vals_original) if I > threshold]
for freq, power in significant_frequencies:
print(f"Frequency: {freq:.5f}, Power: {power:.2e}, Period ≈ {1/freq:.2f} weeks")
Frequency: 0.00641, Power: 4.57e+16, Period ≈ 156.00 weeks Frequency: 0.01923, Power: 9.48e+17, Period ≈ 52.00 weeks Frequency: 0.03846, Power: 6.99e+16, Period ≈ 26.00 weeks Frequency: 0.09615, Power: 3.82e+16, Period ≈ 10.40 weeks Frequency: 0.13462, Power: 2.34e+16, Period ≈ 7.43 weeks
Plotting one of the Fourrier Series
from matplotlib.cm import get_cmap
Plotting sine waves in separate subplots with different colors.
def plot_significant_waves(significant_freqs, duration=30, sampling_rate=100):
t = np.linspace(0, duration, duration * sampling_rate)
n = len(significant_freqs)
cmap = get_cmap('tab10')
max_amplitude = max(power for _, power in significant_freqs)
fig, axs = plt.subplots(n, 1, figsize=(11, 1.5 * n), sharex=True)
if n == 1:
axs = [axs]
for i, (freq, power) in enumerate(significant_freqs):
amplitude = power
y = amplitude * np.sin(2 * np.pi * freq * t)
color = cmap(i % cmap.N)
axs[i].plot(t, y, color=color)
axs[i].set_ylim(-max_amplitude * 1.1, max_amplitude * 1.1)
axs[i].set_title(f'ω = {freq:.3f}, T ≈ {1/freq:.2f} days, Power = {power:.2e}', fontsize=10)
axs[i].set_ylabel('Amplitude (kWh)')
axs[i].grid(True)
axs[-1].set_xlabel('Time (weeks)')
plt.tight_layout()
fig.suptitle("Decomposed High Signals of Differenced Daily Series (threshold= 2.7*1e16) ", fontsize=14, y=1.02)
plt.show()
threshold= 0.7*1e16
plot_significant_waves(significant_frequencies, duration=200, sampling_rate=300)
We conclude that our Time Series has a 52 week seasonality.
G. Testing for White Noise¶
def plot_cumulative_periodogram(freqs, I_vals, show_plot=True, threshold=0.1):
freqs = np.array(freqs)
I_vals = np.array(I_vals)
# Sort by frequency
sorted_idx = np.argsort(freqs)
freqs = freqs[sorted_idx]
I_vals = I_vals[sorted_idx]
# Keep only non-negative frequencies
mask = freqs >= 0
freqs = freqs[mask]
I_vals = I_vals[mask]
# Normalize and compute cumulative
total_power = np.sum(I_vals)
cumulative_power = np.cumsum(I_vals)
C = cumulative_power / total_power
# Expected under white noise: linear
expected = np.linspace(0, 1, len(C))
max_deviation = np.max(np.abs(C - expected))
percent_error = max_deviation * 100
is_white_noise = percent_error <= threshold * 100
if show_plot:
plt.figure(figsize=(10, 5))
plt.step(freqs, C, where='post', label="Cumulative Periodogram", color='blue', lw=2)
plt.plot([freqs[0], freqs[-1]], [0, 1], 'r--', label="KS Line: $y = x$")
plt.xlabel("Frequency ω (1/week)")
plt.ylabel("Cumulative Proportion of Power")
plt.title("Cumulative Periodogram vs Flat Spectrum")
plt.grid(True, linestyle='--', alpha=0.6)
plt.ylim(0, 1.05)
plt.legend()
plt.tight_layout()
plt.show()
print(f"Maximum deviation from uniform: {percent_error:.2f}%")
print("Conclusion:", "Likely white noise" if is_white_noise else "Not white noise")
return freqs, C
freqs_sorted, C_vals = plot_cumulative_periodogram(freqs_original, I_vals_original)
Maximum deviation from uniform: 78.97% Conclusion: Not white noise
We conclude that our initial time series is not white noise.