Time Series Analysis: Modern Approaches to Forecasting
Objective
Build an end-to-end time series forecasting system using both statistical and deep learning approaches to predict household energy consumption. Learn modern forecasting techniques, model optimization, and deployment strategies for real-time predictions.
Learning Outcomes
By completing this project, you will:
- Master time series analysis fundamentals and advanced concepts
- Implement both statistical and deep learning forecasting models
- Use modern forecasting libraries and tools
- Deploy models for real-time predictions
- Understand model selection and ensemble strategies
- Learn to handle real-world forecasting challenges
Prerequisites
- Basic Python programming skills (functions, classes, data structures)
- Fundamental statistics concepts (mean, variance, correlation)
- Basic understanding of machine learning concepts (training, validation, testing)
- Familiarity with pandas DataFrames and NumPy arrays
- Understanding of basic time series concepts (trend, seasonality)
Dataset Details
- Household power consumption data (available from the UCI ML Repository; a download sketch follows this list)
- Size: ~20MB compressed (~127MB uncompressed)
- 2M+ rows of minute-by-minute power consumption (December 2006 to November 2010)
- 7 numerical features + timestamp
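If you don't already have the file locally, the sketch below downloads and unpacks it. The URL follows the UCI archive's layout for this dataset (id 235), so verify it still resolves before relying on it:
import urllib.request
import zipfile

# UCI "Individual household electric power consumption" dataset (id 235)
URL = ('https://archive.ics.uci.edu/ml/machine-learning-databases/'
       '00235/household_power_consumption.zip')

urllib.request.urlretrieve(URL, 'household_power_consumption.zip')
with zipfile.ZipFile('household_power_consumption.zip') as zf:
    zf.extractall('data/household_power_consumption')  # extracts the .txt file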
Tools Required
# Core libraries
pip install pandas numpy scikit-learn
pip install statsmodels pmdarima
pip install prophet neuralprophet
pip install tensorflow
pip install darts
# Visualization
pip install matplotlib seaborn plotly
# Deployment
pip install fastapi uvicorn
pip install mlflow
Project Structure
time_series_forecasting/
│
├── data/
│   ├── household_power_consumption/
│   │   ├── train.csv
│   │   └── test.csv
│   └── processed/
│
├── src/
│   ├── data_processing.py
│   ├── feature_engineering.py
│   ├── models/
│   │   ├── statistical_models.py
│   │   ├── deep_learning_models.py
│   │   └── ensemble.py
│   ├── evaluation.py
│   └── deployment.py
│
└── notebooks/
    ├── 1_data_exploration.ipynb
    ├── 2_statistical_models.ipynb
    ├── 3_deep_learning_models.ipynb
    └── 4_model_comparison.ipynb
Steps and Tasks
1. Data Acquisition and Exploration
First, let's get and explore the household power consumption dataset:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load data ('?' marks missing readings in the raw file)
df = pd.read_csv('household_power_consumption.txt',
                 sep=';',
                 na_values='?',
                 low_memory=False)

# Basic preprocessing: combine Date and Time into a datetime index
df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                format='%d/%m/%Y %H:%M:%S')
df = df.drop(columns=['Date', 'Time']).set_index('datetime')
df = df.astype(float)
# Resample to hourly data
hourly_data = df.resample('H').mean()
# Plot basic time series
plt.figure(figsize=(15, 6))
plt.plot(hourly_data['Global_active_power'])
plt.title('Household Power Consumption Over Time')
plt.xlabel('Date')
plt.ylabel('Global Active Power (kilowatts)')
plt.show()
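The raw file marks missing readings with '?', which become NaN during loading; resampling averages around them, but downstream models need complete series. A minimal gap-filling sketch (time-based interpolation is one reasonable choice, not the only one):
# How much is missing per column?
print(hourly_data.isna().sum())

# Fill short gaps via time-based interpolation (at most 6 hours here);
# longer outages may deserve more careful, e.g. seasonal, imputation
hourly_data = hourly_data.interpolate(method='time', limit=6).dropna()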
Advanced time series analysis:
class TimeSeriesAnalyzer:
def __init__(self, data):
self.data = data
self.decomposition = None
def perform_decomposition(self, period=24):
"""Perform seasonal decomposition"""
from statsmodels.tsa.seasonal import seasonal_decompose
self.decomposition = seasonal_decompose(
self.data,
period=period,
extrapolate_trend='freq'
)
# Plot components
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(15, 12))
self.decomposition.observed.plot(ax=ax1)
ax1.set_title('Observed')
self.decomposition.trend.plot(ax=ax2)
ax2.set_title('Trend')
self.decomposition.seasonal.plot(ax=ax3)
ax3.set_title('Seasonal')
self.decomposition.resid.plot(ax=ax4)
ax4.set_title('Residual')
plt.tight_layout()
plt.show()
def check_stationarity(self):
"""Check time series stationarity"""
from statsmodels.tsa.stattools import adfuller
result = adfuller(self.data.dropna())
print('ADF Statistic:', result[0])
print('p-value:', result[1])
print('Critical values:')
for key, value in result[4].items():
print(f'\t{key}: {value}')
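A short usage sketch, assuming the gap-filled hourly_data from above. For the ADF test, a p-value below 0.05 rejects the unit-root null, i.e. the series is likely stationary:
# Decompose and test one target series
analyzer = TimeSeriesAnalyzer(hourly_data['Global_active_power'])
analyzer.perform_decomposition(period=24)  # 24-hour cycle in hourly data
analyzer.check_stationarity()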
2. Feature Engineering
Create relevant features for forecasting:
def create_features(df):
"""Create time-based features"""
df = df.copy()
# Time-based features
df['hour'] = df.index.hour
df['dayofweek'] = df.index.dayofweek
df['quarter'] = df.index.quarter
df['month'] = df.index.month
df['year'] = df.index.year
df['dayofyear'] = df.index.dayofyear
    # Lag features (hourly data: 24 = 1 day, 168 = 7 days)
df['lag_1h'] = df['Global_active_power'].shift(1)
df['lag_24h'] = df['Global_active_power'].shift(24)
df['lag_7d'] = df['Global_active_power'].shift(168)
return df
Advanced feature engineering:
class FeatureEngineer:
def __init__(self, df):
self.df = df
def create_cyclical_features(self):
"""Create cyclical features for time components"""
def encode_cyclical(df, col, max_val):
df[f'{col}_sin'] = np.sin(2 * np.pi * df[col]/max_val)
df[f'{col}_cos'] = np.cos(2 * np.pi * df[col]/max_val)
return df
self.df = encode_cyclical(self.df, 'hour', 24)
self.df = encode_cyclical(self.df, 'dayofweek', 7)
self.df = encode_cyclical(self.df, 'month', 12)
return self.df
    def create_rolling_features(self, windows=(6, 12, 24, 48)):
"""Create rolling statistics"""
for window in windows:
self.df[f'rolling_mean_{window}h'] = (
self.df['Global_active_power']
.rolling(window=window)
.mean()
)
self.df[f'rolling_std_{window}h'] = (
self.df['Global_active_power']
.rolling(window=window)
.std()
)
return self.df
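With features in place, split the data chronologically; a random split would leak future information into training. A minimal sketch, assuming hourly_data from Step 1:
def time_based_split(df, test_days=30):
    """Hold out the final `test_days` days as the test set."""
    cutoff = df.index.max() - pd.Timedelta(days=test_days)
    return df[df.index <= cutoff], df[df.index > cutoff]

features_df = create_features(hourly_data)
train_df, test_df = time_based_split(features_df.dropna())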
3. Statistical Models Implementation
Implement statistical forecasting models:
from prophet import Prophet
from pmdarima.arima import auto_arima
class StatisticalForecaster:
def __init__(self):
self.prophet_model = None
self.arima_model = None
    def fit_prophet(self, df):
        """Fit Facebook Prophet model"""
        # Prophet expects columns 'ds' (datetime) and 'y' (target),
        # so pass a single-column frame with a datetime index
        df_prophet = df.reset_index()
        df_prophet.columns = ['ds', 'y']
# Initialize and fit model
self.prophet_model = Prophet(
yearly_seasonality=True,
weekly_seasonality=True,
daily_seasonality=True
)
self.prophet_model.fit(df_prophet)
    def fit_auto_arima(self, data):
        """Fit Auto ARIMA model"""
        # Seasonal ARIMA with m=24 is expensive; fit on a recent
        # window of the series rather than all four years of data
        self.arima_model = auto_arima(
            data,
            seasonal=True,
            m=24,  # daily seasonality in hourly data
            suppress_warnings=True,
            stepwise=True
        )
Advanced statistical models:
from neuralprophet import NeuralProphet
from darts import TimeSeries
from darts.models import Prophet, ARIMA, ExponentialSmoothing
class AdvancedStatisticalForecaster:
def __init__(self):
self.models = {}
    def fit_neural_prophet(self, df):
        """Fit NeuralProphet model"""
        model = NeuralProphet(
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=True,
            batch_size=64,
            learning_rate=0.01
        )
        df_np = df.reset_index()
        df_np.columns = ['ds', 'y']
        # NeuralProphet's fit() returns training metrics, not the model,
        # so keep a reference to the model object itself
        model.fit(df_np)
        self.models['neural_prophet'] = model
    def fit_darts_models(self, data):
        """Fit various statistical models using Darts"""
        # Convert to a Darts TimeSeries (requires a regular datetime
        # index, so fill any gaps before this step)
        series = TimeSeries.from_dataframe(data)
# Fit Prophet
prophet_model = Prophet()
self.models['prophet'] = prophet_model.fit(series)
# Fit ARIMA
arima_model = ARIMA()
self.models['arima'] = arima_model.fit(series)
# Fit ETS
ets_model = ExponentialSmoothing()
self.models['ets'] = ets_model.fit(series)
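All Darts models share the same fit/predict interface, which makes them easy to swap. A short usage sketch, assuming the gap-filled hourly_data from Step 1:
forecaster = AdvancedStatisticalForecaster()
forecaster.fit_darts_models(hourly_data[['Global_active_power']])

# Every fitted Darts model can forecast n steps ahead
ets_forecast = forecaster.models['ets'].predict(n=24)  # next 24 hours
print(ets_forecast.values()[:5])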
4. Deep Learning Models Implementation
Implement deep learning models:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
def create_lstm_model(n_features, n_steps):
"""Create LSTM model for time series forecasting"""
model = Sequential([
LSTM(50, activation='relu', input_shape=(n_steps, n_features)),
Dropout(0.2),
Dense(1)
])
model.compile(optimizer='adam', loss='mse')
return model
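The LSTM consumes fixed-length windows, so the series has to be reshaped into (samples, n_steps, n_features) arrays first. A minimal framing sketch; train_values is assumed to be a scaled 2D numpy array of training features with the target in its first column:
import numpy as np

def create_sequences(values, n_steps):
    """Slide a window of length n_steps over a (time, features) array."""
    X, y = [], []
    for i in range(len(values) - n_steps):
        X.append(values[i:i + n_steps])
        y.append(values[i + n_steps, 0])  # target = first column, next step
    return np.array(X), np.array(y)

# Example: 24-hour input windows over the scaled training data
X_train, y_train = create_sequences(train_values, n_steps=24)
model = create_lstm_model(n_features=X_train.shape[2], n_steps=24)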
Advanced deep learning models:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
class DeepLearningForecaster:
def __init__(self):
self.models = {}
def create_transformer(self, n_features, n_steps):
"""Create Transformer model"""
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
            # Stacked self-attention blocks, each with a residual connection
            x = inputs
            for _ in range(2):
                attn = tf.keras.layers.MultiHeadAttention(
                    key_dim=head_size, num_heads=num_heads, dropout=dropout
                )(x, x)
                attn = tf.keras.layers.Dropout(dropout)(attn)
                x = tf.keras.layers.Add()([x, attn])  # residual around attention
                x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)
# Feed-forward network
ff = tf.keras.layers.Dense(ff_dim, activation="relu")(x)
ff = tf.keras.layers.Dense(inputs.shape[-1])(ff)
x = tf.keras.layers.Add()([x, ff])
x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)
return x
inputs = tf.keras.Input(shape=(n_steps, n_features))
x = transformer_encoder(inputs, 32, 2, 64, 0.1)
x = tf.keras.layers.GlobalAveragePooling1D(data_format="channels_first")(x)
outputs = tf.keras.layers.Dense(1)(x)
return tf.keras.Model(inputs, outputs)
def create_hybrid_model(self, n_features, n_steps):
"""Create hybrid CNN-LSTM model"""
model = Sequential([
tf.keras.layers.Conv1D(filters=64, kernel_size=2,
activation='relu',
input_shape=(n_steps, n_features)),
tf.keras.layers.MaxPooling1D(pool_size=2),
Bidirectional(LSTM(50, return_sequences=True)),
Dropout(0.2),
LSTM(50),
Dropout(0.2),
Dense(1)
])
model.compile(optimizer='adam', loss='mse')
return model
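The callbacks imported above belong in the training loop; a sketch of how they might be wired in, assuming the windowed X_train/y_train from the basic step (hyperparameters are illustrative):
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5),
]

forecaster = DeepLearningForecaster()
model = forecaster.create_hybrid_model(n_features=X_train.shape[2], n_steps=24)
history = model.fit(
    X_train, y_train,
    validation_split=0.1,  # Keras holds out the last 10% of samples
    shuffle=False,         # preserve temporal order
    epochs=100,
    batch_size=64,
    callbacks=callbacks,
)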
5. Model Evaluation and Ensemble
Create an ensemble of models and evaluate performance:
class ForecastEvaluator:
def __init__(self):
self.metrics = {}
    def calculate_metrics(self, y_true, y_pred):
        """Calculate common forecasting metrics"""
        from sklearn.metrics import mean_absolute_error, mean_squared_error
        y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        # MAPE is undefined where y_true == 0, so mask those points
        nonzero = y_true != 0
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
        return {
            'MAE': mae,
            'RMSE': rmse,
            'MAPE': mape
        }
Advanced evaluation and ensemble:
class AdvancedForecasting:
def __init__(self):
self.models = {}
self.weights = None
def create_ensemble(self, predictions_dict, y_true):
"""Create an optimal ensemble using predictions from different models"""
        from scipy.optimize import minimize
        from sklearn.metrics import mean_squared_error
def objective(weights):
# Combine predictions using weights
weighted_pred = np.zeros_like(y_true)
for i, (model_name, pred) in enumerate(predictions_dict.items()):
weighted_pred += weights[i] * pred
# Calculate RMSE
return np.sqrt(mean_squared_error(y_true, weighted_pred))
# Initialize weights
n_models = len(predictions_dict)
initial_weights = np.ones(n_models) / n_models
# Constraints: weights sum to 1 and are non-negative
constraints = (
{'type': 'eq', 'fun': lambda w: np.sum(w) - 1},
)
bounds = tuple((0, 1) for _ in range(n_models))
# Optimize weights
result = minimize(
objective,
initial_weights,
method='SLSQP',
constraints=constraints,
bounds=bounds
)
self.weights = result.x
return self.weights
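Applying the learned weights is then a weighted sum over aligned prediction arrays; a short usage sketch (the prediction variables are assumed outputs of the earlier models, aligned to the same test index):
ensemble = AdvancedForecasting()
predictions = {
    'prophet': prophet_preds,  # assumed: numpy arrays on a common index
    'arima': arima_preds,
    'lstm': lstm_preds,
}
weights = ensemble.create_ensemble(predictions, y_true)

# Combine new forecasts in the same model order used during fitting
final_forecast = sum(w * p for w, p in zip(weights, predictions.values()))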
6. Deployment
Set up real-time forecasting API:
from typing import List

import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()
class TimeSeriesData(BaseModel):
values: List[float]
timestamps: List[str]
@app.post("/forecast/")
async def create_forecast(data: TimeSeriesData):
"""Generate forecasts for provided time series data"""
# Process input data
df = pd.DataFrame({
'timestamp': pd.to_datetime(data.timestamps),
'value': data.values
}).set_index('timestamp')
    # Generate forecast; `model` is assumed to be loaded once at startup,
    # e.g. from an MLflow registry or a serialized file
    forecast = model.predict(df)
return {"forecast": forecast.tolist()}
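To serve and exercise the endpoint, a minimal sketch (the module path and port are assumptions; in the project layout above, the app lives in src/deployment.py):
# Server side: run the ASGI app (equivalent to `uvicorn deployment:app`)
import uvicorn
uvicorn.run(app, host='0.0.0.0', port=8000)

# Client side (separate process): post a small series, read the forecast
import requests

payload = {
    'values': [1.2, 1.4, 1.3],
    'timestamps': ['2010-11-01 00:00:00',
                   '2010-11-01 01:00:00',
                   '2010-11-01 02:00:00'],
}
print(requests.post('http://localhost:8000/forecast/', json=payload).json())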