Comparative Analysis Tutorial#
Overview#
This tutorial demonstrates how to perform comparative analysis using different temporal clustering methods and storage configurations in the Storage-in-OSeMOSYS framework.
Temporal Clustering Methods Comparison#
Available Methods#
The framework supports four temporal representation methods:
Kotzur Method - Advanced k-means clustering with storage considerations
Niet Method - Hierarchical clustering approach
Welsch Method - Representative days selection
8760 Hours - Full hourly resolution (baseline)
Comparison Setup#
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from src.utilities import *
import time
# --- Comparison parameters -------------------------------------------------
# Temporal-representation methods to benchmark; '8760' is the full-resolution
# baseline the clustered methods are later validated against.
methods = ['Kotzur', 'Niet', 'Welsch', '8760']
input_path = Path("inputs_csv")
output_base = Path("Results_Comparison")

# Per-method model results and runtime/size metrics, keyed by method name.
results = {}
performance_metrics = {}

# Run every method; a failure is recorded as None so downstream comparison
# code can skip it instead of aborting the whole study.
for method in methods:
    print(f"\n=== Running {method} Method ===")
    start_time = time.time()  # wall-clock time of the full model run

    # Each method writes into its own results directory.
    output_path = output_base / f"Results_{method}"
    output_path.mkdir(parents=True, exist_ok=True)

    try:
        result = run_storage_model(method, input_path, output_path)
        results[method] = result

        computation_time = time.time() - start_time
        performance_metrics[method] = {
            'computation_time': computation_time,
            'model_size': get_model_size(method),  # variables + constraints
            'memory_usage': get_memory_usage()
        }
        print(f"✓ {method} completed in {computation_time:.2f} seconds")
    except Exception as e:
        # Best-effort batch run: report the failure and continue with the
        # remaining methods; None marks this method as unavailable downstream.
        print(f"✗ {method} failed: {str(e)}")
        results[method] = None
Performance Analysis#
# Tabulate runtime/size metrics: one row per method that completed.
perf_df = pd.DataFrame(performance_metrics).T
print("Performance Comparison:")
print(perf_df)

plt.figure(figsize=(12, 8))

# Subplot 1: computation time per method.
plt.subplot(2, 2, 1)
# Only methods that finished have a row in perf_df.
methods_clean = [m for m in methods if m in perf_df.index]
times = [perf_df.loc[m, 'computation_time'] for m in methods_clean]
bars = plt.bar(methods_clean, times,
               color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
plt.title('Computation Time Comparison')
plt.ylabel('Time (seconds)')
plt.xticks(rotation=45)
# Annotate each bar with its runtime.
for bar, time_val in zip(bars, times):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.1,
             f'{time_val:.1f}s', ha='center', va='bottom')

# Subplot 2: model size (variables + constraints) per method.
plt.subplot(2, 2, 2)
sizes = [perf_df.loc[m, 'model_size'] for m in methods_clean]
bars = plt.bar(methods_clean, sizes,
               color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'])
plt.title('Model Size Comparison')
plt.ylabel('Variables + Constraints')
plt.xticks(rotation=45)
# Annotate each bar with the model size (thousands separator).
for bar, size in zip(bars, sizes):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 100,
             f'{size:,}', ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.show()
Storage Technology Comparison#
Multi-Technology Analysis#
# Storage technologies to compare: capital cost in $/kWh, round-trip
# efficiency as a fraction, and maximum energy-to-power ratio in hours.
storage_technologies = {
    'Battery': {
        'capital_cost': 400,  # $/kWh
        'efficiency': 0.85,
        'max_hours': 4
    },
    'Pumped_Hydro': {
        'capital_cost': 150,  # $/kWh
        'efficiency': 0.75,
        'max_hours': 8
    },
    'Compressed_Air': {
        'capital_cost': 100,  # $/kWh
        'efficiency': 0.65,
        'max_hours': 12
    }
}

# One model run per technology, all with the Kotzur method (fastest).
storage_results = {}
for tech_name, tech_params in storage_technologies.items():
    print(f"\n=== Analyzing {tech_name} ===")
    # Build a run configuration from this technology's parameters.
    config = update_storage_config(tech_params)
    output_path = output_base / f"Storage_{tech_name}"
    result = run_storage_model('Kotzur', input_path, output_path, config)
    storage_results[tech_name] = result

# Cross-technology comparison of the collected results.
compare_storage_technologies(storage_results)
Storage Impact Analysis#
# With/without-storage scenarios to quantify the system-level value of storage.
scenarios = {
    'No_Storage': {'storage_enabled': False},
    'With_Storage': {'storage_enabled': True}
}

scenario_results = {}
for scenario_name, config in scenarios.items():
    print(f"\n=== Running {scenario_name} Scenario ===")
    output_path = output_base / f"Scenario_{scenario_name}"
    result = run_storage_model('Kotzur', input_path, output_path, config)
    scenario_results[scenario_name] = result

# Compare the two scenarios to isolate the impact of storage.
analyze_storage_impact(scenario_results)
Accuracy vs. Efficiency Trade-offs#
Clustering Accuracy Assessment#
# The 8760-hour run is the ground truth the clustered methods are scored against.
baseline_8760 = load_results(output_base / "Results_8760")

# Score each clustered method against the baseline.
accuracy_metrics = {}
for method in ['Kotzur', 'Niet', 'Welsch']:
    # Skip methods that failed (stored as None) or never ran.
    if method in results and results[method] is not None:
        method_results = load_results(output_base / f"Results_{method}")
        accuracy_metrics[method] = calculate_accuracy_metrics(
            method_results, baseline_8760
        )

accuracy_df = pd.DataFrame(accuracy_metrics).T
print("Accuracy Comparison (vs. 8760 baseline):")
print(accuracy_df)

# Scatter of accuracy vs. runtime: visualizes the speed/fidelity trade-off.
plt.figure(figsize=(10, 6))
for method in accuracy_metrics.keys():
    if method in perf_df.index:
        x = perf_df.loc[method, 'computation_time']
        y = accuracy_metrics[method]['overall_accuracy']
        plt.scatter(x, y, s=100, label=method)
        plt.annotate(method, (x, y), xytext=(5, 5),
                     textcoords='offset points')
plt.xlabel('Computation Time (seconds)')
plt.ylabel('Accuracy Score (0-1)')
plt.title('Accuracy vs. Computational Efficiency')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
Sensitivity Analysis#
Parameter Sensitivity#
# Parameter sweeps for one-at-a-time sensitivity analysis.
sensitivity_params = {
    'storage_cost': np.linspace(100, 500, 5),        # $/kWh
    'renewable_capacity': np.linspace(0.3, 0.9, 4),  # fraction
    'demand_growth': np.linspace(0.01, 0.05, 3)      # annual growth rate
}

sensitivity_results = {}
for param_name, param_values in sensitivity_params.items():
    print(f"\n=== Sensitivity Analysis: {param_name} ===")
    sensitivity_results[param_name] = {}
    for value in param_values:
        # Configuration with only this one parameter perturbed.
        config = create_sensitivity_config(param_name, value)
        # Encode the parameter value in the directory name so runs don't collide.
        output_path = output_base / f"Sensitivity_{param_name}_{value:.3f}"
        result = run_storage_model('Kotzur', input_path, output_path, config)
        sensitivity_results[param_name][value] = result

plot_sensitivity_analysis(sensitivity_results)
Regional Comparison#
Multi-Regional Analysis#
# Regional archetypes: renewable resource quality and dominant demand pattern.
regions = {
    'North': {'renewable_resource': 'high', 'demand_pattern': 'industrial'},
    'South': {'renewable_resource': 'medium', 'demand_pattern': 'residential'},
    'Coast': {'renewable_resource': 'low', 'demand_pattern': 'mixed'}
}

regional_results = {}
for region_name, region_config in regions.items():
    print(f"\n=== Analyzing {region_name} Region ===")
    # Derive region-specific input data from the shared base inputs.
    regional_input_path = prepare_regional_data(input_path, region_config)
    output_path = output_base / f"Regional_{region_name}"
    result = run_storage_model('Kotzur', regional_input_path, output_path)
    regional_results[region_name] = result

compare_regional_results(regional_results)
Economic Analysis#
Cost-Benefit Analysis#
# Economic metrics per method, skipping failed runs (stored as None).
economic_metrics = {}
for method, result in results.items():
    if result is not None:
        economic_metrics[method] = calculate_economic_metrics(result)

econ_df = pd.DataFrame(economic_metrics).T
print("Economic Comparison:")
print(econ_df)

plt.figure(figsize=(15, 10))

# Total system cost per method.
plt.subplot(2, 3, 1)
costs = econ_df['total_system_cost']
plt.bar(costs.index, costs.values)
plt.title('Total System Cost')
plt.ylabel('Cost (M$)')
plt.xticks(rotation=45)

# Storage investment per method.
plt.subplot(2, 3, 2)
storage_inv = econ_df['storage_investment']
plt.bar(storage_inv.index, storage_inv.values, color='orange')
plt.title('Storage Investment')
plt.ylabel('Investment (M$)')
plt.xticks(rotation=45)

# Levelized cost of energy per method.
plt.subplot(2, 3, 3)
lcoe = econ_df['LCOE']
plt.bar(lcoe.index, lcoe.values, color='green')
plt.title('Levelized Cost of Energy')
plt.ylabel('LCOE ($/MWh)')
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()
Results Visualization#
Comprehensive Dashboard#
def create_comparison_dashboard(results, performance_metrics):
    """Create a 3x3 comparison dashboard of performance, cost and operation.

    Args:
        results: dict mapping method name -> model result, or None for a
            method whose run failed.
        performance_metrics: dict mapping method name -> dict with
            'computation_time', 'model_size' and 'memory_usage' entries.
    """
    fig, axes = plt.subplots(3, 3, figsize=(18, 15))
    fig.suptitle('Storage-in-OSeMOSYS Comparative Analysis Dashboard', fontsize=16)

    methods = list(performance_metrics.keys())
    # Methods with a usable (non-None) result. Pairing labels and values with
    # the same filter keeps bars correctly labeled even when a method in the
    # middle of the list failed — the original `methods[:len(values)]`
    # truncation mislabeled bars in that case.
    ok_methods = [m for m in methods if results.get(m)]

    # 1. Computation time (all methods that reported metrics).
    ax = axes[0, 0]
    ax.bar(methods, [performance_metrics[m]['computation_time'] for m in methods])
    ax.set_title('Computation Time')
    ax.set_ylabel('Seconds')

    # 2. Storage capacity evolution.
    ax = axes[0, 1]
    for method in ok_methods:
        capacity_data = get_storage_capacity_data(results[method])
        ax.plot(capacity_data.index, capacity_data.values, label=method, marker='o')
    ax.set_title('Storage Capacity Evolution')
    ax.set_ylabel('Capacity (GW)')
    ax.legend()

    # 3. Total system cost.
    ax = axes[0, 2]
    ax.bar(ok_methods, [get_total_system_cost(results[m]) for m in ok_methods])
    ax.set_title('Total System Cost')
    ax.set_ylabel('Cost (M$)')

    # 4. Storage operation pattern.
    ax = axes[1, 0]
    for method in ok_methods:
        operation_data = get_storage_operation_data(results[method])
        ax.plot(operation_data.index, operation_data.values, label=method)
    ax.set_title('Storage Operation Pattern')
    ax.set_ylabel('Storage Level (%)')
    ax.legend()

    # 5. Renewable energy share.
    ax = axes[1, 1]
    ax.bar(ok_methods, [get_renewable_share(results[m]) for m in ok_methods])
    ax.set_title('Renewable Energy Share')
    ax.set_ylabel('Share (%)')

    # 6. Emissions.
    ax = axes[1, 2]
    ax.bar(ok_methods, [get_total_emissions(results[m]) for m in ok_methods],
           color='red', alpha=0.7)
    ax.set_title('Total CO2 Emissions')
    ax.set_ylabel('Mt CO2')

    # 7. Model size.
    ax = axes[2, 0]
    ax.bar(methods, [performance_metrics[m]['model_size'] for m in methods],
           color='purple', alpha=0.7)
    ax.set_title('Model Size')
    ax.set_ylabel('Variables + Constraints')

    # 8. Accuracy score — only available if the accuracy section ran earlier
    # in the tutorial (it defines the module-level `accuracy_metrics`).
    ax = axes[2, 1]
    if 'accuracy_metrics' in globals():
        scored = [m for m in methods if m in accuracy_metrics]
        ax.bar(scored, [accuracy_metrics[m]['overall_accuracy'] for m in scored],
               color='green', alpha=0.7)
    ax.set_title('Accuracy Score')
    ax.set_ylabel('Score (0-1)')

    # 9. Free-text summary panel (no axes).
    ax = axes[2, 2]
    ax.axis('off')
    summary_text = create_summary_text(results, performance_metrics)
    ax.text(0.1, 0.9, summary_text, transform=ax.transAxes,
            fontsize=10, verticalalignment='top')
    ax.set_title('Summary')

    plt.tight_layout()
    plt.show()

# Create the dashboard
create_comparison_dashboard(results, performance_metrics)
Key Insights and Recommendations#
Method Selection Guidelines#
Based on the comparative analysis:
For Quick Analysis: Use Kotzur method for fast, accurate results
For Detailed Studies: Use 8760-hour resolution when computational resources allow
For Storage-Heavy Systems: Kotzur method provides best storage representation
For Academic Research: Compare multiple methods to validate findings
Best Practices#
Always validate clustered results against 8760-hour baseline
Consider computational budget when selecting temporal resolution
Analyze sensitivity to key parameters before making conclusions
Document methodology and assumptions for reproducibility
Future Work#
The comparative analysis reveals opportunities for:
Hybrid clustering approaches
Adaptive temporal resolution
Machine learning-enhanced clustering
Real-time optimization capabilities
Continue with Advanced Configuration for expert-level usage.