Source code for src.cluster

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

def cluster_data(capacity_factor_path, demand_profile_path, n_clusters):
    """Pick representative days by K-means clustering of daily profiles.

    Both CSV files are read with pandas and must provide ``TIMESLICE``,
    ``YEAR`` and ``VALUE`` columns. Each ``VALUE`` column is min-max scaled
    on its own, the two tables are joined on ``TIMESLICE`` and ``YEAR``, and
    every day becomes one feature row made of its hourly capacity values
    followed by its hourly demand values.

    ``TIMESLICE`` is treated as a 1-based index with 24 slices per day:
    slices 1-24 form day 1, slices 25-48 form day 2, and so on.

    NOTE(review): the pivot step needs each (day, hour) pair to appear only
    once after the merge; inputs with several years or several series per
    time slice would presumably fail there -- confirm against real data.

    Args:
        capacity_factor_path: Path to the capacity-factor CSV file.
        demand_profile_path: Path to the demand-profile CSV file.
        n_clusters: Number of clusters, i.e. of representative days.

    Returns:
        A tuple ``(representative_days_sorted, chronological_sequence)``:
        the representative day numbers in ascending order, and a list with
        one entry per clustered day (in day order) holding the
        representative day of the cluster that day was assigned to.
    """
    capacity_df = pd.read_csv(capacity_factor_path)
    demand_df = pd.read_csv(demand_profile_path)

    # Scale each series independently so that neither dominates the
    # distance computation used by the clustering.
    capacity_df['VALUE'] = MinMaxScaler().fit_transform(capacity_df[['VALUE']])
    demand_df['VALUE'] = MinMaxScaler().fit_transform(demand_df[['VALUE']])

    # Join both series on time slice and year.
    join_keys = ['TIMESLICE', 'YEAR']
    merged = pd.merge(
        capacity_df[['TIMESLICE', 'YEAR', 'VALUE']],
        demand_df[['TIMESLICE', 'YEAR', 'VALUE']],
        on=join_keys,
        suffixes=('_capacity', '_demand'),
    )

    # Split the 1-based time-slice index into a day number and an hour.
    zero_based_slice = merged['TIMESLICE'] - 1
    merged['DayOfYear'] = zero_based_slice // 24 + 1
    merged['HourOfDay'] = zero_based_slice % 24 + 1

    # One row per day and one column per hour, separately for each series.
    capacity_by_day = merged.pivot(
        index='DayOfYear', columns='HourOfDay', values='VALUE_capacity'
    )
    demand_by_day = merged.pivot(
        index='DayOfYear', columns='HourOfDay', values='VALUE_demand'
    )

    # Give the hourly columns distinct names before placing them side by side.
    capacity_by_day.columns = [
        f'value_capacity_timeslice_{hour}' for hour in capacity_by_day.columns
    ]
    demand_by_day.columns = [
        f'value_demand_timeslice_{hour}' for hour in demand_by_day.columns
    ]
    daily_features = pd.concat([capacity_by_day, demand_by_day], axis=1)

    # Cluster the days; the fixed seed keeps the result reproducible.
    model = KMeans(n_clusters=n_clusters, random_state=0)
    day_labels = model.fit_predict(daily_features)

    # For each centroid, take the actual day lying closest to it. The result
    # is ordered by cluster label, so it can be indexed by label below.
    nearest_rows, _ = pairwise_distances_argmin_min(
        model.cluster_centers_, daily_features
    )
    representative_days = daily_features.iloc[nearest_rows].index.tolist()

    # Chronologically ordered copy of the representative days.
    representative_days_sorted = list(representative_days)
    representative_days_sorted.sort()

    # Replace every day's cluster label by that cluster's representative day.
    chronological_sequence = [representative_days[label] for label in day_labels]

    return representative_days_sorted, chronological_sequence