Source code for periodAggregation

# -*- coding: utf-8 -*-

import numpy as np
from tsam.representations import representations


def _averagingClusterOrder(n_sets, n_clusters):
    """Assign consecutive candidates to equally sized clusters.

    Every cluster receives ``n_sets // n_clusters`` consecutive candidates;
    candidates left over at the end are assigned to the last cluster
    (label ``n_clusters - 1``).
    """
    cluster_size, remainder = divmod(n_sets, n_clusters)
    order = np.repeat(np.arange(n_clusters), cluster_size)
    if remainder:
        leftovers = np.full(remainder, n_clusters - 1, dtype=order.dtype)
        order = np.concatenate([order, leftovers])
    return order


def aggregatePeriods(
    candidates,
    n_clusters=8,
    n_iter=100,
    clusterMethod="k_means",
    solver="highs",
    representationMethod=None,
    representationDict=None,
    distributionPeriodWise=True,
    timeStepsPerPeriod=None,
):
    """
    Clusters the data based on one of the cluster methods:
    'averaging', 'k_means', 'k_medoids', 'k_maxoids', 'hierarchical' or
    'adjacent_periods'

    :param candidates: Matrix where each row represents a candidate. required
    :type candidates: np.ndarray

    :param n_clusters: Number of aggregated cluster. optional (default: 8)
    :type n_clusters: integer

    :param n_iter: Number of starts of the k-means algorithm (passed as
        ``n_init``). Only used for 'k_means'. optional (default: 100)
    :type n_iter: integer

    :param clusterMethod: Chosen clustering algorithm. Possible values are
        'averaging', 'k_means', 'k_medoids', 'k_maxoids', 'hierarchical' or
        'adjacent_periods'. optional (default: 'k_means')
    :type clusterMethod: string

    :param solver: Solver name forwarded to the exact k-medoids
        implementation. Only used for 'k_medoids'. optional (default: 'highs')
    :type solver: string

    :param representationMethod: Forwarded unchanged to ``representations``.
        If None, the default representation of the chosen cluster method is
        used (mean for 'averaging' and 'k_means', maxoid for 'k_maxoids',
        medoid otherwise). optional (default: None)

    :param representationDict: Forwarded unchanged to ``representations``.
        optional (default: None)

    :param distributionPeriodWise: Forwarded unchanged to
        ``representations``. optional (default: True)

    :param timeStepsPerPeriod: Forwarded unchanged to ``representations``.
        optional (default: None)

    :returns: tuple ``(clusterCenters, clusterCenterIndices, clusterOrder)``
        where ``clusterOrder`` holds one cluster label per candidate and the
        first two entries are whatever ``representations`` returns for it.

    :raises ValueError: if ``clusterMethod`` is not one of the supported
        values.
    """
    # Default representation handed to ``representations`` for each method.
    defaultRepresentations = {
        "averaging": "meanRepresentation",
        "k_means": "meanRepresentation",
        "k_medoids": "medoidRepresentation",
        "k_maxoids": "maxoidRepresentation",
        "hierarchical": "medoidRepresentation",
        "adjacent_periods": "medoidRepresentation",
    }

    # Fail early with a clear message instead of running into an
    # UnboundLocalError at the return statement.
    if clusterMethod not in defaultRepresentations:
        raise ValueError(
            "Unknown clusterMethod '{}'. Supported methods are: {}.".format(
                clusterMethod, ", ".join(sorted(defaultRepresentations))
            )
        )

    # cluster the data
    if clusterMethod == "averaging":
        clusterOrder = _averagingClusterOrder(len(candidates), n_clusters)

    elif clusterMethod == "k_means":
        from sklearn.cluster import KMeans

        k_means = KMeans(
            n_clusters=n_clusters, max_iter=1000, n_init=n_iter, tol=1e-4
        )
        # Only the labels are taken from sklearn; the centers are recomputed
        # below with the own mean representation to avoid numerical trouble
        # caused by sklearn.
        clusterOrder = k_means.fit_predict(candidates)

    elif clusterMethod == "k_medoids":
        from tsam.utils.k_medoids_exact import KMedoids

        k_medoid = KMedoids(n_clusters=n_clusters, solver=solver)
        clusterOrder = k_medoid.fit_predict(candidates)

    elif clusterMethod == "k_maxoids":
        from tsam.utils.k_maxoids import KMaxoids

        k_maxoid = KMaxoids(n_clusters=n_clusters)
        clusterOrder = k_maxoid.fit_predict(candidates)

    else:  # "hierarchical" or "adjacent_periods"
        if n_clusters == 1:
            # A single cluster needs no clustering run: every candidate
            # gets label 0.
            clusterOrder = np.zeros(len(candidates), dtype=int)
        else:
            from sklearn.cluster import AgglomerativeClustering

            if clusterMethod == "hierarchical":
                clustering = AgglomerativeClustering(
                    n_clusters=n_clusters, linkage="ward"
                )
            else:
                # Connectivity that links each candidate only to its direct
                # predecessor and successor, so merged clusters consist of
                # adjacent candidates.
                adjacencyMatrix = np.eye(len(candidates), k=1) + np.eye(
                    len(candidates), k=-1
                )
                clustering = AgglomerativeClustering(
                    n_clusters=n_clusters,
                    linkage="ward",
                    connectivity=adjacencyMatrix,
                )
            clusterOrder = clustering.fit_predict(candidates)

    clusterCenters, clusterCenterIndices = representations(
        candidates,
        clusterOrder,
        default=defaultRepresentations[clusterMethod],
        representationMethod=representationMethod,
        representationDict=representationDict,
        distributionPeriodWise=distributionPeriodWise,
        timeStepsPerPeriod=timeStepsPerPeriod,
    )

    return clusterCenters, clusterCenterIndices, clusterOrder