# -*- coding: utf-8 -*-
import numpy as np
from tsam.representations import representations
[docs]def aggregatePeriods(
candidates,
n_clusters=8,
n_iter=100,
clusterMethod="k_means",
solver="highs",
representationMethod=None,
representationDict=None,
distributionPeriodWise=True,
timeStepsPerPeriod=None,
):
"""
Clusters the data based on one of the cluster methods:
'averaging', 'k_means', 'exact k_medoid' or 'hierarchical'
:param candidates: Dissimilarity matrix where each row represents a candidate. required
:type candidates: np.ndarray
:param n_clusters: Number of aggregated cluster. optional (default: 8)
:type n_clusters: integer
:param n_iter: Only required for the number of starts of the k-mean algorithm. optional (default: 10)
:type n_iter: integer
:param clusterMethod: Chosen clustering algorithm. Possible values are
'averaging','k_means','exact k_medoid' or 'hierarchical'. optional (default: 'k_means')
:type clusterMethod: string
"""
# cluster the data
if clusterMethod == "averaging":
n_sets = len(candidates)
if n_sets % n_clusters == 0:
cluster_size = int(n_sets / n_clusters)
clusterOrder = [
[n_cluster] * cluster_size for n_cluster in range(n_clusters)
]
else:
cluster_size = int(n_sets / n_clusters)
clusterOrder = [
[n_cluster] * cluster_size for n_cluster in range(n_clusters)
]
clusterOrder.append(
[n_clusters - 1] * int(n_sets - cluster_size * n_clusters)
)
clusterOrder = np.hstack(np.array(clusterOrder, dtype=object))
clusterCenters, clusterCenterIndices = representations(
candidates,
clusterOrder,
default="meanRepresentation",
representationMethod=representationMethod,
representationDict=representationDict,
distributionPeriodWise=distributionPeriodWise,
timeStepsPerPeriod=timeStepsPerPeriod,
)
if clusterMethod == "k_means":
from sklearn.cluster import KMeans
k_means = KMeans(n_clusters=n_clusters, max_iter=1000, n_init=n_iter, tol=1e-4)
clusterOrder = k_means.fit_predict(candidates)
# get with own mean representation to avoid numerical trouble caused by sklearn
clusterCenters, clusterCenterIndices = representations(
candidates,
clusterOrder,
default="meanRepresentation",
representationMethod=representationMethod,
representationDict=representationDict,
distributionPeriodWise=distributionPeriodWise,
timeStepsPerPeriod=timeStepsPerPeriod,
)
if clusterMethod == "k_medoids":
from tsam.utils.k_medoids_exact import KMedoids
k_medoid = KMedoids(n_clusters=n_clusters, solver=solver)
clusterOrder = k_medoid.fit_predict(candidates)
clusterCenters, clusterCenterIndices = representations(
candidates,
clusterOrder,
default="medoidRepresentation",
representationMethod=representationMethod,
representationDict=representationDict,
distributionPeriodWise=distributionPeriodWise,
timeStepsPerPeriod=timeStepsPerPeriod,
)
if clusterMethod == "k_maxoids":
from tsam.utils.k_maxoids import KMaxoids
k_maxoid = KMaxoids(n_clusters=n_clusters)
clusterOrder = k_maxoid.fit_predict(candidates)
clusterCenters, clusterCenterIndices = representations(
candidates,
clusterOrder,
default="maxoidRepresentation",
representationMethod=representationMethod,
representationDict=representationDict,
distributionPeriodWise=distributionPeriodWise,
timeStepsPerPeriod=timeStepsPerPeriod,
)
if clusterMethod == "hierarchical" or clusterMethod == "adjacent_periods":
if n_clusters == 1:
clusterOrder = np.asarray([0] * len(candidates))
else:
from sklearn.cluster import AgglomerativeClustering
if clusterMethod == "hierarchical":
clustering = AgglomerativeClustering(
n_clusters=n_clusters, linkage="ward"
)
elif clusterMethod == "adjacent_periods":
adjacencyMatrix = np.eye(len(candidates), k=1) + np.eye(
len(candidates), k=-1
)
clustering = AgglomerativeClustering(
n_clusters=n_clusters, linkage="ward", connectivity=adjacencyMatrix
)
clusterOrder = clustering.fit_predict(candidates)
# represent hierarchical aggregation with medoid
clusterCenters, clusterCenterIndices = representations(
candidates,
clusterOrder,
default="medoidRepresentation",
representationMethod=representationMethod,
representationDict=representationDict,
distributionPeriodWise=distributionPeriodWise,
timeStepsPerPeriod=timeStepsPerPeriod,
)
return clusterCenters, clusterCenterIndices, clusterOrder