Skip to content

tsam.periodAggregation

tsam.periodAggregation

aggregatePeriods

aggregatePeriods(
    candidates,
    n_clusters=8,
    n_iter=100,
    clusterMethod="k_means",
    solver="highs",
    representationMethod=None,
    representationDict=None,
    distributionPeriodWise=True,
    timeStepsPerPeriod=None,
    n_extra_columns=0,
)

Clusters the data using one of the cluster methods: 'averaging', 'k_means', 'k_medoids', 'k_maxoids', 'hierarchical' or 'adjacent_periods'.

Parameters:

Name Type Description Default
candidates ndarray

Dissimilarity matrix where each row represents a candidate. required

required
n_clusters integer

Number of aggregated clusters. optional (default: 8)

8
n_iter integer

Number of starts (initializations) of the k-means algorithm; only used when clusterMethod is 'k_means'. optional (default: 100)

100
clusterMethod string

Chosen clustering algorithm. Possible values are 'averaging', 'k_means', 'k_medoids', 'k_maxoids', 'hierarchical' or 'adjacent_periods'. optional (default: 'k_means')

'k_means'
n_extra_columns integer

Number of extra columns appended to candidates for clustering (e.g. period sums) that should be excluded from the representation step. optional (default: 0)

0
Source code in src/tsam/periodAggregation.py
def aggregatePeriods(
    candidates,
    n_clusters=8,
    n_iter=100,
    clusterMethod="k_means",
    solver="highs",
    representationMethod=None,
    representationDict=None,
    distributionPeriodWise=True,
    timeStepsPerPeriod=None,
    n_extra_columns=0,
):
    """
    Clusters the candidates with the chosen cluster method and determines a
    representation for every cluster.

    :param candidates: Two-dimensional array in which each row represents one
        candidate. required
    :type candidates: np.ndarray

    :param n_clusters: Number of aggregated clusters. optional (default: 8)
    :type n_clusters: integer

    :param n_iter: Number of starts (``n_init``) of the k-means algorithm.
        Only used if clusterMethod is 'k_means'. optional (default: 100)
    :type n_iter: integer

    :param clusterMethod: Chosen clustering algorithm. Possible values are
        'averaging', 'k_means', 'k_medoids', 'k_maxoids', 'hierarchical' or
        'adjacent_periods'. optional (default: 'k_means')
    :type clusterMethod: string

    :param solver: Solver name handed to the exact k-medoids implementation.
        Only used if clusterMethod is 'k_medoids'. optional (default: 'highs')
    :type solver: string

    :param representationMethod: Forwarded unchanged to ``representations``.
        The value passed as ``default`` to ``representations`` depends on the
        cluster method: 'meanRepresentation' for 'averaging' and 'k_means',
        'medoidRepresentation' for 'k_medoids', 'hierarchical' and
        'adjacent_periods', 'maxoidRepresentation' for 'k_maxoids'.
        optional (default: None)

    :param representationDict: Forwarded unchanged to ``representations``.
        optional (default: None)

    :param distributionPeriodWise: Forwarded unchanged to ``representations``.
        optional (default: True)

    :param timeStepsPerPeriod: Forwarded unchanged to ``representations``.
        optional (default: None)

    :param n_extra_columns: Number of extra columns appended to candidates for
        clustering (e.g. period sums) that should be excluded from the
        representation step. optional (default: 0)
    :type n_extra_columns: integer

    :returns: Tuple ``(clusterCenters, clusterCenterIndices, clusterOrder)``.
        The first two entries are whatever ``representations`` returns;
        ``clusterOrder`` holds the cluster label assigned to each candidate.

    :raises ValueError: If clusterMethod is not one of the supported values.
    """
    supported_methods = (
        "averaging",
        "k_means",
        "k_medoids",
        "k_maxoids",
        "hierarchical",
        "adjacent_periods",
    )
    # Fail early with a clear message; otherwise no branch below would assign
    # the result variables and the return statement would raise an opaque
    # UnboundLocalError.
    if clusterMethod not in supported_methods:
        raise ValueError(
            f"Unknown clusterMethod {clusterMethod!r}. "
            f"Supported values are: {', '.join(supported_methods)}."
        )

    # Candidates used for representation exclude extra evaluation columns
    repr_candidates = (
        candidates[:, :-n_extra_columns] if n_extra_columns else candidates
    )

    # cluster the data
    if clusterMethod == "averaging":
        # Consecutive candidates are grouped into equally sized clusters; any
        # leftover candidates at the end are assigned to the last cluster.
        n_sets = len(candidates)
        cluster_size = n_sets // n_clusters
        n_remaining = n_sets - cluster_size * n_clusters
        labels = [[n_cluster] * cluster_size for n_cluster in range(n_clusters)]
        if n_remaining:
            labels.append([n_clusters - 1] * n_remaining)
        clusterOrder = np.hstack(np.array(labels, dtype=object))
        default_representation = "meanRepresentation"

    elif clusterMethod == "k_means":
        from sklearn.cluster import KMeans

        k_means = KMeans(n_clusters=n_clusters, max_iter=1000, n_init=n_iter, tol=1e-4)
        clusterOrder = k_means.fit_predict(candidates)
        # get with own mean representation to avoid numerical trouble caused by sklearn
        default_representation = "meanRepresentation"

    elif clusterMethod == "k_medoids":
        from tsam.utils.k_medoids_exact import KMedoids

        k_medoid = KMedoids(n_clusters=n_clusters, solver=solver)
        clusterOrder = k_medoid.fit_predict(candidates)
        default_representation = "medoidRepresentation"

    elif clusterMethod == "k_maxoids":
        from tsam.utils.k_maxoids import KMaxoids

        k_maxoid = KMaxoids(n_clusters=n_clusters)
        clusterOrder = k_maxoid.fit_predict(candidates)
        default_representation = "maxoidRepresentation"

    else:  # "hierarchical" or "adjacent_periods"
        if n_clusters == 1:
            # A single cluster needs no fitting: every candidate gets label 0.
            clusterOrder = np.asarray([0] * len(candidates))
        else:
            from sklearn.cluster import AgglomerativeClustering

            agglomerative_kwargs = {"n_clusters": n_clusters, "linkage": "ward"}
            if clusterMethod == "adjacent_periods":
                # Connectivity that links each candidate only to its direct
                # predecessor and successor (first off-diagonals).
                n_candidates = len(candidates)
                agglomerative_kwargs["connectivity"] = np.eye(
                    n_candidates, k=1
                ) + np.eye(n_candidates, k=-1)
            clustering = AgglomerativeClustering(**agglomerative_kwargs)
            clusterOrder = clustering.fit_predict(candidates)
        # represent hierarchical aggregation with medoid
        default_representation = "medoidRepresentation"

    clusterCenters, clusterCenterIndices = representations(
        repr_candidates,
        clusterOrder,
        default=default_representation,
        representationMethod=representationMethod,
        representationDict=representationDict,
        distributionPeriodWise=distributionPeriodWise,
        timeStepsPerPeriod=timeStepsPerPeriod,
    )

    return clusterCenters, clusterCenterIndices, clusterOrder