Skip to content

tsam.utils.k_medoids_exact

tsam.utils.k_medoids_exact

KMedoids

Bases: BaseEstimator, ClusterMixin, TransformerMixin

k-medoids clustering estimator that selects the medoids exactly by solving an optimization model.

Parameters:

Name Type Description Default
n_clusters integer

How many medoids. Must be positive. optional, default: 8

8
distance_metric string

What distance metric to use. optional, default: 'euclidean'

'euclidean'
timelimit integer

Specify the time limit of the solver. optional, default: 100

100
threads integer

Threads to use by the optimization solver. optional, default: 7

7
solver string

Specifies the solver. optional, default: 'highs'

'highs'
Source code in src/tsam/utils/k_medoids_exact.py
class KMedoids(BaseEstimator, ClusterMixin, TransformerMixin):
    """
    k-medoids clustering solved exactly as an optimization model.

    The constructor only stores its arguments; validation happens in
    :meth:`fit`.

    :param n_clusters:  How many medoids. Must be positive. optional, default: 8
    :type n_clusters: integer

    :param distance_metric: What distance metric to use. Either a callable
        taking the data matrix, or a key of ``PAIRWISE_DISTANCE_FUNCTIONS``.
        optional, default: 'euclidean'
    :type distance_metric: string or callable

    :param timelimit: Specify the time limit of the solver. optional, default:  100
    :type timelimit: integer

    :param threads: Threads to use by the optimization solver. optional, default: 7
    :type threads: integer

    :param solver: Specifies the solver. optional, default: 'highs'
    :type solver: string
    """

    def __init__(
        self,
        n_clusters=8,
        distance_metric="euclidean",
        timelimit=100,
        threads=7,
        solver="highs",
    ):
        self.n_clusters = n_clusters

        self.distance_metric = distance_metric

        self.solver = solver

        # NOTE(review): timelimit and threads are stored here, but the solver
        # call in _k_medoids_exact only forwards `solver` -- confirm whether
        # they are meant to be passed on as well.
        self.timelimit = timelimit

        self.threads = threads

    def _check_init_args(self):
        """Validate the constructor arguments and resolve the distance function.

        Sets ``self.distance_func`` as a side effect.

        :raises ValueError: if ``n_clusters`` is not a positive integer or
            ``distance_metric`` is neither callable nor a known metric name.
        """
        # Check n_clusters. The type test runs first so that a non-numeric
        # value (e.g. a string) is reported as ValueError instead of failing
        # with TypeError inside the comparison.
        if not isinstance(self.n_clusters, int) or self.n_clusters <= 0:
            raise ValueError("n_clusters has to be a positive integer")

        # Check distance_metric
        if callable(self.distance_metric):
            self.distance_func = self.distance_metric
        elif self.distance_metric in PAIRWISE_DISTANCE_FUNCTIONS:
            self.distance_func = PAIRWISE_DISTANCE_FUNCTIONS[self.distance_metric]
        else:
            raise ValueError(
                "distance_metric needs to be "
                + "callable or one of the "
                + "following strings: "
                + f"{PAIRWISE_DISTANCE_FUNCTIONS.keys()}"
                + f". Instead, '{self.distance_metric}' "
                + "was given."
            )

    def fit(self, X, y=None):
        """Fit K-Medoids to the provided data.

        Stores the result in ``self.labels_`` (a list with one cluster index
        per entry of the solver's assignment) and ``self.cluster_centers_``
        (a list holding the rows of ``X`` chosen as medoids).

        :param X: shape=(n_samples, n_features)
        :type X: array-like or sparse matrix

        :param y: not used by this method

        :returns: self
        """

        self._check_init_args()

        # check that the array is good and attempt to convert it to
        # Numpy array if possible
        X = self._check_array(X)

        # apply distance metric to get the distance matrix
        D = self.distance_func(X)

        # run exact optimization
        r_y, r_x, _best_inertia = self._k_medoids_exact(D, self.n_clusters)

        # For every column of the assignment matrix, the row holding the
        # largest value; these row indices are looked up as medoid indices
        # in `translator` below.
        labels_raw = r_x.argmax(axis=0)

        # Indices flagged as medoids by the solver, in ascending order.
        medoid_indices = [ix for ix, val in enumerate(r_y) if val > 0]

        # Map the original medoid indices onto consecutive labels 0..k-1.
        translator = {ix: count for count, ix in enumerate(medoid_indices)}

        self.labels_ = [translator[label] for label in labels_raw]
        self.cluster_centers_ = [X[ix] for ix in medoid_indices]

        return self

    def _check_array(self, X):
        """Validate ``X`` via ``check_array`` and compare it with ``n_clusters``.

        :param X: input data
        :returns: the array returned by ``check_array``
        :raises ValueError: if ``n_clusters`` exceeds the number of rows of X.
        """
        X = check_array(X)

        # Check that the number of clusters is less than or equal to
        # the number of samples
        if self.n_clusters > X.shape[0]:
            raise ValueError(
                "The number of medoids "
                + f"({self.n_clusters}) "
                + "must not be larger than the number "
                + f"of samples ({X.shape[0]})"
            )

        return X

    def _k_medoids_exact(self, distances, n_clusters):
        """
        Build the optimization model and solve it with ``self.solver``.

        Parameters
        ----------
        distances : array-like, required
            Pairwise distances between each row.
        n_clusters : int, required
            Number of clusters.

        Returns
        -------
        tuple
            ``(r_y, r_x.T, r_obj)``: the second and first results of the
            solve helper (the latter transposed) followed by its third
            result, which ``fit`` receives as ``_best_inertia``.
        """

        # Create pyomo model
        M = _setup_k_medoids(distances, n_clusters)

        # And solve
        r_x, r_y, r_obj = _solve_given_pyomo_model(M, solver=self.solver)

        return (r_y, r_x.T, r_obj)

fit

fit(X, y=None)

Fit K-Medoids to the provided data.

Parameters:

Name Type Description Default
X array-like | sparse matrix

shape=(n_samples, n_features)

required

Returns:

Type Description

self

Source code in src/tsam/utils/k_medoids_exact.py
def fit(self, X, y=None):
    """Fit K-Medoids to the provided data.

    Validates the estimator settings and the input, computes the distance
    matrix, runs the exact optimization and stores the outcome in
    ``self.labels_`` and ``self.cluster_centers_`` (both plain lists).

    :param X: shape=(n_samples, n_features)
    :type X: array-like or sparse matrix

    :param y: not used by this method

    :returns: self
    """
    self._check_init_args()

    # Validate the input; the checker returns the array used from here on.
    X = self._check_array(X)

    # Distance matrix from the configured metric, then the exact solve.
    pairwise = self.distance_func(X)
    medoid_flags, assignment, _best_inertia = self._k_medoids_exact(
        pairwise, self.n_clusters
    )

    # Row index of the largest entry in each column of the assignment matrix.
    raw_labels = assignment.argmax(axis=0)

    # Indices whose flag is positive are the selected medoids.
    medoid_rows = [row for row, flag in enumerate(medoid_flags) if flag > 0]

    # Renumber the medoid indices as consecutive labels starting at 0.
    dense_id = {row: position for position, row in enumerate(medoid_rows)}

    centers = [X[row] for row in medoid_rows]
    labels = [dense_id[raw] for raw in raw_labels]

    self.labels_ = labels
    self.cluster_centers_ = centers

    return self