Segmentation¶

How to use timestep segmentation to reduce the number of timesteps per period.

Author: Maximilian Hoffmann

In [1]:

Copied!





%load_ext autoreload
%autoreload 2

import warnings

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

import tsam
from tsam import ClusterConfig, SegmentConfig

pio.renderers.default = "notebook_connected"

# Added to every example notebook: silence the v3 column-order
# FutureWarning in the rendered docs (tsam v4 returns result columns in
# input order; see migration guide).
warnings.filterwarnings(
    "ignore", category=FutureWarning, message=".*sorted alphabetically.*"
)

Input data¶

In [2]:

Copied!

# Load the input time series once; the first CSV column becomes the index.
raw = pd.read_csv("testdata.csv", index_col=0)

10 typical days at hourly resolution (no segmentation)¶

Baseline: hierarchical clustering with medoid representation and 24 hourly timesteps.

In [3]:

Copied!





# Baseline run: 10 clusters of 24-step periods, hierarchical clustering,
# no segmentation. Executed once; the accuracy metrics are the cell output.
result = tsam.aggregate(
    raw,
    n_clusters=10,
    period_duration=24,
    cluster=ClusterConfig(method="hierarchical"),
)
result.accuracy

Out[3]:

AccuracyMetrics(
  rmse=0.1044 (weighted),
  mae=0.0715 (weighted),
  rmse_duration=0.0337 (weighted)
)

In [4]:

Copied!

# Show the cluster-members plot for the baseline (unsegmented) result.
result.plot.cluster_members()

In [5]:

Copied!

# Display the representative profiles of the baseline result.
result.cluster_representatives

Out[5]:

		GHI	Load	T	Wind
	timestep
0	0	0.000000	383.795921	4.421453	5.043609
	1	0.000000	371.689230	4.026704	4.933965
	2	0.000000	374.310544	3.829330	4.714678
	3	0.000000	369.720991	3.730643	4.605034
	4	0.000000	376.053584	3.335894	5.591827
...	...	...	...	...	...
9	19	2.199516	478.910919	10.145308	2.192873
	20	0.000000	457.575577	10.342682	2.192873
	21	0.000000	444.225788	10.540056	2.192873
	22	0.000000	430.132842	10.441369	2.192873
	23	0.000000	401.874887	10.441369	2.192873

240 rows × 4 columns

20 typical days with 12 irregular segments¶

Segmentation reduces the number of timesteps per period while preserving key transitions.

In [6]:

Copied!





# Segmented run: 20 clusters of 24-step periods, each period reduced to
# 12 segments. Executed once; the accuracy metrics are the cell output.
result_segmented = tsam.aggregate(
    raw,
    n_clusters=20,
    period_duration=24,
    cluster=ClusterConfig(method="hierarchical"),
    segments=SegmentConfig(n_segments=12),
)
result_segmented.accuracy

Out[6]:

AccuracyMetrics(
  rmse=0.0961 (weighted),
  mae=0.0658 (weighted),
  rmse_duration=0.0203 (weighted)
)

In [7]:

Copied!

# Show the cluster-members plot for the segmented result.
result_segmented.plot.cluster_members()

In [8]:

Copied!

# Show the segment-durations plot for the segmented result.
result_segmented.plot.segment_durations()

In [9]:

Copied!

# Display the representative profiles of the segmented result.
result_segmented.cluster_representatives

Out[9]:

			GHI	Load	T	Wind
	Segment Step	Segment Duration
0	0	4	0.000000	403.310708	1.025025	2.226777
	1	2	0.000000	424.393351	1.122830	3.340165
	2	1	0.000000	495.118712	1.171733	3.340165
	3	2	11.182790	541.665766	1.562953	3.340165
	4	4	74.551932	547.697631	2.272040	2.226777
...	...	...	...	...	...	...
19	7	3	76.681988	471.099097	-2.447057	3.340165
	8	4	34.080883	469.022225	-1.664616	2.226777
	9	1	0.000000	499.029025	-2.055836	2.226777
	10	4	0.000000	498.349573	-2.178093	1.113388
	11	2	0.000000	474.908494	-2.495959	2.226777

240 rows × 4 columns

Comparison¶

In [10]:

Copied!





# Aggregation results to compare, keyed by the label used in the figures.
results = {
    "10 x 24h": result,
    "20 x 12seg": result_segmented,
}

# Duration curves: sort each "Load" series in descending order and plot the
# values against their rank. The "Original" entry maps to None so that the
# raw input is plotted alongside the reconstructed series.
frames = []
for name, r in {"Original": None, **results}.items():
    vals = (raw if r is None else r.reconstructed)["Load"]
    # Drop the original index so that "Load" lines up with the 0..n-1 rank.
    sorted_vals = vals.sort_values(ascending=False).reset_index(drop=True)
    frames.append(
        pd.DataFrame(
            {"Hour": range(len(sorted_vals)), "Load": sorted_vals, "Method": name}
        )
    )

px.line(
    pd.concat(frames, ignore_index=True),
    x="Hour",
    y="Load",
    color="Method",
    title="Duration Curve Comparison - Load",
)

In [11]:

Copied!





# Heatmap comparison: one subplot row per dataset. Every trace references
# the same "coloraxis", so all panels share one colour scale.
# `go` and `make_subplots` are imported in the notebook's import cell.
param = "GHI"

labels = ["Original", "10 x 24h", "20 x 12seg"]
sources = [raw, result.reconstructed, result_segmented.reconstructed]
data = [tsam.unstack_to_periods(source, period_duration=24) for source in sources]

# The number of rows follows the number of datasets instead of a literal 3.
fig = make_subplots(
    rows=len(data), cols=1, subplot_titles=labels, vertical_spacing=0.05
)
for i, d in enumerate(data, 1):
    fig.add_trace(go.Heatmap(z=d[param].values.T, coloraxis="coloraxis"), row=i, col=1)
fig.update_layout(
    height=750,
    coloraxis={"colorscale": "Viridis"},
    title_text=f"Heatmap Comparison - {param}",
)
fig.show()

In [12]:

Copied!





# Time slice comparison: the "Load" column of the raw input and of each
# reconstruction, restricted to the labels between "20100210" and "20100218".
# NOTE(review): this date-string slice presumably relies on a datetime-like
# index on both `raw` and `reconstructed` - confirm, since `raw` is read
# without `parse_dates`.
frames = []
for name, r in {"Original": None, **results}.items():
    df = raw if r is None else r.reconstructed
    # .assign returns a new frame, so the source data is never mutated.
    frames.append(df.loc["20100210":"20100218", ["Load"]].assign(Method=name))

px.line(
    pd.concat(frames).reset_index(names="Time"),
    x="Time",
    y="Load",
    color="Method",
    title="Time Slice Comparison - Load (Feb 10-18)",
)

Validation¶

Column means should be preserved for both approaches, up to small numerical differences (see GHI for the segmented run below).

In [13]:

Copied!





# Per-column means of the raw input next to those of both reconstructions,
# one table column per dataset.
means = pd.DataFrame(
    {
        "Original": raw.mean(),
        "10 x 24h": result.reconstructed.mean(),
        "20 x 12seg": result_segmented.reconstructed.mean(),
    }
)
means

Out[13]:

	Original	10 x 24h	20 x 12seg
GHI	110.990183	110.990183	110.990179
Load	450.260335	450.260335	450.260335
T	7.790616	7.790616	7.790616
Wind	3.057306	3.057306	3.057306