Add tests for same clusters as input data in same_cluster_as_input_data.py; update CI (#173)

julian-belina · web-flow · commit 30139a87ca5e · 2026-03-08T18:13:11.000+01:00
* Add tests for same clusters as input data in `same_cluster_as_input_data.py`; update CI

* Update CI

* Add max python version

* dummy commit

* increment version number

* Refactor tests for same clusters to use parameterized inputs and improve readability

* Enhance warning filters in pyproject.toml and update test_assert_raises to use pytest for warning assertions

* Refactor filterwarnings in pyproject.toml for improved clarity and consistency

* Normalize deprecated lowercase day alias in duration parsing
diff --git a/.github/workflows/test_on_push.yml b/.github/workflows/test_on_push.yml
@@ -4,8 +4,14 @@ on:
     branches: ["**"]
 
 jobs:
-  TestPushCondaForgeDev:
-    name: Test conda-forge development version on ${{ matrix.runner_tag }}
+  extract-python-versions:
+    uses: FZJ-IEK3-VSA/.github/.github/workflows/_extract_env_versions.yml@main
+    with:
+      library_name: python
+      env_file: environment.yml
+
+  TestPullCondaForgeDev:
+    name: Test development version on ${{ matrix.runner_tag }}
     strategy:
       fail-fast: false
       matrix:
@@ -17,17 +23,17 @@ jobs:
       examples_to_execute: "docs/source/examples_notebooks/quickstart.ipynb docs/source/examples_notebooks/clustering_methods.ipynb docs/source/examples_notebooks/optimization_input.ipynb docs/source/examples_notebooks/representations.ipynb docs/source/examples_notebooks/segmentation.ipynb docs/source/examples_notebooks/k_maxoids.ipynb docs/source/examples_notebooks/clustering_transfer.ipynb"
       multiprocessing_pytest_string: "-n auto"
       multiprocessing_example_string: "-n auto"
-  TestPushPyPIDev:
-    name: Test PyPi development version on ${{ matrix.runner_tag }}
+  TestPullPyPIDev:
+    name: Test PyPI development version on ${{ matrix.runner_tag }}
+    needs: extract-python-versions
     strategy:
       fail-fast: false
       matrix:
         runner_tag: ["self-hosted"]
-    uses: FZJ-IEK3-VSA/.github/.github/workflows/_run_single_pypi_test.yml@main
+    uses: FZJ-IEK3-VSA/.github/.github/workflows/_run_single_pypi_test_uv.yml@main
     with:
       runner_tag: ${{ matrix.runner_tag }}
-      python_version: "3.13"
-      optional_dependency_PyPI_tag: "[develop]"
       examples_to_execute: "docs/source/examples_notebooks/quickstart.ipynb docs/source/examples_notebooks/clustering_methods.ipynb docs/source/examples_notebooks/optimization_input.ipynb docs/source/examples_notebooks/representations.ipynb docs/source/examples_notebooks/segmentation.ipynb docs/source/examples_notebooks/k_maxoids.ipynb docs/source/examples_notebooks/clustering_transfer.ipynb"
-      multiprocessing_pytest_string: "-n auto"
-      multiprocessing_example_string: "-n auto"
+      python_version: ${{ needs.extract-python-versions.outputs.max_version }}
+      optional_dependency_PyPI_tag: "[develop]"
+
diff --git a/.github/workflows/test_pull_dev.yml b/.github/workflows/test_pull_dev.yml
@@ -8,10 +8,13 @@ jobs:
     uses: FZJ-IEK3-VSA/.github/.github/workflows/_extract_env_versions.yml@main
     with:
       library_name: python
+      env_file: environment.yml
+
 
   TestPullCondaForgeDev:
     name: Test development version on ${{ matrix.runner_tag }}
     needs: extract-python-versions
+    if: github.actor != 'renovate[bot]'
     strategy:
       fail-fast: false
       matrix:
@@ -30,25 +33,27 @@ jobs:
   TestPullPyPIDev:
     name: Test PyPI development version on ${{ matrix.runner_tag }}
     needs: extract-python-versions
+    if: github.actor != 'renovate[bot]'
     strategy:
       fail-fast: false
       matrix:
         runner_tag: ["ubuntu-latest", "macos-latest", "windows-latest"]
-    uses: FZJ-IEK3-VSA/.github/.github/workflows/_run_single_pypi_test.yml@main
+    uses: FZJ-IEK3-VSA/.github/.github/workflows/_run_single_pypi_test_uv.yml@main
     with:
       runner_tag: ${{ matrix.runner_tag }}
       examples_to_execute: "docs/source/examples_notebooks/quickstart.ipynb docs/source/examples_notebooks/clustering_methods.ipynb docs/source/examples_notebooks/optimization_input.ipynb docs/source/examples_notebooks/representations.ipynb docs/source/examples_notebooks/segmentation.ipynb docs/source/examples_notebooks/k_maxoids.ipynb docs/source/examples_notebooks/clustering_transfer.ipynb"
       python_version: ${{ needs.extract-python-versions.outputs.max_version }}
       optional_dependency_PyPI_tag: "[develop]"
-      additional_conda_forge_dependencies: "glpk"
 
   extract-min-versions:
+    if: github.actor != 'renovate[bot]'
     uses: FZJ-IEK3-VSA/.github/.github/workflows/_extract_env_versions_matrix.yml@main
     with:
       libraries: "python,scikit-learn,pandas,numpy,pyomo,networkx,tqdm,highspy"
       version_type: min
 
   TestMinDependencies:
+    if: github.actor != 'renovate[bot]'
     needs: extract-min-versions
     strategy:
       fail-fast: false
diff --git a/.github/workflows/test_pull_request_master_min_max_versions.yml b/.github/workflows/test_pull_request_master_min_max_versions.yml
@@ -9,6 +9,7 @@ jobs:
         with:
             libraries: "scikit-learn,pandas,numpy,pyomo,networkx,tqdm,highspy"
             version_type: both
+            env_file: environment.yml
 
     TestDependencyVersions:
         name: Test ${{ matrix.dependencies.library_name }} ${{ matrix.dependencies.version }} (${{ matrix.dependencies.version_type }})
diff --git a/.github/workflows/test_pull_request_master_python_versions.yml b/.github/workflows/test_pull_request_master_python_versions.yml
@@ -8,6 +8,7 @@ jobs:
         uses: FZJ-IEK3-VSA/.github/.github/workflows/_extract_env_versions.yml@main
         with:
             library_name: python
+            env_file: environment.yml
 
     TestPushCondaForgeDev:
         name: Test conda-forge development version on ${{ matrix.runner_tag }}
@@ -44,7 +45,7 @@ jobs:
                         "windows-2022",
                     ]
                 python_version: ${{ fromJSON(needs.extract-python-versions.outputs.versions) }}
-        uses: FZJ-IEK3-VSA/.github/.github/workflows/_run_single_pypi_test.yml@main
+        uses: FZJ-IEK3-VSA/.github/.github/workflows/_run_single_pypi_test_uv.yml@main
         with:
             runner_tag: ${{ matrix.runner_tag }}
             python_version: ${{ matrix.python_version }}
diff --git a/.github/workflows/test_pull_request_renovate_max_versions.yml b/.github/workflows/test_pull_request_renovate_max_versions.yml
@@ -0,0 +1,41 @@
+on:
+  pull_request:
+    branches: ["develop"]
+
+jobs:
+  detect-changes:
+    name: Detect changed dependencies
+    if: startsWith(github.head_ref, 'renovate/')
+    uses: FZJ-IEK3-VSA/.github/.github/workflows/_detect_renovate_changed_libraries.yml@main
+    with:
+      files: "environment.yml pyproject.toml"
+
+  extract-versions:
+    name: Extract max versions of changed libraries
+    needs: detect-changes
+    if: startsWith(github.head_ref, 'renovate/') && needs.detect-changes.outputs.libraries != ''
+    uses: FZJ-IEK3-VSA/.github/.github/workflows/_extract_env_versions_matrix.yml@main
+    with:
+      libraries: ${{ needs.detect-changes.outputs.libraries }}
+      version_type: max
+      env_file: environment.yml
+
+  TestRenovateMaxVersions:
+    name: Test ${{ matrix.dependencies.library_name }} ${{ matrix.dependencies.version }} (max)
+    needs: extract-versions
+    if: startsWith(github.head_ref, 'renovate/')
+    strategy:
+      fail-fast: false
+      matrix:
+        runner_tag: ["self-hosted"]
+        dependencies: ${{ fromJSON(needs.extract-versions.outputs.matrix) }}
+    uses: FZJ-IEK3-VSA/.github/.github/workflows/_run_single_conda_forge_test.yml@main
+    with:
+      runner_tag: ${{ matrix.runner_tag }}
+      requirements_file_name: environment.yml
+      examples_to_execute: "docs/source/examples_notebooks/quickstart.ipynb docs/source/examples_notebooks/clustering_methods.ipynb docs/source/examples_notebooks/optimization_input.ipynb docs/source/examples_notebooks/representations.ipynb docs/source/examples_notebooks/segmentation.ipynb docs/source/examples_notebooks/k_maxoids.ipynb docs/source/examples_notebooks/clustering_transfer.ipynb"
+      library_name: ${{ matrix.dependencies.library_name }}
+      library_version: ${{ matrix.dependencies.version }}
+      dependency_position_env_file: ${{ matrix.dependencies.yaml_position }}
+      multiprocessing_pytest_string: "-n auto"
+      multiprocessing_example_string: "-n auto"
diff --git a/README.md b/README.md
@@ -80,7 +80,6 @@ import pandas as pd
 import tsam
 ```
 
-
 Read in the time series data set with pandas
 ```python
 raw = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)
diff --git a/environment.yml b/environment.yml
@@ -2,7 +2,7 @@ name: tsam_env
 channels:
   - conda-forge
 dependencies:
-  - python>=3.10,<3.15
+  - python>=3.10,<=3.14.3
   - pip
   # Core dependencies
   - scikit-learn >=1.3.0,<=1.8.0
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tsam"
-version = "3.1.1"
+version = "3.1.2"
 description = "Time series aggregation module (tsam) to create typical periods"
 authors = [
   { name = "Leander Kotzur", email = "leander.kotzur@googlemail.com" },
@@ -79,7 +79,17 @@ pythonpath = [
   "test",
 ] # Sets the path which should be prepended to pythonpath relative to the root folder
 console_output_style = "count"
-filterwarnings = ["ignore::tsam.exceptions.LegacyAPIWarning"]
+filterwarnings = [
+  "ignore::tsam.exceptions.LegacyAPIWarning",
+  # Third-party library warnings outside of tsam's control
+  "ignore::RuntimeWarning:threadpoolctl",
+  "ignore:KMeans is known to have a memory leak:UserWarning:sklearn",
+  "ignore::sklearn.exceptions.ConvergenceWarning",
+  # Expected tsam warnings raised during edge-case tests
+  "ignore:The cluster is too small:UserWarning:tsam",
+  "ignore:Segmentation is turned off:UserWarning:tsam",
+  "ignore:Max iteration number reached:UserWarning:tsam",
+]
 
 [tool.ruff]
 target-version = "py310"
@@ -115,7 +125,6 @@ ignore = [
   "RUF012", # mutable class attributes - these are constants in this codebase
   "RUF002", # ambiguous unicode characters in docstrings
   "RUF059", # Unpacked variable is never used
-  "UP038",  # use X | Y in isinstance (performance regression, see ruff issue)
 ]
 
 [tool.ruff.lint.isort]
diff --git a/src/tsam/api.py b/src/tsam/api.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import re
 import warnings
 from typing import cast
 
@@ -37,6 +38,8 @@ def _parse_duration_hours(value: int | float | str, param_name: str) -> float:
         return float(value)
     if isinstance(value, str):
         try:
+            # Normalize deprecated lowercase day alias: '1d' → '1D' (pandas 4+)
+            value = re.sub(r"(?<=[0-9])d(?![a-z])", "D", value)
             td = pd.Timedelta(value)
             return td.total_seconds() / 3600
         except ValueError as e:
diff --git a/test/same_cluster_as_input_data.py b/test/same_cluster_as_input_data.py
@@ -0,0 +1,76 @@
+import itertools
+
+import numpy as np
+import pandas as pd
+import pytest
+
+import tsam
+from tsam import ClusterConfig
+
+# All clustering methods (excluding "averaging" which does not cluster into n_clusters)
+_METHODS = ["kmeans", "kmedoids", "kmaxoids", "hierarchical", "contiguous"]
+
+# All representation methods
+_REPRESENTATIONS = ["mean", "medoid", "maxoid", "distribution", "distribution_minmax"]
+
+# Representation methods that are tested with use_duration_curves=True
+_DISTRIBUTION_REPS = {"distribution", "distribution_minmax"}
+
+_PARAMS = [
+    pytest.param(
+        method,
+        rep,
+        rep in _DISTRIBUTION_REPS,
+        id=f"{method}_{rep}",
+    )
+    for method, rep in itertools.product(_METHODS, _REPRESENTATIONS)
+]
+
+
+@pytest.fixture(scope="module")
+def input_data() -> pd.DataFrame:
+    costs = pd.DataFrame(
+        [
+            np.array([0.05, 0.0, 0.1, 0.051]),
+            np.array([0.0, 0.0, 0.0, 0.0]),
+        ],
+        index=["ElectrolyzerLocation", "IndustryLocation"],
+    ).T
+    revenues = pd.DataFrame(
+        [
+            np.array([0.0, 0.01, 0.0, 0.0]),
+            np.array([0.0, 0.0, 0.0, 0.0]),
+        ],
+        index=["ElectrolyzerLocationRevenue", "IndustryLocationRevenue"],
+    ).T
+
+    timeSeriesData = pd.concat([costs, revenues], axis=1)
+    timeSeriesData.index = pd.date_range(
+        "2050-01-01 00:30:00",
+        periods=4,
+        freq="1h",
+        tz="Europe/Berlin",
+    )
+    return timeSeriesData
+
+
+@pytest.mark.parametrize("method,representation,use_duration_curves", _PARAMS)
+def test_same_cluster_as_input_data(
+    input_data: pd.DataFrame,
+    method: str,
+    representation: str,
+    use_duration_curves: bool,
+) -> None:
+    """When n_clusters equals the number of input periods, reconstruction must
+    be identical to the original time series for every method/representation."""
+    results = tsam.aggregate(
+        input_data,
+        n_clusters=4,
+        period_duration=1,
+        cluster=ClusterConfig(
+            method=method,
+            representation=representation,
+            use_duration_curves=use_duration_curves,
+        ),
+    )
+    pd.testing.assert_frame_equal(results.reconstructed, input_data)
diff --git a/test/test_assert_raises.py b/test/test_assert_raises.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 
 import tsam.timeseriesaggregation as tsam
 from conftest import TESTDATA_CSV
@@ -112,13 +113,12 @@ def test_assert_raises():
     )
 
     # check warning when number of segments per period is higher than the number of time steps per period
-    np.testing.assert_warns(
-        Warning,
-        tsam.TimeSeriesAggregation,
-        timeSeries=raw,
-        segmentation=True,
-        noSegments=25,
-    )
+    with pytest.warns(Warning):
+        tsam.TimeSeriesAggregation(
+            timeSeries=raw,
+            segmentation=True,
+            noSegments=25,
+        )
 
     # check erroneous clusterMethod argument
     np.testing.assert_raises_regex(