emdgroup · dasmy · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026 · anwurl
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -12,7 +12,7 @@ on:
   workflow_dispatch:
 
 concurrency:
-  group: ${{ github.workflow }}
+  group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
 jobs:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -51,7 +51,7 @@ repos:
     rev: "v1.19.1"
     hooks:
       - id: mypy
-        exclude: ^datasets/|examples/|tests/|studies/
+        exclude: ^datasets/|studies/
         additional_dependencies:
           - types-networkx
           - pandas-stubs

diff --git a/examples/basic_classification.py b/examples/basic_classification.py
@@ -9,12 +9,13 @@
 import os
 
 from sklearn.datasets import load_breast_cancer
+from sklearn.utils import Bunch
 
 from octopus.modules import Octo
 from octopus.study import OctoClassification
 
 ### Load and Preprocess Data
-breast_cancer = load_breast_cancer(as_frame=True)
+breast_cancer: Bunch = load_breast_cancer(as_frame=True)  # type: ignore[assignment]
 
 df = breast_cancer["frame"].reset_index()
 df.columns = df.columns.str.replace(" ", "_")

diff --git a/examples/basic_regression.py b/examples/basic_regression.py
@@ -9,11 +9,12 @@
 import os
 
 from sklearn.datasets import load_diabetes
+from sklearn.utils import Bunch
 
 from octopus.study import OctoRegression
 
 ### Load the diabetes dataset
-diabetes = load_diabetes(as_frame=True)
+diabetes: Bunch = load_diabetes(as_frame=True)  # type: ignore[assignment]
 
 ### Create and run OctoRegression
 study = OctoRegression(

diff --git a/examples/multi_workflow.py b/examples/multi_workflow.py
@@ -7,12 +7,13 @@
 import os
 
 from sklearn.datasets import load_diabetes
+from sklearn.utils import Bunch
 
 from octopus.modules import Mrmr, Octo
 from octopus.study import OctoRegression
 
 ### Load the diabetes dataset
-diabetes = load_diabetes(as_frame=True)
+diabetes: Bunch = load_diabetes(as_frame=True)  # type: ignore[assignment]
 
 ### Create and run OctoRegression with multi-step workflow
 study = OctoRegression(

diff --git a/examples/use_own_hyperparameters.py b/examples/use_own_hyperparameters.py
@@ -9,13 +9,14 @@
 import os
 
 from sklearn.datasets import load_diabetes
+from sklearn.utils import Bunch
 
 from octopus.models.hyperparameter import IntHyperparameter
 from octopus.modules import Octo
 from octopus.study import OctoRegression
 
 ### Load the diabetes dataset
-diabetes = load_diabetes(as_frame=True)
+diabetes: Bunch = load_diabetes(as_frame=True)  # type: ignore[assignment]
 
 ### Create and run OctoRegression with custom hyperparameters
 study = OctoRegression(

diff --git a/examples/wf_multiclass_wine.py b/examples/wf_multiclass_wine.py
@@ -11,12 +11,13 @@
 import os
 
 from sklearn.datasets import load_wine
+from sklearn.utils import Bunch
 
 from octopus.modules import Octo
 from octopus.study import OctoClassification
 
 ### Load and Preprocess Data
-wine = load_wine(as_frame=True)
+wine: Bunch = load_wine(as_frame=True)  # type: ignore[assignment]
 
 df = wine["frame"].reset_index()
 df.columns = df.columns.str.replace(" ", "_")

diff --git a/examples/wf_roc_octo.py b/examples/wf_roc_octo.py
@@ -9,6 +9,7 @@
 import os
 
 from sklearn.datasets import load_breast_cancer
+from sklearn.utils import Bunch
 
 from octopus.modules import Octo, Roc
 from octopus.study import OctoClassification
@@ -19,7 +20,7 @@
 # This is a binary classification dataset with 30 features
 # Target: 0 = malignant, 1 = benign
 
-breast_cancer = load_breast_cancer(as_frame=True)
+breast_cancer: Bunch = load_breast_cancer(as_frame=True)  # type: ignore[assignment]
 
 df = breast_cancer["frame"].reset_index()
 df.columns = df.columns.str.replace(" ", "_")

diff --git a/octopus/manager/core.py b/octopus/manager/core.py
@@ -2,6 +2,7 @@
 
 import math
 import os
+from collections.abc import Sequence
 
 from attrs import define, field, validators
 
@@ -133,8 +134,8 @@ class OctoManager:
     study_context: StudyContext = field(validator=[validators.instance_of(StudyContext)])
     """Frozen runtime context containing study configuration."""
 
-    workflow: list[Task] = field(validator=[validators.instance_of(list)])
-    """List of workflow tasks to execute."""
+    workflow: Sequence[Task] = field(validator=[validators.instance_of(list)])
+    """Workflow tasks to execute (annotated as a Sequence; the validator still requires a list)."""
 
     outer_parallelization: bool = field(validator=[validators.instance_of(bool)])
     """Whether to run outersplits in parallel."""

diff --git a/octopus/manager/workflow_runner.py b/octopus/manager/workflow_runner.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import json
+from typing import TYPE_CHECKING
 
 import pandas as pd
 import ray
@@ -14,6 +15,9 @@
 from octopus.modules import ModuleResult, ResultType, StudyContext, Task
 from octopus.utils import calculate_feature_groups, parquet_save
 
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
 logger = get_logger()
 
 
@@ -33,7 +37,7 @@ class WorkflowTaskRunner:
     """
 
     study_context: StudyContext = field(validator=[validators.instance_of(StudyContext)])
-    workflow: list[Task] = field(validator=[validators.instance_of(list)])
+    workflow: Sequence[Task] = field(validator=[validators.instance_of(list)])
     cpus_per_outersplit: int = field(validator=[validators.instance_of(int)])
 
     def run(self, outersplit_id: int, outersplit: OuterSplit) -> None:

diff --git a/octopus/models/core.py b/octopus/models/core.py
@@ -45,6 +45,11 @@ def decorator(factory: Callable[[], ModelConfig]) -> Callable[[], ModelConfig]:
 
         return decorator
 
+    @classmethod
+    def get_registered_models(cls) -> list[ModelName]:
+        """Get a list of all registered model names."""
+        return [ModelName(name) for name in cls._config_factories]
+
     @classmethod
     def get_config(cls, name: ModelName) -> ModelConfig:
         """Get model configuration by name.
@@ -185,6 +190,5 @@ def validate_model_compatibility(cls, model_name: ModelName, ml_type: MLType) ->
         config = cls.get_config(model_name)
         if not config.supports_ml_type(ml_type):
             raise ValueError(
-                f"Model '{model_name}' does not support ml_type '{ml_type.value}'. "
-                f"Supported types: {', '.join(t.value for t in config.ml_types)}"
+                f"Model '{model_name}' does not support ml_type '{ml_type.value}'. Supported types: {', '.join(t.value for t in config.ml_types)}"
             )
diff --git a/octopus/modules/octo/enssel.py b/octopus/modules/octo/enssel.py
@@ -1,6 +1,6 @@
 """Ensemble selection."""
 
-# TOBEDONE
+# TODO
 # - issue: ACC and BALACC need integer pooling values!
 # - potential issue: check start_n, +1 or not
 # - get FI and counts

diff --git a/octopus/modules/octo/training.py b/octopus/modules/octo/training.py
@@ -24,15 +24,15 @@
 from octopus.models import ModelName, Models
 from octopus.types import MLType
 
-# # TOBEDONE pipeline
+# # TODO pipeline
 # - implement cat encoding on module level
 # - how to provide categorical info to catboost and other models?
 
 
 logger = get_logger()
 
 
-class TrainingConfig(TypedDict):
+class TrainingConfig(TypedDict, total=False):
     """Training configuration type."""
 
     outl_reduction: int

diff --git a/octopus/study/core.py b/octopus/study/core.py
@@ -5,6 +5,7 @@
 import os
 import platform
 from abc import ABC, abstractmethod
+from collections.abc import Sequence
 from datetime import UTC
 
 import pandas as pd
@@ -77,7 +78,7 @@ class OctoStudy(ABC):
     run_single_outersplit_num: int = field(default=Factory(lambda: -1), validator=[validators.instance_of(int)])
     """Select a single outersplit to execute. Defaults to -1 to run all outersplits"""
 
-    workflow: list[Task] = field(
+    workflow: Sequence[Task] = field(
         default=Factory(lambda: [Octo(task_id=0)]),
         validator=[validators.instance_of(list), validate_workflow],
     )

diff --git a/octopus/study/validation.py b/octopus/study/validation.py
@@ -1,5 +1,6 @@
 """Validation functions for OctoStudy attributes."""
 
+from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
 from attrs import Attribute
@@ -10,7 +11,7 @@
     from octopus.study.core import OctoStudy
 
 
-def validate_workflow(_instance: "OctoStudy", attribute: Attribute, value: list[Task]) -> None:
+def validate_workflow(_instance: "OctoStudy", attribute: Attribute, value: Sequence[Task]) -> None:
     """Validate the `workflow` attribute.
 
     Ensures that the value is a non-empty list where each item is an
@@ -109,9 +110,7 @@ def validate_workflow(_instance: "OctoStudy", attribute: Attribute, value: list[
         if depends_on is not None:
             if depends_on not in task_id_to_index:
                 raise ValueError(
-                    f"Item '{item.description}' (position {idx + 1}) has "
-                    f"'depends_on={depends_on}', which does not"
-                    " correspond to any 'task_id' in the workflow."
+                    f"Item '{item.description}' (position {idx + 1}) has 'depends_on={depends_on}', which does not correspond to any 'task_id' in the workflow."
                 )
             depends_on_idx = task_id_to_index[depends_on]
             if depends_on_idx >= idx:

diff --git a/pyproject.toml b/pyproject.toml
@@ -171,7 +171,7 @@ show_error_codes = true
 no_implicit_optional = true
 warn_return_any = true
 warn_unused_ignores = true
-exclude = ["examples", "docs", "tests", "studies"]
+exclude = ["docs", "studies"]
 
 
 [[tool.mypy.overrides]]

diff --git a/tests/infrastructure/test_file_io.py b/tests/infrastructure/test_file_io.py
@@ -48,7 +48,7 @@
 def test_parquet_dtype_roundtrip(tmp_path, data, dtype):
     """Test that saving and loading a DataFrame with parquet_save and parquet_load works correctly."""
     if dtype == "CategoricalDtype":
-        dtype = pd.CategoricalDtype(set(data))
+        dtype = pd.CategoricalDtype(sorted(set(data)))
     elif dtype == "StringDtype":
         dtype = pd.StringDtype()
 

diff --git a/tests/manager/test_core.py b/tests/manager/test_core.py
@@ -133,7 +133,7 @@ def test_frozen(self):
             num_outersplits=4,
         )
         with pytest.raises(attrs.exceptions.FrozenInstanceError):
-            config.num_cpus = 8
+            config.num_cpus = 8  # type: ignore[misc]
 
     def test_create_single_outersplit_gets_all_cpus(self):
         """Test that when running a single outersplit, it gets all CPUs.

diff --git a/tests/metrics/test_metrics_uniqueness.py b/tests/metrics/test_metrics_uniqueness.py
@@ -9,6 +9,7 @@
 import pytest
 
 from octopus.metrics import Metrics
+from octopus.metrics.config import Metric
 from octopus.types import ML_TYPES, MLType
 
 
@@ -28,8 +29,7 @@ def test_registry_keys_are_unique(self):
         unique_keys = set(registry_keys)
 
         assert len(registry_keys) == len(unique_keys), (
-            f"Registry keys are not unique. Found {len(registry_keys)} keys but only "
-            f"{len(unique_keys)} unique keys. Keys: {sorted(registry_keys)}"
+            f"Registry keys are not unique. Found {len(registry_keys)} keys but only {len(unique_keys)} unique keys. Keys: {sorted(registry_keys)}"
         )
 
     def test_metric_config_names_are_unique(self):
@@ -38,19 +38,14 @@ def test_metric_config_names_are_unique(self):
         This is critical for the utils functions that deduce ml_type from metrics.
         """
         config_names = []
-        config_name_to_registry_key = {}
+        config_name_to_registry_key = defaultdict(list)
 
         for registry_key in self.all_metrics:
             try:
                 config = Metrics.get_instance(registry_key)
                 config_name = config.name
                 config_names.append(config_name)
-
-                if config_name in config_name_to_registry_key:
-                    config_name_to_registry_key[config_name].append(registry_key)
-                else:
-                    config_name_to_registry_key[config_name] = [registry_key]
-
+                config_name_to_registry_key[config_name].append(registry_key)
             except Exception as e:
                 pytest.fail(f"Failed to get config for metric '{registry_key}': {e}")
 
@@ -120,9 +115,9 @@ def test_all_metrics_have_valid_ml_types(self):
 
         # Print distribution for documentation
         print("\n=== ML Type Distribution ===")
-        for ml_type in sorted(ml_type_distribution):
-            metrics = sorted(ml_type_distribution[ml_type])
-            print(f"{ml_type} ({len(metrics)}): {metrics}")
+        for ml_type_s in sorted(ml_type_distribution):
+            metrics = sorted(ml_type_distribution[ml_type_s])
+            print(f"{ml_type_s} ({len(metrics)}): {metrics}")
 
     def test_all_metrics_have_valid_prediction_types(self):
         """Test that all metrics have valid prediction_type values."""
@@ -164,14 +159,12 @@ def test_metrics_loaded_dynamically(self):
         missing_types = expected_ml_types - ml_types
 
         assert not missing_types, (
-            f"Missing expected ML types: {missing_types}. "
-            f"Found ML types: {sorted(ml_types)}. "
-            f"This suggests some metric modules may not be imported properly."
+            f"Missing expected ML types: {missing_types}. Found ML types: {sorted(ml_types)}. This suggests some metric modules may not be imported properly."
         )
 
     def test_no_metric_config_attribute_conflicts(self):
         """Test that metric configs don't have conflicting attributes for same names."""
-        configs_by_name = {}
+        configs_by_name: dict[str, Metric] = {}
         conflicts = []
 
         for registry_key in self.all_metrics:
@@ -190,14 +183,12 @@ def test_no_metric_config_attribute_conflicts(self):
 
                     if existing_config.prediction_type != config.prediction_type:
                         conflicts.append(
-                            f"'{config_name}': prediction_type conflict - "
-                            f"'{existing_config.prediction_type}' vs '{config.prediction_type}'"
+                            f"'{config_name}': prediction_type conflict - '{existing_config.prediction_type}' vs '{config.prediction_type}'"
                         )
 
                     if existing_config.higher_is_better != config.higher_is_better:
                         conflicts.append(
-                            f"'{config_name}': higher_is_better conflict - "
-                            f"'{existing_config.higher_is_better}' vs '{config.higher_is_better}'"
+                            f"'{config_name}': higher_is_better conflict - '{existing_config.higher_is_better}' vs '{config.higher_is_better}'"
                         )
                 else:
                     configs_by_name[config_name] = config