Move valid_format from a ModelFactory classmethod to a private _valid_format instance method, pre-commit fixes

Nick Manganelli · Nick Manganelli · commit 2d20ada700c1 · 2025-11-09T21:49:02.000-06:00
diff --git a/src/coffea/dataset_tools/filespec.py b/src/coffea/dataset_tools/filespec.py
@@ -199,7 +199,9 @@ def num_selected_entries(self) -> int | None:
                 total += v.num_selected_entries
         return total
 
-    def limit_steps(self, max_steps: int | slice | None, per_file: bool = False) -> Self:
+    def limit_steps(
+        self, max_steps: int | slice | None, per_file: bool = False
+    ) -> Self:
         """Limit the steps. pass per_file=True to limit steps per file, otherwise limits across all files cumulatively"""
 
         if max_steps is None:
@@ -208,7 +210,10 @@ def limit_steps(self, max_steps: int | slice | None, per_file: bool = False) ->
             return type(self)({k: v.limit_steps(max_steps) for k, v in self.items()})
         else:
             from coffea.dataset_tools.manipulations import _concatenated_step_slice
-            steps_by_file = _concatenated_step_slice({k: v.steps for k, v in self.items()}, max_steps)
+
+            steps_by_file = _concatenated_step_slice(
+                {k: v.steps for k, v in self.items()}, max_steps
+            )
             new_dict = {}
             for k, v in self.items():
                 if len(steps_by_file[k]) > 0:
@@ -398,6 +403,12 @@ def _check_form(self) -> bool | None:
         else:
             return None
 
+    def _valid_format(self) -> bool:
+        _formats = {"root", "parquet"}
+        return self.format in _formats or all(
+            fmt in _formats for fmt in self.format.split("|")
+        )
+
     def set_check_format(self) -> bool:
         """Set and/or alidate the format if manually specified"""
         if self.format is None:
@@ -411,9 +422,7 @@ def set_check_format(self) -> bool:
                 self.format = "|".join(union)
 
         # validate the format, if present
-        if not ModelFactory.valid_format(self.format):
-            return False
-        return True
+        return self._valid_format()
 
     @model_validator(mode="after")
     def post_validate(self) -> Self:
@@ -523,17 +532,17 @@ def steps(self) -> dict[str, list[StepPair]] | None:
         """Get the steps per dataset file, if available."""
         return {k: v.steps for k, v in self.items()}
 
-    def limit_steps(
-        self, max_steps: int | slice, per_file: bool = False
-    ) -> Self:
+    def limit_steps(self, max_steps: int | slice, per_file: bool = False) -> Self:
         """Limit the steps"""
         spec = copy.deepcopy(self)
         # handle both per_file True and False by passthrough
         for k, v in spec.items():
             spec[k] = v.limit_steps(max_steps, per_file=per_file)
         return type(self)(spec)
 
-    def limit_files(self, max_files: int | slice | None, per_dataset: bool = True) -> Self:
+    def limit_files(
+        self, max_files: int | slice | None, per_dataset: bool = True
+    ) -> Self:
         """Limit the number of files."""
         spec = copy.deepcopy(self)
         if per_dataset:
@@ -595,21 +604,9 @@ def identify_file_format(name_or_directory: str) -> str:
 
 
 class ModelFactory:
-    _formats = {"root", "parquet"}
-
     def __init__(self):
         pass
 
-    @classmethod
-    def valid_format(cls, format: str | DatasetSpec) -> bool:
-        if isinstance(format, DatasetSpec):
-            test_format = format.format
-        else:
-            test_format = format
-        return test_format in cls._formats or all(
-            fmt in cls._formats for fmt in test_format.split("|")
-        )
-
     @classmethod
     def attempt_promotion(
         cls,
@@ -694,7 +691,7 @@ def dict_to_datasetspec(cls, input: dict[str, Any], verbose=False) -> DatasetSpe
     def datasetspec_to_dict(
         cls,
         input: DatasetSpec,
-        coerce_filespec_to_dict=True,
+        coerce_filespec_to_dict: bool = True,
     ) -> dict[str, Any]:
         assert isinstance(
             input, DatasetSpec
diff --git a/src/coffea/dataset_tools/manipulations.py b/src/coffea/dataset_tools/manipulations.py
@@ -2,6 +2,7 @@
 
 import copy
 from typing import Any, Callable, Protocol, runtime_checkable
+
 try:
     from typing import Self
 except ImportError:
@@ -19,21 +20,22 @@
     PreprocessedFiles,
 )
 
+
 # protocol for pydantic types that implement limit_files
 @runtime_checkable
 class LimitFilesProtocol(Protocol):
     # handle both limit_files with max_files and max_files + per_dataset
-    def limit_files(self, max_files: int | slice, per_dataset: bool = True) -> Self:
-        ...
+    def limit_files(self, max_files: int | slice, per_dataset: bool = True) -> Self: ...
 @runtime_checkable
 class LimitStepsProtocol(Protocol):
     def limit_steps(
         self, max_steps: int | slice, per_file: bool = False, per_dataset: bool = True
-    ) -> Self:
-        ...
+    ) -> Self: ...
 
 
-def max_chunks(fileset: LimitStepsProtocol | FilesetSpec, maxchunks: int | None = None) -> FilesetSpec:
+def max_chunks(
+    fileset: LimitStepsProtocol | FilesetSpec, maxchunks: int | None = None
+) -> FilesetSpec:
     """
     Modify the input fileset so that only the first "maxchunks" chunks of each dataset will be processed.
 
@@ -72,7 +74,10 @@ def max_chunks_per_file(
     """
     return slice_chunks(fileset, slice(maxchunks), bydataset=False)
 
-def _concatenated_step_slice(stepdict: dict[str, Any], theslice: int | slice) -> dict[str, Any]:
+
+def _concatenated_step_slice(
+    stepdict: dict[str, Any], theslice: int | slice
+) -> dict[str, Any]:
     """
     Modify the input step description to only contain the steps specified by the input slice.
 
@@ -104,11 +109,13 @@ def _concatenated_step_slice(stepdict: dict[str, Any], theslice: int | slice) ->
     # 3) repopulate in order, up to maxchunks total
     for key, step in kept:
         out[key].append(step)
-    return out # {key: steps for key, steps in out.items() if steps}
+    return out  # {key: steps for key, steps in out.items() if steps}
 
 
 def slice_chunks(
-    fileset: LimitStepsProtocol | FilesetSpec, theslice: Any = slice(None), bydataset: bool = True
+    fileset: LimitStepsProtocol | FilesetSpec,
+    theslice: Any = slice(None),
+    bydataset: bool = True,
 ) -> FilesetSpec:
     """
     Modify the input fileset so that only the chunks of each file or each dataset specified by the input slice are processed.
diff --git a/tests/test_dataset_tools_filespec.py b/tests/test_dataset_tools_filespec.py
@@ -998,13 +998,6 @@ def test_invalid_form(self):
 
 class TestModelFactory:
     """Test ModelFactory class methods"""
-
-    def test_valid_format(self):
-        """Test valid_format method"""
-        assert ModelFactory.valid_format("root") is True
-        assert ModelFactory.valid_format("parquet") is True
-        assert ModelFactory.valid_format("invalid") is False
-
     @pytest.mark.parametrize(
         "input_dict",
         [
@@ -1395,9 +1388,7 @@ def test_limit_steps_per_file_slicing(self):
     def test_limit_steps_method_chain_slicing(self):
         """Test limit_steps with slicing"""
         spec = self.get_sliceable_spec()
-        limited_spec = spec.limit_steps(1, per_file=True).limit_steps(
-            1
-        )
+        limited_spec = spec.limit_steps(1, per_file=True).limit_steps(1)
         assert limited_spec.steps == {
             "ZJets1": {
                 "tests/samples/nano_dy.root": [[0, 5]],