chore: use the TagMatchMode enum for tag match mode instead of "any"/"all" string literals

ryanseq-gyg · ryanseq-gyg · commit a19126a3e449 · 2025-11-22T14:13:01.000+01:00
diff --git a/dataframe_expectations/__init__.py b/dataframe_expectations/__init__.py
@@ -14,6 +14,7 @@
     SuiteExecutionResult,
     serialize_violations,
 )
+from dataframe_expectations.core.types import TagMatchMode
 from dataframe_expectations.suite import (
     DataFrameExpectationsSuite,
     DataFrameExpectationsSuiteRunner,
@@ -27,4 +28,5 @@
     "DataFrameExpectationsSuite",
     "DataFrameExpectationsSuiteRunner",
     "DataFrameExpectationsSuiteFailure",
+    "TagMatchMode",
 ]
diff --git a/dataframe_expectations/core/suite_result.py b/dataframe_expectations/core/suite_result.py
@@ -1,11 +1,11 @@
 """Suite execution result models for capturing validation outcomes."""
 
 from datetime import datetime
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, Field, computed_field
 
-from dataframe_expectations.core.types import DataFrameType, DataFrameLike
+from dataframe_expectations.core.types import DataFrameType, DataFrameLike, TagMatchMode
 from dataframe_expectations.core.tagging import TagSet
 import logging
 
@@ -44,7 +44,7 @@ class ExpectationResult(BaseModel):
         description="Sample of violations as list of dicts (limited by violation_sample_limit)",
     )
 
-    model_config = {"frozen": True, "arbitrary_types_allowed": True}  # Make immutable, allow TagSet
+    model_config = {"frozen": True}  # Make immutable
 
 
 class SuiteExecutionResult(BaseModel):
@@ -60,8 +60,9 @@ class SuiteExecutionResult(BaseModel):
     applied_filters: TagSet = Field(
         default_factory=TagSet, description="Tag filters that were applied to select expectations"
     )
-    tag_match_mode: Optional[Literal["any", "all"]] = Field(
-        default=None, description="How tags were matched: 'any' (OR) or 'all' (AND)"
+    tag_match_mode: Optional[TagMatchMode] = Field(
+        default=None,
+        description="How tags were matched: TagMatchMode.ANY (OR) or TagMatchMode.ALL (AND)",
     )
     results: List[ExpectationResult] = Field(
         ..., description="Results for each expectation in execution order (including skipped)"
@@ -74,7 +75,7 @@ class SuiteExecutionResult(BaseModel):
         default=False, description="Whether PySpark dataframe was cached during execution"
     )
 
-    model_config = {"frozen": True, "arbitrary_types_allowed": True}  # Make immutable, allow TagSet
+    model_config = {"frozen": True}  # Make immutable
 
     @computed_field  # type: ignore[misc]
     @property
diff --git a/dataframe_expectations/core/tagging.py b/dataframe_expectations/core/tagging.py
@@ -8,8 +8,10 @@
 
 from typing import Dict, List, Optional, Set
 
+from pydantic import BaseModel, ConfigDict
 
-class TagSet:
+
+class TagSet(BaseModel):
     """
     Collection of tags organized by key, supporting multiple values per key.
 
@@ -19,7 +21,11 @@ class TagSet:
     Tags are specified as strings in "key:value" format.
     """
 
-    def __init__(self, tags: Optional[List[str]] = None):
+    tags: Dict[str, Set[str]] = {}
+
+    model_config = ConfigDict(frozen=True)  # Make immutable
+
+    def __init__(self, tags: Optional[List[str]] = None, **data):
         """
         Initialize TagSet from a list of tag strings.
 
@@ -30,17 +36,22 @@ def __init__(self, tags: Optional[List[str]] = None):
             >>> TagSet(["priority:high", "env:test"])
             >>> TagSet(["priority:high", "priority:medium"])  # Multiple values for same key
         """
-        self._tags: Dict[str, Set[str]] = {}
-
-        if tags:
+        # Parse tags if provided as list
+        if tags is not None:
+            parsed_tags: Dict[str, Set[str]] = {}
             for tag_string in tags:
-                self._add_tag_string(tag_string)
+                TagSet._parse_and_add_tag(tag_string, parsed_tags)
+            data["tags"] = parsed_tags
+
+        super().__init__(**data)
 
-    def _add_tag_string(self, tag_string: str) -> None:
+    @staticmethod
+    def _parse_and_add_tag(tag_string: str, tags_dict: Dict[str, Set[str]]) -> None:
         """
-        Parse and add a tag string in "key:value" format.
+        Parse and add a tag string to the provided dictionary.
 
         :param tag_string: Tag string to parse
+        :param tags_dict: Dictionary to add parsed tag to
         :raises ValueError: If format is invalid
         """
         tag_string = tag_string.strip()
@@ -59,9 +70,9 @@ def _add_tag_string(self, tag_string: str) -> None:
         if not key or not value:
             raise ValueError("Tag key and value must be non-empty strings")
 
-        if key not in self._tags:
-            self._tags[key] = set()
-        self._tags[key].add(value)
+        if key not in tags_dict:
+            tags_dict[key] = set()
+        tags_dict[key].add(value)
 
     def has_any_tag_from(self, other: TagSet) -> bool:
         """
@@ -84,14 +95,14 @@ def has_any_tag_from(self, other: TagSet) -> bool:
             other = TagSet(["priority:medium", "env:test"])
             self.has_any_tag_from(other) -> True (env:test matches)
         """
-        if not other._tags:
+        if not other.tags:
             return True  # Empty filter matches everything
 
         # OR logic: any key with overlapping values
-        for key, required_values in other._tags.items():
-            if key in self._tags:
+        for key, required_values in other.tags.items():
+            if key in self.tags:
                 # Check if there's any overlap between required values and our values
-                if required_values & self._tags[key]:
+                if required_values & self.tags[key]:
                     return True
 
         return False
@@ -117,38 +128,38 @@ def has_all_tags_from(self, other: TagSet) -> bool:
             other = TagSet(["priority:high", "env:prod"])
             self.has_all_tags_from(other) -> False (env:prod doesn't match)
         """
-        if not other._tags:
+        if not other.tags:
             return True  # Empty filter matches everything
 
         # AND logic: all keys must have ALL required values present
-        for key, required_values in other._tags.items():
-            if key not in self._tags:
+        for key, required_values in other.tags.items():
+            if key not in self.tags:
                 return False
             # Check if ALL required values are present in our values
-            if not required_values.issubset(self._tags[key]):
+            if not required_values.issubset(self.tags[key]):
                 return False
 
         return True
 
     def is_empty(self) -> bool:
         """Check if TagSet has no tags."""
-        return len(self._tags) == 0
+        return len(self.tags) == 0
 
     def __len__(self) -> int:
         """Return total number of unique tags (key:value pairs)."""
-        return sum(len(values) for values in self._tags.values())
+        return sum(len(values) for values in self.tags.values())
 
     def __bool__(self) -> bool:
         """Return True if TagSet has any tags."""
-        return bool(self._tags)
+        return bool(self.tags)
 
     def __str__(self) -> str:
         """String representation showing all tags."""
-        tags = []
-        for key in sorted(self._tags.keys()):
-            for value in sorted(self._tags[key]):
-                tags.append(f"{key}:{value}")
-        return f"TagSet({', '.join(tags)})" if tags else "TagSet(empty)"
+        tag_list = []
+        for key in sorted(self.tags.keys()):
+            for value in sorted(self.tags[key]):
+                tag_list.append(f"{key}:{value}")
+        return f"TagSet({', '.join(tag_list)})" if tag_list else "TagSet(empty)"
 
     def __repr__(self) -> str:
         return self.__str__()
diff --git a/dataframe_expectations/core/types.py b/dataframe_expectations/core/types.py
@@ -18,6 +18,13 @@ class DataFrameType(str, Enum):
     PYSPARK = "pyspark"
 
 
+class TagMatchMode(str, Enum):
+    """Enum for tag matching modes."""
+
+    ANY = "any"  # OR logic: expectation matches if it has ANY of the filter tags
+    ALL = "all"  # AND logic: expectation matches if it has ALL of the filter tags
+
+
 class ExpectationCategory(str, Enum):
     """Categories for expectations."""
 
diff --git a/dataframe_expectations/suite.py b/dataframe_expectations/suite.py
@@ -1,7 +1,7 @@
 from functools import wraps
-from typing import Any, Callable, Dict, List, Literal, Optional, cast
+from typing import Any, Callable, Dict, List, Optional, cast
 
-from dataframe_expectations.core.types import DataFrameLike
+from dataframe_expectations.core.types import DataFrameLike, TagMatchMode
 from dataframe_expectations.core.tagging import TagSet
 from dataframe_expectations.registry import (
     DataFrameExpectationRegistry,
@@ -62,35 +62,32 @@ class DataFrameExpectationsSuiteRunner:
     def _matches_tag_filter(
         expectation: Any,
         filter_tag_set: TagSet,
-        tag_match_mode: Literal["any", "all"],
+        tag_match_mode: TagMatchMode,
     ) -> bool:
         """
         Check if an expectation matches the tag filter criteria.
 
         :param expectation: Expectation instance to check.
         :param filter_tag_set: Tag filter to match against.
-        :param tag_match_mode: Match mode - "any" (OR) or "all" (AND).
+        :param tag_match_mode: Match mode - TagMatchMode.ANY (OR) or TagMatchMode.ALL (AND).
         :return: True if expectation matches filter, False otherwise.
-        :raises ValueError: If tag_match_mode is invalid.
         """
         exp_tag_set = expectation.get_tags()
 
         # Check if expectation matches filter
         match tag_match_mode:
-            case "any":
+            case TagMatchMode.ANY:
                 return exp_tag_set.has_any_tag_from(filter_tag_set)
-            case "all":
+            case TagMatchMode.ALL:
                 return exp_tag_set.has_all_tags_from(filter_tag_set)
-            case _:
-                raise ValueError(f"Invalid tag_match_mode: {tag_match_mode}")
 
     def __init__(
         self,
         expectations: List[Any],
         suite_name: Optional[str] = None,
         violation_sample_limit: int = 5,
         tags: Optional[List[str]] = None,
-        tag_match_mode: Optional[Literal["any", "all"]] = None,
+        tag_match_mode: Optional[TagMatchMode] = None,
     ):
         """
         Initialize the runner with a list of expectations and metadata.
@@ -101,10 +98,10 @@ def __init__(
         :param tags: Optional tag filters as list of strings in "key:value" format.
                     Example: ["priority:high", "priority:medium"]
                     If None or empty, all expectations will run.
-        :param tag_match_mode: How to match tags - "any" (OR logic) or "all" (AND logic).
+        :param tag_match_mode: How to match tags - TagMatchMode.ANY (OR logic) or TagMatchMode.ALL (AND logic).
                               Required if tags are provided, must be None if tags are not provided.
-                              - "any": Expectation matches if it has ANY of the filter tags
-                              - "all": Expectation matches if it has ALL of the filter tags
+                              - TagMatchMode.ANY: Expectation matches if it has ANY of the filter tags
+                              - TagMatchMode.ALL: Expectation matches if it has ALL of the filter tags
         :raises ValueError: If tag_match_mode is provided without tags, or if tags are provided without tag_match_mode,
                            or if tag filters result in zero expectations to run.
         """
@@ -122,15 +119,21 @@ def __init__(
 
         if not self.__filter_tag_set.is_empty() and tag_match_mode is None:
             raise ValueError(
-                "tag_match_mode must be specified ('any' or 'all') when tags are provided."
+                "tag_match_mode must be specified (TagMatchMode.ANY or TagMatchMode.ALL) when tags are provided."
             )
 
         self.__tag_match_mode = tag_match_mode
 
         # Filter expectations based on tags and track skipped ones
         if not self.__filter_tag_set.is_empty():
             # At this point, validation ensures tag_match_mode is not None
-            assert tag_match_mode is not None
+            # This check is for type narrowing (mypy/pyright)
+            if tag_match_mode is None:
+                # This should never happen due to validation above, but satisfies type checker
+                raise ValueError(
+                    "tag_match_mode must be specified (TagMatchMode.ANY or TagMatchMode.ALL) when tags are provided."
+                )
+
             filtered = []
             skipped = []
             for exp in self.__all_expectations:
@@ -201,7 +204,7 @@ def run(
         data_frame: DataFrameLike,
         raise_on_failure: bool = True,
         context: Optional[Dict[str, Any]] = None,
-    ) -> Optional[SuiteExecutionResult]:
+    ) -> SuiteExecutionResult:
         """
         Run all expectations on the provided DataFrame with PySpark caching optimization.
 
@@ -458,11 +461,11 @@ class DataFrameExpectationsSuite:
         runner_all.run(df)  # Runs all 3 expectations
 
         # Build runner for high OR medium priority expectations (OR logic)
-        runner_any = suite.build(tags=["priority:high", "priority:medium"], tag_match_mode="any")
+        runner_any = suite.build(tags=["priority:high", "priority:medium"], tag_match_mode=TagMatchMode.ANY)
         runner_any.run(df)  # Runs 2 expectations (age and salary checks)
 
         # Build runner for expectations with both high priority AND compliance category (AND logic)
-        runner_and = suite.build(tags=["priority:high", "category:compliance"], tag_match_mode="all")
+        runner_and = suite.build(tags=["priority:high", "category:compliance"], tag_match_mode=TagMatchMode.ALL)
         runner_and.run(df)  # Runs 1 expectation (age check - has both tags)
     """
 
@@ -530,7 +533,7 @@ def dynamic_method(tags: Optional[List[str]] = None, **kwargs):
     def build(
         self,
         tags: Optional[List[str]] = None,
-        tag_match_mode: Optional[Literal["any", "all"]] = None,
+        tag_match_mode: Optional[TagMatchMode] = None,
     ) -> DataFrameExpectationsSuiteRunner:
         """
         Build an immutable runner from the current expectations.
@@ -542,10 +545,10 @@ def build(
         :param tags: Optional tag filters as list of strings in "key:value" format.
                     Example: ["priority:high", "priority:medium"]
                     If None or empty, all expectations will be included.
-        :param tag_match_mode: How to match tags - "any" (OR logic) or "all" (AND logic).
+        :param tag_match_mode: How to match tags - TagMatchMode.ANY (OR logic) or TagMatchMode.ALL (AND logic).
                               Required if tags are provided, must be None if tags are not provided.
-                              - "any": Include expectations with ANY of the filter tags
-                              - "all": Include expectations with ALL of the filter tags
+                              - TagMatchMode.ANY: Include expectations with ANY of the filter tags
+                              - TagMatchMode.ALL: Include expectations with ALL of the filter tags
         :return: An immutable DataFrameExpectationsSuiteRunner instance.
         :raises ValueError: If no expectations have been added, if tag_match_mode validation fails,
                            or if no expectations match the tag filters.
diff --git a/dataframe_expectations/suite.pyi b/dataframe_expectations/suite.pyi
diff --git a/tests/base/test_tagging.py b/tests/base/test_tagging.py

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,7 @@`
`14`	`14`	`SuiteExecutionResult,`
`15`	`15`	`serialize_violations,`
`16`	`16`	`)`
	`17`	`+from dataframe_expectations.core.types import TagMatchMode`
`17`	`18`	`from dataframe_expectations.suite import (`
`18`	`19`	`DataFrameExpectationsSuite,`
`19`	`20`	`DataFrameExpectationsSuiteRunner,`
`@@ -27,4 +28,5 @@`
`27`	`28`	`"DataFrameExpectationsSuite",`
`28`	`29`	`"DataFrameExpectationsSuiteRunner",`
`29`	`30`	`"DataFrameExpectationsSuiteFailure",`
	`31`	`+ "TagMatchMode",`
`30`	`32`	`]`