From 42a233ade81fc2af3ce0462ab24f189d969756bd Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 13:03:24 +0100 Subject: [PATCH 1/8] feat: restructure codebase with core/ module and explicit imports --- dataframe_expectations/__init__.py | 17 +-- dataframe_expectations/core/__init__.py | 3 + .../aggregation_expectation.py | 4 +- .../column_expectation.py | 4 +- dataframe_expectations/core/expectation.py | 110 ++++++++++++++++++ dataframe_expectations/core/types.py | 57 +++++++++ .../{expectations => core}/utils.py | 2 +- .../expectations/__init__.py | 2 +- .../expectations/aggregation/__init__.py | 3 + .../any_value.py} | 8 +- .../numerical.py} | 8 +- .../unique.py | 8 +- .../aggregation_expectations/__init__.py | 0 .../expectations/column/__init__.py | 3 + .../any_value.py} | 6 +- .../numerical.py} | 6 +- .../string.py} | 6 +- .../column_expectations/__init__.py | 0 .../expectation_registry.py => registry.py} | 6 +- dataframe_expectations/result_message.py | 2 +- .../{expectations_suite.py => suite.py} | 6 +- docs/source/_ext/expectations_autodoc.py | 4 +- docs/source/adding_expectations.rst | 16 +-- docs/source/getting_started.rst | 6 +- scripts/generate_suite_stubs.py | 4 +- scripts/sanity_checks.py | 2 +- .../test_column_expectations.py | 4 +- .../test_expectation_registry.py | 2 +- .../test_expectations.py | 12 +- .../expectations_helper_classes/test_utils.py | 2 +- ...t_expect_distinct_column_values_between.py | 6 +- ...st_expect_distinct_column_values_equals.py | 6 +- ...ect_distinct_column_values_greater_than.py | 6 +- ...expect_distinct_column_values_less_than.py | 6 +- .../test_expect_max_null_count.py | 6 +- .../test_expect_max_null_percentage.py | 6 +- .../test_expect_max_rows.py | 6 +- .../test_expect_min_rows.py | 6 +- .../test_expect_unique_rows.py | 6 +- .../test_expect_column_max_between.py | 6 +- .../test_expect_column_mean_between.py | 6 +- .../test_expect_column_median_between.py | 6 +- .../test_expect_column_min_between.py | 6 +- 
.../test_expect_column_quantile_between.py | 6 +- .../test_expect_value_equals.py | 6 +- .../test_expect_value_in.py | 6 +- .../test_expect_value_not_equals.py | 6 +- .../test_expect_value_not_in.py | 6 +- .../test_expect_value_not_null.py | 6 +- .../test_expect_value_null.py | 6 +- .../test_expect_value_between.py | 6 +- .../test_expect_value_greater_than.py | 6 +- .../test_expect_value_less_than.py | 6 +- .../test_expect_string_contains.py | 6 +- .../test_expect_string_ends_with.py | 6 +- .../test_expect_string_length_between.py | 6 +- .../test_expect_string_length_equals.py | 6 +- .../test_expect_string_length_greater_than.py | 6 +- .../test_expect_string_length_less_than.py | 6 +- .../test_expect_string_not_contains.py | 6 +- .../test_expect_string_starts_with.py | 6 +- .../template_test_expectation.py | 2 +- tests/test_expectations_suite.py | 6 +- tests/test_result_message.py | 2 +- 64 files changed, 337 insertions(+), 172 deletions(-) create mode 100644 dataframe_expectations/core/__init__.py rename dataframe_expectations/{expectations => core}/aggregation_expectation.py (97%) rename dataframe_expectations/{expectations => core}/column_expectation.py (96%) create mode 100644 dataframe_expectations/core/expectation.py create mode 100644 dataframe_expectations/core/types.py rename dataframe_expectations/{expectations => core}/utils.py (97%) create mode 100644 dataframe_expectations/expectations/aggregation/__init__.py rename dataframe_expectations/expectations/{aggregation_expectations/any_value_expectations.py => aggregation/any_value.py} (98%) rename dataframe_expectations/expectations/{aggregation_expectations/numerical_expectations.py => aggregation/numerical.py} (98%) rename dataframe_expectations/expectations/{aggregation_expectations => aggregation}/unique.py (99%) delete mode 100644 dataframe_expectations/expectations/aggregation_expectations/__init__.py create mode 100644 dataframe_expectations/expectations/column/__init__.py rename 
dataframe_expectations/expectations/{column_expectations/any_value_expectations.py => column/any_value.py} (96%) rename dataframe_expectations/expectations/{column_expectations/numerical_expectations.py => column/numerical.py} (94%) rename dataframe_expectations/expectations/{column_expectations/string_expectations.py => column/string.py} (97%) delete mode 100644 dataframe_expectations/expectations/column_expectations/__init__.py rename dataframe_expectations/{expectations/expectation_registry.py => registry.py} (98%) rename dataframe_expectations/{expectations_suite.py => suite.py} (98%) diff --git a/dataframe_expectations/__init__.py b/dataframe_expectations/__init__.py index a7c7e58..295fa6b 100644 --- a/dataframe_expectations/__init__.py +++ b/dataframe_expectations/__init__.py @@ -1,16 +1,5 @@ -from enum import Enum -from typing import Union +"""DataFrame Expectations - A validation library for pandas and PySpark DataFrames.""" -from pandas import DataFrame as PandasDataFrame -from pyspark.sql import DataFrame as PySparkDataFrame +__version__ = "0.3.0" -DataFrameLike = Union[PySparkDataFrame, PandasDataFrame] - - -class DataFrameType(str, Enum): - """ - Enum for DataFrame types. 
- """ - - PANDAS = "pandas" - PYSPARK = "pyspark" +__all__ = [] diff --git a/dataframe_expectations/core/__init__.py b/dataframe_expectations/core/__init__.py new file mode 100644 index 0000000..436fc0d --- /dev/null +++ b/dataframe_expectations/core/__init__.py @@ -0,0 +1,3 @@ +"""Core base classes and interfaces for DataFrame expectations.""" + +__all__ = [] diff --git a/dataframe_expectations/expectations/aggregation_expectation.py b/dataframe_expectations/core/aggregation_expectation.py similarity index 97% rename from dataframe_expectations/expectations/aggregation_expectation.py rename to dataframe_expectations/core/aggregation_expectation.py index b6a1b2e..351a8e4 100644 --- a/dataframe_expectations/expectations/aggregation_expectation.py +++ b/dataframe_expectations/core/aggregation_expectation.py @@ -1,8 +1,8 @@ from abc import abstractmethod from typing import List, Union -from dataframe_expectations import DataFrameLike, DataFrameType -from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.core.types import DataFrameLike, DataFrameType +from dataframe_expectations.core.expectation import DataFrameExpectation from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, diff --git a/dataframe_expectations/expectations/column_expectation.py b/dataframe_expectations/core/column_expectation.py similarity index 96% rename from dataframe_expectations/expectations/column_expectation.py rename to dataframe_expectations/core/column_expectation.py index af63b08..392f82a 100644 --- a/dataframe_expectations/expectations/column_expectation.py +++ b/dataframe_expectations/core/column_expectation.py @@ -1,7 +1,7 @@ from typing import Callable -from dataframe_expectations import DataFrameLike, DataFrameType -from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.core.types import DataFrameLike, DataFrameType +from 
dataframe_expectations.core.expectation import DataFrameExpectation from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, diff --git a/dataframe_expectations/core/expectation.py b/dataframe_expectations/core/expectation.py new file mode 100644 index 0000000..c677b32 --- /dev/null +++ b/dataframe_expectations/core/expectation.py @@ -0,0 +1,110 @@ +from abc import ABC, abstractmethod +from typing import cast + +from pandas import DataFrame as PandasDataFrame +from pyspark.sql import DataFrame as PySparkDataFrame + +# Import the connect DataFrame type for Spark Connect +try: + from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame +except ImportError: + # Fallback for older PySpark versions that don't have connect + PySparkConnectDataFrame = None # type: ignore[misc,assignment] + +from dataframe_expectations.core.types import DataFrameLike, DataFrameType +from dataframe_expectations.result_message import ( + DataFrameExpectationResultMessage, +) + + +class DataFrameExpectation(ABC): + """ + Base class for DataFrame expectations. + """ + + def get_expectation_name(self) -> str: + """ + Returns the class name as the expectation name. + """ + return type(self).__name__ + + @abstractmethod + def get_description(self) -> str: + """ + Returns a description of the expectation. + """ + raise NotImplementedError( + f"description method must be implemented for {self.__class__.__name__}" + ) + + def __str__(self): + """ + Returns a string representation of the expectation. + """ + return f"{self.get_expectation_name()} ({self.get_description()})" + + @classmethod + def infer_data_frame_type(cls, data_frame: DataFrameLike) -> DataFrameType: + """ + Infer the DataFrame type based on the provided DataFrame. 
+ """ + if isinstance(data_frame, PandasDataFrame): + return DataFrameType.PANDAS + elif isinstance(data_frame, PySparkDataFrame): + return DataFrameType.PYSPARK + elif PySparkConnectDataFrame is not None and isinstance( + data_frame, PySparkConnectDataFrame + ): + return DataFrameType.PYSPARK + else: + raise ValueError(f"Unsupported DataFrame type: {type(data_frame)}") + + def validate(self, data_frame: DataFrameLike, **kwargs): + """ + Validate the DataFrame against the expectation. + """ + data_frame_type = self.infer_data_frame_type(data_frame) + + if data_frame_type == DataFrameType.PANDAS: + return self.validate_pandas(data_frame=data_frame, **kwargs) + elif data_frame_type == DataFrameType.PYSPARK: + return self.validate_pyspark(data_frame=data_frame, **kwargs) + else: + raise ValueError(f"Unsupported DataFrame type: {data_frame_type}") + + @abstractmethod + def validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a pandas DataFrame against the expectation. + """ + raise NotImplementedError( + f"validate_pandas method must be implemented for {self.__class__.__name__}" + ) + + @abstractmethod + def validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a PySpark DataFrame against the expectation. + """ + raise NotImplementedError( + f"validate_pyspark method must be implemented for {self.__class__.__name__}" + ) + + @classmethod + def num_data_frame_rows(cls, data_frame: DataFrameLike) -> int: + """ + Count the number of rows in the DataFrame. 
+ """ + data_frame_type = cls.infer_data_frame_type(data_frame) + if data_frame_type == DataFrameType.PANDAS: + # Cast to PandasDataFrame since we know it's a Pandas DataFrame at this point + return len(cast(PandasDataFrame, data_frame)) + elif data_frame_type == DataFrameType.PYSPARK: + # Cast to PySparkDataFrame since we know it's a PySpark DataFrame at this point + return cast(PySparkDataFrame, data_frame).count() + else: + raise ValueError(f"Unsupported DataFrame type: {data_frame_type}") diff --git a/dataframe_expectations/core/types.py b/dataframe_expectations/core/types.py new file mode 100644 index 0000000..e681994 --- /dev/null +++ b/dataframe_expectations/core/types.py @@ -0,0 +1,57 @@ +"""Core types, enums, and data models for dataframe-expectations.""" + +from enum import Enum +from typing import Any, Dict, Union + +from pandas import DataFrame as PandasDataFrame +from pydantic import BaseModel, ConfigDict, Field +from pyspark.sql import DataFrame as PySparkDataFrame + +# Type aliases +DataFrameLike = Union[PySparkDataFrame, PandasDataFrame] + + +class DataFrameType(str, Enum): + """Enum for DataFrame types.""" + + PANDAS = "pandas" + PYSPARK = "pyspark" + + +class ExpectationCategory(str, Enum): + """Categories for expectations.""" + + COLUMN_EXPECTATIONS = "Column Expectations" + COLUMN_AGGREGATION_EXPECTATIONS = "Column Aggregation Expectations" + DATAFRAME_AGGREGATION_EXPECTATIONS = "DataFrame Aggregation Expectations" + + +class ExpectationSubcategory(str, Enum): + """Subcategory of expectations.""" + + ANY_VALUE = "Any Value" + NUMERICAL = "Numerical" + STRING = "String" + UNIQUE = "Unique" + + +class ExpectationMetadata(BaseModel): + """Metadata for a registered expectation.""" + + suite_method_name: str = Field( + ..., description="Method name in ExpectationsSuite (e.g., 'expect_value_greater_than')" + ) + pydoc: str = Field(..., description="Human-readable description of the expectation") + category: ExpectationCategory = Field(..., 
description="Category (e.g., 'Column Expectations')") + subcategory: ExpectationSubcategory = Field( + ..., description="Subcategory (e.g., 'Numerical', 'String')" + ) + params_doc: Dict[str, str] = Field(..., description="Documentation for each parameter") + params: list = Field(default_factory=list, description="List of required parameter names") + param_types: Dict[str, Any] = Field( + default_factory=dict, description="Type hints for parameters" + ) + factory_func_name: str = Field(..., description="Name of the factory function") + expectation_name: str = Field(..., description="Name of the expectation class") + + model_config = ConfigDict(frozen=True) # Make model immutable diff --git a/dataframe_expectations/expectations/utils.py b/dataframe_expectations/core/utils.py similarity index 97% rename from dataframe_expectations/expectations/utils.py rename to dataframe_expectations/core/utils.py index 443d5c4..147f32a 100644 --- a/dataframe_expectations/expectations/utils.py +++ b/dataframe_expectations/core/utils.py @@ -1,7 +1,7 @@ from functools import wraps from typing import Any, Callable, Dict, Optional, Tuple, Type, Union, get_args -from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.core.expectation import DataFrameExpectation def requires_params( diff --git a/dataframe_expectations/expectations/__init__.py b/dataframe_expectations/expectations/__init__.py index 09233c2..c677b32 100644 --- a/dataframe_expectations/expectations/__init__.py +++ b/dataframe_expectations/expectations/__init__.py @@ -11,7 +11,7 @@ # Fallback for older PySpark versions that don't have connect PySparkConnectDataFrame = None # type: ignore[misc,assignment] -from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.core.types import DataFrameLike, DataFrameType from dataframe_expectations.result_message import ( DataFrameExpectationResultMessage, ) diff --git 
a/dataframe_expectations/expectations/aggregation/__init__.py b/dataframe_expectations/expectations/aggregation/__init__.py new file mode 100644 index 0000000..d71d110 --- /dev/null +++ b/dataframe_expectations/expectations/aggregation/__init__.py @@ -0,0 +1,3 @@ +"""Aggregation expectations.""" + +__all__ = [] diff --git a/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py b/dataframe_expectations/expectations/aggregation/any_value.py similarity index 98% rename from dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py rename to dataframe_expectations/expectations/aggregation/any_value.py index 740231f..88951ed 100644 --- a/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py +++ b/dataframe_expectations/expectations/aggregation/any_value.py @@ -4,16 +4,16 @@ from pyspark.sql import DataFrame as PySparkDataFrame from pyspark.sql import functions as F -from dataframe_expectations import DataFrameLike, DataFrameType -from dataframe_expectations.expectations.aggregation_expectation import ( +from dataframe_expectations.core.types import DataFrameLike, DataFrameType +from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, register_expectation, ) -from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.core.utils import requires_params from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, diff --git a/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py b/dataframe_expectations/expectations/aggregation/numerical.py similarity index 98% rename from 
dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py rename to dataframe_expectations/expectations/aggregation/numerical.py index 50489c5..2da6abd 100644 --- a/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py +++ b/dataframe_expectations/expectations/aggregation/numerical.py @@ -5,16 +5,16 @@ from pyspark.sql import DataFrame as PySparkDataFrame from pyspark.sql import functions as F -from dataframe_expectations import DataFrameLike, DataFrameType -from dataframe_expectations.expectations.aggregation_expectation import ( +from dataframe_expectations.core.types import DataFrameLike, DataFrameType +from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, register_expectation, ) -from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.core.utils import requires_params from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, diff --git a/dataframe_expectations/expectations/aggregation_expectations/unique.py b/dataframe_expectations/expectations/aggregation/unique.py similarity index 99% rename from dataframe_expectations/expectations/aggregation_expectations/unique.py rename to dataframe_expectations/expectations/aggregation/unique.py index ce3ac4a..f046ba2 100644 --- a/dataframe_expectations/expectations/aggregation_expectations/unique.py +++ b/dataframe_expectations/expectations/aggregation/unique.py @@ -5,16 +5,16 @@ from pyspark.sql import DataFrame as PySparkDataFrame from pyspark.sql import functions as F -from dataframe_expectations import DataFrameLike, DataFrameType -from dataframe_expectations.expectations.aggregation_expectation import ( +from dataframe_expectations.core.types 
import DataFrameLike, DataFrameType +from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, register_expectation, ) -from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.core.utils import requires_params from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, diff --git a/dataframe_expectations/expectations/aggregation_expectations/__init__.py b/dataframe_expectations/expectations/aggregation_expectations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dataframe_expectations/expectations/column/__init__.py b/dataframe_expectations/expectations/column/__init__.py new file mode 100644 index 0000000..1b50892 --- /dev/null +++ b/dataframe_expectations/expectations/column/__init__.py @@ -0,0 +1,3 @@ +"""Column expectations.""" + +__all__ = [] diff --git a/dataframe_expectations/expectations/column_expectations/any_value_expectations.py b/dataframe_expectations/expectations/column/any_value.py similarity index 96% rename from dataframe_expectations/expectations/column_expectations/any_value_expectations.py rename to dataframe_expectations/expectations/column/any_value.py index 974bfb8..92f5122 100644 --- a/dataframe_expectations/expectations/column_expectations/any_value_expectations.py +++ b/dataframe_expectations/expectations/column/any_value.py @@ -1,14 +1,14 @@ from pyspark.sql import functions as F -from dataframe_expectations.expectations.column_expectation import ( +from dataframe_expectations.core.column_expectation import ( DataFrameColumnExpectation, ) -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, 
register_expectation, ) -from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.core.utils import requires_params @register_expectation( diff --git a/dataframe_expectations/expectations/column_expectations/numerical_expectations.py b/dataframe_expectations/expectations/column/numerical.py similarity index 94% rename from dataframe_expectations/expectations/column_expectations/numerical_expectations.py rename to dataframe_expectations/expectations/column/numerical.py index b637105..58f55ff 100644 --- a/dataframe_expectations/expectations/column_expectations/numerical_expectations.py +++ b/dataframe_expectations/expectations/column/numerical.py @@ -1,14 +1,14 @@ from pyspark.sql import functions as F -from dataframe_expectations.expectations.column_expectation import ( +from dataframe_expectations.core.column_expectation import ( DataFrameColumnExpectation, ) -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, register_expectation, ) -from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.core.utils import requires_params @register_expectation( diff --git a/dataframe_expectations/expectations/column_expectations/string_expectations.py b/dataframe_expectations/expectations/column/string.py similarity index 97% rename from dataframe_expectations/expectations/column_expectations/string_expectations.py rename to dataframe_expectations/expectations/column/string.py index a3d53c9..7eec4c4 100644 --- a/dataframe_expectations/expectations/column_expectations/string_expectations.py +++ b/dataframe_expectations/expectations/column/string.py @@ -1,14 +1,14 @@ from pyspark.sql import functions as F -from dataframe_expectations.expectations.column_expectation import ( +from dataframe_expectations.core.column_expectation import ( DataFrameColumnExpectation, ) -from 
dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, register_expectation, ) -from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.core.utils import requires_params @register_expectation( diff --git a/dataframe_expectations/expectations/column_expectations/__init__.py b/dataframe_expectations/expectations/column_expectations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/dataframe_expectations/expectations/expectation_registry.py b/dataframe_expectations/registry.py similarity index 98% rename from dataframe_expectations/expectations/expectation_registry.py rename to dataframe_expectations/registry.py index ff1c6dc..2ca256d 100644 --- a/dataframe_expectations/expectations/expectation_registry.py +++ b/dataframe_expectations/registry.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field -from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.core.expectation import DataFrameExpectation from dataframe_expectations.logging_utils import setup_logger logger = setup_logger(__name__) @@ -133,8 +133,8 @@ def _convert_to_suite_method(cls, expectation_name: str) -> str: # Remove 'Expectation' prefix name = re.sub(r"^Expectation", "", expectation_name) # Convert CamelCase to snake_case - snake = re.sub("([A-Z]+)([A-Z][a-z])", r"\1_\2", name) - snake = re.sub("([a-z\d])([A-Z])", r"\1_\2", snake) + snake = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", name) + snake = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", snake) return "expect_" + snake.lower() @classmethod diff --git a/dataframe_expectations/result_message.py b/dataframe_expectations/result_message.py index 34e5f56..25d7a80 100644 --- a/dataframe_expectations/result_message.py +++ b/dataframe_expectations/result_message.py @@ -3,7 +3,7 @@ from tabulate import tabulate # type: ignore -from 
dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.core.types import DataFrameLike, DataFrameType class DataFrameExpectationResultMessage(ABC): diff --git a/dataframe_expectations/expectations_suite.py b/dataframe_expectations/suite.py similarity index 98% rename from dataframe_expectations/expectations_suite.py rename to dataframe_expectations/suite.py index 6c26cdd..67f9d3b 100644 --- a/dataframe_expectations/expectations_suite.py +++ b/dataframe_expectations/suite.py @@ -2,7 +2,7 @@ from typing import Callable, List, Optional, cast from dataframe_expectations.expectations import DataFrameLike -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) from dataframe_expectations.logging_utils import setup_logger @@ -84,8 +84,8 @@ def run( :param data_frame: The DataFrame to validate. """ - from dataframe_expectations import DataFrameType - from dataframe_expectations.expectations import DataFrameExpectation + from dataframe_expectations.core.types import DataFrameType + from dataframe_expectations.core.expectation import DataFrameExpectation successes = [] failures = [] diff --git a/docs/source/_ext/expectations_autodoc.py b/docs/source/_ext/expectations_autodoc.py index e8306e5..556f9a4 100644 --- a/docs/source/_ext/expectations_autodoc.py +++ b/docs/source/_ext/expectations_autodoc.py @@ -13,8 +13,8 @@ from sphinx.application import Sphinx from sphinx.util.docutils import SphinxDirective -from dataframe_expectations.expectations.expectation_registry import DataFrameExpectationRegistry -from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite +from dataframe_expectations.registry import DataFrameExpectationRegistry +from dataframe_expectations.suite import DataFrameExpectationsSuite def parse_metadata_from_docstring(docstring: str) -> Tuple[str, str]: diff --git a/docs/source/adding_expectations.rst 
b/docs/source/adding_expectations.rst index 9aa6304..74d86b5 100644 --- a/docs/source/adding_expectations.rst +++ b/docs/source/adding_expectations.rst @@ -38,13 +38,13 @@ Once you have decided where the expectation needs to be added, you can define it .. code-block:: python - from dataframe_expectations.expectations.column_expectation import DataFrameColumnExpectation - from dataframe_expectations.expectations.expectation_registry import ( + from dataframe_expectations.core.column_expectation import DataFrameColumnExpectation + from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, register_expectation, ) - from dataframe_expectations.expectations.utils import requires_params + from dataframe_expectations.core.utils import requires_params from pyspark.sql import functions as F @@ -109,15 +109,15 @@ Here's an example of how to implement an aggregation-based expectation: .. code-block:: python from dataframe_expectations import DataFrameLike, DataFrameType - from dataframe_expectations.expectations.aggregation_expectation import ( + from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) - from dataframe_expectations.expectations.expectation_registry import ( + from dataframe_expectations.registry import ( ExpectationCategory, ExpectationSubcategory, register_expectation, ) - from dataframe_expectations.expectations.utils import requires_params + from dataframe_expectations.core.utils import requires_params from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, @@ -353,7 +353,7 @@ To help you get started, here's a template you can customize to fit your specifi from typing import Callable from dataframe_expectations import DataFrameLike, DataFrameType - from dataframe_expectations.expectations import DataFrameExpectation + from dataframe_expectations.core.expectation import DataFrameExpectation from 
dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, @@ -439,7 +439,7 @@ To ensure your expectations work as expected (pun intended), make sure to add un import pandas as pd from dataframe_expectations import DataFrameType - from dataframe_expectations.expectations.expectation_registry import ( + from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) from dataframe_expectations.result_message import ( diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index 6d8fa2d..783a0bc 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -32,7 +32,7 @@ Basic Usage with Pandas .. code-block:: python import pandas as pd - from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite + from dataframe_expectations.suite import DataFrameExpectationsSuite # Build a suite with expectations suite = ( @@ -61,7 +61,7 @@ PySpark Example .. code-block:: python - from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite + from dataframe_expectations.suite import DataFrameExpectationsSuite from pyspark.sql import SparkSession # Initialize Spark session @@ -97,7 +97,7 @@ Decorator Pattern for Automatic Validation .. 
code-block:: python - from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite + from dataframe_expectations.suite import DataFrameExpectationsSuite from pyspark.sql import SparkSession # Initialize Spark session diff --git a/scripts/generate_suite_stubs.py b/scripts/generate_suite_stubs.py index f0e8c52..6c57fb6 100755 --- a/scripts/generate_suite_stubs.py +++ b/scripts/generate_suite_stubs.py @@ -132,7 +132,7 @@ def generate_pyi_file() -> str: The complete .pyi file content as a string """ # Import here to avoid issues if not in the right directory - from dataframe_expectations.expectations.expectation_registry import ( + from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) @@ -212,7 +212,7 @@ def update_pyi_file(dry_run: bool = False) -> bool: f.write(new_content) # Count the methods - from dataframe_expectations.expectations.expectation_registry import ( + from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) method_count = len(DataFrameExpectationRegistry.get_suite_method_mapping()) diff --git a/scripts/sanity_checks.py b/scripts/sanity_checks.py index 44632ad..5f2bee8 100644 --- a/scripts/sanity_checks.py +++ b/scripts/sanity_checks.py @@ -115,7 +115,7 @@ def _extract_expectation_name(self, decorator) -> Optional[str]: def _discover_suite_methods(self): """Find all expect_* methods available via the registry.""" try: - from dataframe_expectations.expectations.expectation_registry import ( + from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) diff --git a/tests/expectations_helper_classes/test_column_expectations.py b/tests/expectations_helper_classes/test_column_expectations.py index 7827b7c..28d4929 100644 --- a/tests/expectations_helper_classes/test_column_expectations.py +++ b/tests/expectations_helper_classes/test_column_expectations.py @@ -3,8 +3,8 @@ import pandas as pd -from dataframe_expectations import DataFrameType -from 
dataframe_expectations.expectations.column_expectation import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.core.column_expectation import ( DataFrameColumnExpectation, ) diff --git a/tests/expectations_helper_classes/test_expectation_registry.py b/tests/expectations_helper_classes/test_expectation_registry.py index dcb1e8f..d7943b6 100644 --- a/tests/expectations_helper_classes/test_expectation_registry.py +++ b/tests/expectations_helper_classes/test_expectation_registry.py @@ -1,6 +1,6 @@ import pytest -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ExpectationCategory, ExpectationSubcategory, diff --git a/tests/expectations_helper_classes/test_expectations.py b/tests/expectations_helper_classes/test_expectations.py index 7826956..b73d528 100644 --- a/tests/expectations_helper_classes/test_expectations.py +++ b/tests/expectations_helper_classes/test_expectations.py @@ -3,8 +3,8 @@ import pandas as pd -from dataframe_expectations import DataFrameLike, DataFrameType -from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.core.types import DataFrameLike, DataFrameType +from dataframe_expectations.core.expectation import DataFrameExpectation class MyTestExpectation(DataFrameExpectation): @@ -229,7 +229,7 @@ def test_infer_data_frame_type_with_connect_dataframe_available(): # Patch the PySparkConnectDataFrame import to be our mock class with patch( - "dataframe_expectations.expectations.PySparkConnectDataFrame", + "dataframe_expectations.core.expectation.PySparkConnectDataFrame", MockConnectDataFrame, ): # Create an instance of our mock Connect DataFrame @@ -242,7 +242,7 @@ def test_infer_data_frame_type_with_connect_dataframe_available(): ) -@patch("dataframe_expectations.expectations.PySparkConnectDataFrame", None) 
+@patch("dataframe_expectations.core.expectation.PySparkConnectDataFrame", None) def test_infer_data_frame_type_without_connect_support(spark): """ Test that the method works correctly when PySpark Connect is not available. @@ -270,7 +270,7 @@ def test_infer_data_frame_type_connect_import_behavior(spark): expectation = MyTestExpectation() # Test case 1: When PySparkConnectDataFrame is None (import failed) - with patch("dataframe_expectations.expectations.PySparkConnectDataFrame", None): + with patch("dataframe_expectations.core.expectation.PySparkConnectDataFrame", None): # Should still work with regular DataFrames pandas_df = pd.DataFrame({"col1": [1, 2, 3]}) result_type = expectation.infer_data_frame_type(pandas_df) @@ -282,7 +282,7 @@ def test_infer_data_frame_type_connect_import_behavior(spark): # Test case 2: When PySparkConnectDataFrame is available (mocked) with patch( - "dataframe_expectations.expectations.PySparkConnectDataFrame", + "dataframe_expectations.core.expectation.PySparkConnectDataFrame", MockConnectDataFrame, ): # Regular DataFrames should still work diff --git a/tests/expectations_helper_classes/test_utils.py b/tests/expectations_helper_classes/test_utils.py index f435c4d..0b4a1a9 100644 --- a/tests/expectations_helper_classes/test_utils.py +++ b/tests/expectations_helper_classes/test_utils.py @@ -1,7 +1,7 @@ from typing import Union import pytest -from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.core.utils import requires_params def test_requires_params_success(): diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py index eb21f6e..e7ab84b 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py +++ 
b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py index c6a200c..0b7b6d2 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py index 02207e6..f73a305 100644 --- 
a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py index 3c747f5..bc5b500 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py 
b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py index 358f68f..04240f9 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py @@ -2,11 +2,11 @@ import numpy as np import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py index ca5ad23..d0180d9 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py @@ -2,11 +2,11 @@ import pandas as pd import pytest -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py 
b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py index eb76591..5bedf79 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py index cd6bbc7..2bb453d 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py 
b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py index 5486e4f..5f2b7b2 100644 --- a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py @@ -2,11 +2,11 @@ import pandas as pd from pyspark.sql.types import IntegerType, StructField, StructType -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py index 6d02778..c3e79db 100644 --- a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git 
a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py index e864d08..cbca83c 100644 --- a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py index 43cb53f..5d0a9f2 100644 --- a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git 
a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py index 40a9ade..d909a67 100644 --- a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py index 4c4fd40..70fe489 100644 --- a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py @@ -2,11 +2,11 @@ import numpy as np import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff 
--git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py index df28187..2df11ba 100644 --- a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py index af7a54e..3166aa4 100644 --- a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py 
b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py index a697078..ea5c4f5 100644 --- a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py index 5bcdaf5..7bf79a9 100644 --- a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py 
b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py index 45fd2dd..0362651 100644 --- a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py @@ -2,11 +2,11 @@ import numpy as np import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py index b2d4ce2..f2ca82a 100644 --- a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py @@ -2,11 +2,11 @@ import numpy as np import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py 
b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py index bf6d811..9c492dc 100644 --- a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py +++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py index b1318d9..b0f2d90 100644 --- a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py +++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py 
b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py index a2d8fb2..018c149 100644 --- a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py +++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py index 24bdd27..df06789 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py 
b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py index b53be88..24796ff 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py index fc3fb39..6b03781 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py 
b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py index 13613b8..93e4832 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py index 858ca83..1714b33 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py 
b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py index b6bd3cb..f0bb32b 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py index 043242c..b8bf99f 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py 
b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py index 46ffb21..0d02563 100644 --- a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py @@ -1,11 +1,11 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) diff --git a/tests/expectations_implemented/template_test_expectation.py b/tests/expectations_implemented/template_test_expectation.py index 0e0da24..a3325e8 100644 --- a/tests/expectations_implemented/template_test_expectation.py +++ b/tests/expectations_implemented/template_test_expectation.py @@ -1,4 +1,4 @@ -from dataframe_expectations.expectations.expectation_registry import ( +from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) diff --git a/tests/test_expectations_suite.py b/tests/test_expectations_suite.py index 146c8aa..6d0a148 100644 --- a/tests/test_expectations_suite.py +++ b/tests/test_expectations_suite.py @@ -1,8 +1,8 @@ import pytest import pandas as pd -from dataframe_expectations import DataFrameType -from dataframe_expectations.expectations_suite import ( +from dataframe_expectations.core.types import DataFrameType +from dataframe_expectations.suite import ( DataFrameExpectationsSuite, DataFrameExpectationsSuiteFailure, ) @@ -130,7 +130,7 @@ def unpersist(self): runner = suite.build() with patch( - "dataframe_expectations.expectations.PySparkConnectDataFrame", + 
"dataframe_expectations.core.expectation.PySparkConnectDataFrame", MockConnectDataFrame, ): # Create mock expectation that can handle Connect DataFrame diff --git a/tests/test_result_message.py b/tests/test_result_message.py index def309f..1d9c2d9 100644 --- a/tests/test_result_message.py +++ b/tests/test_result_message.py @@ -2,7 +2,7 @@ import pandas as pd from tabulate import tabulate # type: ignore -from dataframe_expectations import DataFrameType +from dataframe_expectations.core.types import DataFrameType from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, DataFrameExpectationResultMessage, From d47eb8be2eef84d820653f5ef07a35e44695c5a3 Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 13:09:02 +0100 Subject: [PATCH 2/8] fix: deleted duplicate DataFrameExpectation code from expectations package --- .../expectations/__init__.py | 111 +----------------- dataframe_expectations/suite.py | 2 +- 2 files changed, 2 insertions(+), 111 deletions(-) diff --git a/dataframe_expectations/expectations/__init__.py b/dataframe_expectations/expectations/__init__.py index c677b32..81836d8 100644 --- a/dataframe_expectations/expectations/__init__.py +++ b/dataframe_expectations/expectations/__init__.py @@ -1,110 +1 @@ -from abc import ABC, abstractmethod -from typing import cast - -from pandas import DataFrame as PandasDataFrame -from pyspark.sql import DataFrame as PySparkDataFrame - -# Import the connect DataFrame type for Spark Connect -try: - from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame -except ImportError: - # Fallback for older PySpark versions that don't have connect - PySparkConnectDataFrame = None # type: ignore[misc,assignment] - -from dataframe_expectations.core.types import DataFrameLike, DataFrameType -from dataframe_expectations.result_message import ( - DataFrameExpectationResultMessage, -) - - -class DataFrameExpectation(ABC): - """ - Base class for DataFrame expectations.
- """ - - def get_expectation_name(self) -> str: - """ - Returns the class name as the expectation name. - """ - return type(self).__name__ - - @abstractmethod - def get_description(self) -> str: - """ - Returns a description of the expectation. - """ - raise NotImplementedError( - f"description method must be implemented for {self.__class__.__name__}" - ) - - def __str__(self): - """ - Returns a string representation of the expectation. - """ - return f"{self.get_expectation_name()} ({self.get_description()})" - - @classmethod - def infer_data_frame_type(cls, data_frame: DataFrameLike) -> DataFrameType: - """ - Infer the DataFrame type based on the provided DataFrame. - """ - if isinstance(data_frame, PandasDataFrame): - return DataFrameType.PANDAS - elif isinstance(data_frame, PySparkDataFrame): - return DataFrameType.PYSPARK - elif PySparkConnectDataFrame is not None and isinstance( - data_frame, PySparkConnectDataFrame - ): - return DataFrameType.PYSPARK - else: - raise ValueError(f"Unsupported DataFrame type: {type(data_frame)}") - - def validate(self, data_frame: DataFrameLike, **kwargs): - """ - Validate the DataFrame against the expectation. - """ - data_frame_type = self.infer_data_frame_type(data_frame) - - if data_frame_type == DataFrameType.PANDAS: - return self.validate_pandas(data_frame=data_frame, **kwargs) - elif data_frame_type == DataFrameType.PYSPARK: - return self.validate_pyspark(data_frame=data_frame, **kwargs) - else: - raise ValueError(f"Unsupported DataFrame type: {data_frame_type}") - - @abstractmethod - def validate_pandas( - self, data_frame: DataFrameLike, **kwargs - ) -> DataFrameExpectationResultMessage: - """ - Validate a pandas DataFrame against the expectation. 
- """ - raise NotImplementedError( - f"validate_pandas method must be implemented for {self.__class__.__name__}" - ) - - @abstractmethod - def validate_pyspark( - self, data_frame: DataFrameLike, **kwargs - ) -> DataFrameExpectationResultMessage: - """ - Validate a PySpark DataFrame against the expectation. - """ - raise NotImplementedError( - f"validate_pyspark method must be implemented for {self.__class__.__name__}" - ) - - @classmethod - def num_data_frame_rows(cls, data_frame: DataFrameLike) -> int: - """ - Count the number of rows in the DataFrame. - """ - data_frame_type = cls.infer_data_frame_type(data_frame) - if data_frame_type == DataFrameType.PANDAS: - # Cast to PandasDataFrame since we know it's a Pandas DataFrame at this point - return len(cast(PandasDataFrame, data_frame)) - elif data_frame_type == DataFrameType.PYSPARK: - # Cast to PySparkDataFrame since we know it's a PySpark DataFrame at this point - return cast(PySparkDataFrame, data_frame).count() - else: - raise ValueError(f"Unsupported DataFrame type: {data_frame_type}") +"""Expectations package - contains all expectation implementations.""" diff --git a/dataframe_expectations/suite.py b/dataframe_expectations/suite.py index 67f9d3b..70ae4ec 100644 --- a/dataframe_expectations/suite.py +++ b/dataframe_expectations/suite.py @@ -1,7 +1,7 @@ from functools import wraps from typing import Callable, List, Optional, cast -from dataframe_expectations.expectations import DataFrameLike +from dataframe_expectations.core.types import DataFrameLike from dataframe_expectations.registry import ( DataFrameExpectationRegistry, ) From 82bec0ce13be1e2a1bc16fb77d0aaf91edb5692f Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 13:38:50 +0100 Subject: [PATCH 3/8] fix: deleted duplicate dataclass and enums from registry --- dataframe_expectations/registry.py | 47 ++++-------------------------- 1 file changed, 5 insertions(+), 42 deletions(-) diff --git a/dataframe_expectations/registry.py 
b/dataframe_expectations/registry.py index 2ca256d..8644f26 100644 --- a/dataframe_expectations/registry.py +++ b/dataframe_expectations/registry.py @@ -1,54 +1,17 @@ import re -from enum import Enum from typing import Any, Callable, Dict, Optional -from pydantic import BaseModel, ConfigDict, Field - from dataframe_expectations.core.expectation import DataFrameExpectation +from dataframe_expectations.core.types import ( + ExpectationCategory, + ExpectationMetadata, + ExpectationSubcategory, +) from dataframe_expectations.logging_utils import setup_logger logger = setup_logger(__name__) -class ExpectationCategory(str, Enum): - """Categories for expectations.""" - - COLUMN_EXPECTATIONS = "Column Expectations" - COLUMN_AGGREGATION_EXPECTATIONS = "Column Aggregation Expectations" - DATAFRAME_AGGREGATION_EXPECTATIONS = "DataFrame Aggregation Expectations" - - -class ExpectationSubcategory(str, Enum): - """Subcategory of expectations.""" - - ANY_VALUE = "Any Value" - NUMERICAL = "Numerical" - STRING = "String" - UNIQUE = "Unique" - - -class ExpectationMetadata(BaseModel): - """Metadata for a registered expectation.""" - - suite_method_name: str = Field( - ..., description="Method name in ExpectationsSuite (e.g., 'expect_value_greater_than')" - ) - pydoc: str = Field(..., description="Human-readable description of the expectation") - category: ExpectationCategory = Field(..., description="Category (e.g., 'Column Expectations')") - subcategory: ExpectationSubcategory = Field( - ..., description="Subcategory (e.g., 'Numerical', 'String')" - ) - params_doc: Dict[str, str] = Field(..., description="Documentation for each parameter") - params: list = Field(default_factory=list, description="List of required parameter names") - param_types: Dict[str, Any] = Field( - default_factory=dict, description="Type hints for parameters" - ) - factory_func_name: str = Field(..., description="Name of the factory function") - expectation_name: str = Field(..., description="Name of the 
expectation class") - - model_config = ConfigDict(frozen=True) # Make model immutable - - class DataFrameExpectationRegistry: """Registry for dataframe expectations.""" From fa847643a310a27e615290567d3e11fad4344977 Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 13:44:39 +0100 Subject: [PATCH 4/8] fix: import enums from types --- dataframe_expectations/expectations/aggregation/any_value.py | 4 ++-- dataframe_expectations/expectations/aggregation/numerical.py | 4 ++-- dataframe_expectations/expectations/aggregation/unique.py | 4 ++-- dataframe_expectations/expectations/column/any_value.py | 4 ++-- dataframe_expectations/expectations/column/numerical.py | 4 ++-- dataframe_expectations/expectations/column/string.py | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dataframe_expectations/expectations/aggregation/any_value.py b/dataframe_expectations/expectations/aggregation/any_value.py index 88951ed..17ca456 100644 --- a/dataframe_expectations/expectations/aggregation/any_value.py +++ b/dataframe_expectations/expectations/aggregation/any_value.py @@ -8,11 +8,11 @@ from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) -from dataframe_expectations.registry import ( +from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, - register_expectation, ) +from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, diff --git a/dataframe_expectations/expectations/aggregation/numerical.py b/dataframe_expectations/expectations/aggregation/numerical.py index 2da6abd..5a382d0 100644 --- a/dataframe_expectations/expectations/aggregation/numerical.py +++ b/dataframe_expectations/expectations/aggregation/numerical.py @@ -9,11 +9,11 @@ from dataframe_expectations.core.aggregation_expectation import ( 
DataFrameAggregationExpectation, ) -from dataframe_expectations.registry import ( +from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, - register_expectation, ) +from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, diff --git a/dataframe_expectations/expectations/aggregation/unique.py b/dataframe_expectations/expectations/aggregation/unique.py index f046ba2..8ca3289 100644 --- a/dataframe_expectations/expectations/aggregation/unique.py +++ b/dataframe_expectations/expectations/aggregation/unique.py @@ -9,11 +9,11 @@ from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) -from dataframe_expectations.registry import ( +from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, - register_expectation, ) +from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params from dataframe_expectations.result_message import ( DataFrameExpectationFailureMessage, diff --git a/dataframe_expectations/expectations/column/any_value.py b/dataframe_expectations/expectations/column/any_value.py index 92f5122..b60c877 100644 --- a/dataframe_expectations/expectations/column/any_value.py +++ b/dataframe_expectations/expectations/column/any_value.py @@ -3,11 +3,11 @@ from dataframe_expectations.core.column_expectation import ( DataFrameColumnExpectation, ) -from dataframe_expectations.registry import ( +from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, - register_expectation, ) +from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params diff --git a/dataframe_expectations/expectations/column/numerical.py 
b/dataframe_expectations/expectations/column/numerical.py index 58f55ff..4899949 100644 --- a/dataframe_expectations/expectations/column/numerical.py +++ b/dataframe_expectations/expectations/column/numerical.py @@ -3,11 +3,11 @@ from dataframe_expectations.core.column_expectation import ( DataFrameColumnExpectation, ) -from dataframe_expectations.registry import ( +from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, - register_expectation, ) +from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params diff --git a/dataframe_expectations/expectations/column/string.py b/dataframe_expectations/expectations/column/string.py index 7eec4c4..f50a75e 100644 --- a/dataframe_expectations/expectations/column/string.py +++ b/dataframe_expectations/expectations/column/string.py @@ -3,11 +3,11 @@ from dataframe_expectations.core.column_expectation import ( DataFrameColumnExpectation, ) -from dataframe_expectations.registry import ( +from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, - register_expectation, ) +from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params From 9a76467cd63c9ba15bd4878e247aef2b631316df Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 13:48:31 +0100 Subject: [PATCH 5/8] fix: consolidate imports --- dataframe_expectations/expectations/aggregation/any_value.py | 3 ++- dataframe_expectations/expectations/aggregation/numerical.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dataframe_expectations/expectations/aggregation/any_value.py b/dataframe_expectations/expectations/aggregation/any_value.py index 17ca456..c59111e 100644 --- a/dataframe_expectations/expectations/aggregation/any_value.py +++ b/dataframe_expectations/expectations/aggregation/any_value.py @@ -4,13 +4,14 @@ from pyspark.sql import 
DataFrame as PySparkDataFrame from pyspark.sql import functions as F -from dataframe_expectations.core.types import DataFrameLike, DataFrameType from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, + DataFrameLike, + DataFrameType, ) from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params diff --git a/dataframe_expectations/expectations/aggregation/numerical.py b/dataframe_expectations/expectations/aggregation/numerical.py index 5a382d0..2d05a02 100644 --- a/dataframe_expectations/expectations/aggregation/numerical.py +++ b/dataframe_expectations/expectations/aggregation/numerical.py @@ -5,13 +5,14 @@ from pyspark.sql import DataFrame as PySparkDataFrame from pyspark.sql import functions as F -from dataframe_expectations.core.types import DataFrameLike, DataFrameType from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, + DataFrameLike, + DataFrameType, ) from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params From c18285874837952bf1a7af3f2d1f21613286c34f Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 15:32:13 +0100 Subject: [PATCH 6/8] feat: simplified registry --- README.md | 6 +- dataframe_expectations/__init__.py | 4 +- .../expectations/aggregation/unique.py | 4 +- dataframe_expectations/registry.py | 131 +++++++++++++----- dataframe_expectations/suite.py | 34 ++--- .../{expectations_suite.pyi => suite.pyi} | 0 docs/source/adding_expectations.rst | 14 +- docs/source/api_reference.rst | 12 +- docs/source/expectations.rst | 2 +- scripts/README.md | 6 +- scripts/generate_suite_stubs.py | 8 +- scripts/sanity_checks.py | 4 +- 
uv.lock | 2 +- 13 files changed, 142 insertions(+), 85 deletions(-) rename dataframe_expectations/{expectations_suite.pyi => suite.pyi} (100%) diff --git a/README.md b/README.md index 0f8e904..c1ae71b 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ uv run pytest tests/ --cov=dataframe_expectations **Basic usage with Pandas:** ```python -from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite +from dataframe_expectations.suite import DataFrameExpectationsSuite import pandas as pd # Build a suite with expectations @@ -82,7 +82,7 @@ runner.run(df) **PySpark example:** ```python -from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite +from dataframe_expectations.suite import DataFrameExpectationsSuite from pyspark.sql import SparkSession # Initialize Spark session @@ -116,7 +116,7 @@ runner.run(df) **Decorator pattern for automatic validation:** ```python -from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite +from dataframe_expectations.suite import DataFrameExpectationsSuite from pyspark.sql import SparkSession # Initialize Spark session diff --git a/dataframe_expectations/__init__.py b/dataframe_expectations/__init__.py index 295fa6b..ab81620 100644 --- a/dataframe_expectations/__init__.py +++ b/dataframe_expectations/__init__.py @@ -1,5 +1,7 @@ """DataFrame Expectations - A validation library for pandas and PySpark DataFrames.""" -__version__ = "0.3.0" +from importlib.metadata import version + +__version__ = version("dataframe-expectations") __all__ = [] diff --git a/dataframe_expectations/expectations/aggregation/unique.py b/dataframe_expectations/expectations/aggregation/unique.py index 8ca3289..247e238 100644 --- a/dataframe_expectations/expectations/aggregation/unique.py +++ b/dataframe_expectations/expectations/aggregation/unique.py @@ -4,14 +4,14 @@ from pandas import DataFrame as PandasDataFrame from pyspark.sql import DataFrame as PySparkDataFrame from pyspark.sql import 
functions as F - -from dataframe_expectations.core.types import DataFrameLike, DataFrameType from dataframe_expectations.core.aggregation_expectation import ( DataFrameAggregationExpectation, ) from dataframe_expectations.core.types import ( ExpectationCategory, ExpectationSubcategory, + DataFrameLike, + DataFrameType, ) from dataframe_expectations.registry import register_expectation from dataframe_expectations.core.utils import requires_params diff --git a/dataframe_expectations/registry.py b/dataframe_expectations/registry.py index 8644f26..4581589 100644 --- a/dataframe_expectations/registry.py +++ b/dataframe_expectations/registry.py @@ -1,5 +1,5 @@ import re -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Dict, Optional, Tuple from dataframe_expectations.core.expectation import DataFrameExpectation from dataframe_expectations.core.types import ( @@ -11,12 +11,20 @@ logger = setup_logger(__name__) +# Type alias for registry entry (factory function + metadata) +FactoryFunction = Callable[..., DataFrameExpectation] +RegistryEntry = Tuple[FactoryFunction, ExpectationMetadata] + class DataFrameExpectationRegistry: """Registry for dataframe expectations.""" - _expectations: Dict[str, Callable[..., DataFrameExpectation]] = {} - _metadata: Dict[str, ExpectationMetadata] = {} + # Primary registry: keyed by suite_method_name for O(1) suite access + _registry: Dict[str, RegistryEntry] = {} + + # Secondary index: maps expectation_name -> suite_method_name for O(1) lookups + _by_name: Dict[str, str] = {} + _loaded: bool = False @classmethod @@ -41,21 +49,31 @@ def register( :return: Decorator function. 
""" - def decorator(func: Callable[..., DataFrameExpectation]): + def decorator(func: FactoryFunction) -> FactoryFunction: expectation_name = name logger.debug( f"Registering expectation '{expectation_name}' with function {func.__name__}" ) - # Check if the name is already registered - if expectation_name in cls._expectations: - error_message = f"Expectation '{expectation_name}' is already registered." + suite_method = suite_method_name or cls._convert_to_suite_method(expectation_name) + + # Check for duplicate suite method name + if suite_method in cls._registry: + existing_metadata = cls._registry[suite_method][1] + error_message = ( + f"Suite method '{suite_method}' is already registered by expectation '{existing_metadata.expectation_name}'. " + f"Cannot register '{expectation_name}'." + ) logger.error(error_message) raise ValueError(error_message) - # Register factory function - cls._expectations[expectation_name] = func + # Check for duplicate expectation name + if expectation_name in cls._by_name: + existing_suite_method = cls._by_name[expectation_name] + error_message = f"Expectation '{expectation_name}' is already registered with suite method '{existing_suite_method}'." 
+ logger.error(error_message) + raise ValueError(error_message) # Extract params from @requires_params if present extracted_params = [] @@ -64,10 +82,8 @@ def decorator(func: Callable[..., DataFrameExpectation]): extracted_params = list(func._required_params) extracted_types = getattr(func, "_param_types", {}) - # Store metadata - cls._metadata[expectation_name] = ExpectationMetadata( - suite_method_name=suite_method_name - or cls._convert_to_suite_method(expectation_name), + metadata = ExpectationMetadata( + suite_method_name=suite_method, pydoc=pydoc, category=category, subcategory=subcategory, @@ -78,6 +94,12 @@ def decorator(func: Callable[..., DataFrameExpectation]): expectation_name=expectation_name, ) + # Store in primary registry + cls._registry[suite_method] = (func, metadata) + + # Store in secondary index + cls._by_name[expectation_name] = suite_method + return func return decorator @@ -93,8 +115,9 @@ def _convert_to_suite_method(cls, expectation_name: str) -> str: ExpectationValueGreaterThan -> expect_value_greater_than ExpectationMinRows -> expect_min_rows """ - # Remove 'Expectation' prefix + name = re.sub(r"^Expectation", "", expectation_name) + # Convert CamelCase to snake_case snake = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", name) snake = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", snake) @@ -141,13 +164,17 @@ def _load_all_expectations(cls): def get_expectation(cls, expectation_name: str, **kwargs) -> DataFrameExpectation: """Get an expectation instance by name. + Note: This method is kept for backward compatibility with tests. + The suite uses get_expectation_by_suite_method() for better performance. + :param expectation_name: The name of the expectation. :param kwargs: Parameters to pass to the expectation factory function. :return: An instance of DataFrameExpectation. 
""" - cls._ensure_loaded() # Lazy load expectations + cls._ensure_loaded() logger.debug(f"Retrieving expectation '{expectation_name}' with arguments: {kwargs}") - if expectation_name not in cls._expectations: + + if expectation_name not in cls._by_name: available = cls.list_expectations() error_message = ( f"Unknown expectation '{expectation_name}'. " @@ -155,7 +182,10 @@ def get_expectation(cls, expectation_name: str, **kwargs) -> DataFrameExpectatio ) logger.error(error_message) raise ValueError(error_message) - return cls._expectations[expectation_name](**kwargs) + + suite_method = cls._by_name[expectation_name] + factory, metadata = cls._registry[suite_method] + return factory(**kwargs) @classmethod def get_metadata(cls, expectation_name: str) -> ExpectationMetadata: @@ -166,9 +196,13 @@ def get_metadata(cls, expectation_name: str) -> ExpectationMetadata: :raises ValueError: If expectation not found. """ cls._ensure_loaded() - if expectation_name not in cls._metadata: + + if expectation_name not in cls._by_name: raise ValueError(f"No metadata found for expectation '{expectation_name}'") - return cls._metadata[expectation_name] + + suite_method = cls._by_name[expectation_name] + factory, metadata = cls._registry[suite_method] + return metadata @classmethod def get_all_metadata(cls) -> Dict[str, ExpectationMetadata]: @@ -177,7 +211,35 @@ def get_all_metadata(cls) -> Dict[str, ExpectationMetadata]: :return: Dictionary mapping expectation names to their metadata. """ cls._ensure_loaded() - return cls._metadata.copy() + return {metadata.expectation_name: metadata for _, (_, metadata) in cls._registry.items()} + + @classmethod + def get_expectation_by_suite_method( + cls, suite_method_name: str, **kwargs + ) -> DataFrameExpectation: + """Get an expectation instance by suite method name. + + :param suite_method_name: The suite method name (e.g., 'expect_value_greater_than'). + :param kwargs: Parameters to pass to the expectation factory function. 
+ :return: An instance of DataFrameExpectation. + :raises ValueError: If suite method not found. + """ + cls._ensure_loaded() + logger.debug( + f"Retrieving expectation for suite method '{suite_method_name}' with arguments: {kwargs}" + ) + + if suite_method_name not in cls._registry: + available = list(cls._registry.keys()) + error_message = ( + f"Unknown suite method '{suite_method_name}'. " + f"Available methods: {', '.join(available[:10])}..." + ) + logger.error(error_message) + raise ValueError(error_message) + + factory, metadata = cls._registry[suite_method_name] + return factory(**kwargs) @classmethod def get_suite_method_mapping(cls) -> Dict[str, str]: @@ -187,7 +249,10 @@ def get_suite_method_mapping(cls) -> Dict[str, str]: to expectation names (e.g., 'ExpectationValueGreaterThan'). """ cls._ensure_loaded() - return {meta.suite_method_name: exp_name for exp_name, meta in cls._metadata.items()} + return { + suite_method: metadata.expectation_name + for suite_method, (_, metadata) in cls._registry.items() + } @classmethod def list_expectations(cls) -> list: @@ -195,8 +260,8 @@ def list_expectations(cls) -> list: :return: List of registered expectation names. """ - cls._ensure_loaded() # Lazy load expectations - return list(cls._expectations.keys()) + cls._ensure_loaded() + return [metadata.expectation_name for _, (_, metadata) in cls._registry.items()] @classmethod def remove_expectation(cls, expectation_name: str): @@ -205,23 +270,25 @@ def remove_expectation(cls, expectation_name: str): :param expectation_name: The name of the expectation to remove. :raises ValueError: If expectation not found. 
""" - cls._ensure_loaded() # Lazy load expectations + cls._ensure_loaded() logger.debug(f"Removing expectation '{expectation_name}'") - if expectation_name in cls._expectations: - del cls._expectations[expectation_name] - if expectation_name in cls._metadata: - del cls._metadata[expectation_name] - else: + + if expectation_name not in cls._by_name: error_message = f"Expectation '{expectation_name}' not found." logger.error(error_message) raise ValueError(error_message) + # Remove from both dictionaries + suite_method = cls._by_name[expectation_name] + del cls._registry[suite_method] + del cls._by_name[expectation_name] + @classmethod def clear_expectations(cls): """Clear all registered expectations.""" - logger.debug(f"Clearing {len(cls._expectations)} expectations from the registry") - cls._expectations.clear() - cls._metadata.clear() + logger.debug(f"Clearing {len(cls._registry)} expectations from the registry") + cls._registry.clear() + cls._by_name.clear() cls._loaded = False # Allow reloading diff --git a/dataframe_expectations/suite.py b/dataframe_expectations/suite.py index 70ae4ec..e80481f 100644 --- a/dataframe_expectations/suite.py +++ b/dataframe_expectations/suite.py @@ -108,7 +108,6 @@ def run( pyspark_df = cast(PySparkDataFrame, data_frame) was_already_cached = pyspark_df.is_cached - # Cache the DataFrame if it wasn't already cached if not was_already_cached: logger.debug("Caching PySpark DataFrame for expectations suite execution") pyspark_df.cache() @@ -206,7 +205,6 @@ def wrapper(*args, **kwargs): logger.info(f"Validating DataFrame returned from '{f.__name__}'") self.run(data_frame=result) - # Return the original DataFrame if validation passes return result return wrapper @@ -254,42 +252,32 @@ def __getattr__(self, name: str): if not name.startswith("expect_"): raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") - mapping = DataFrameExpectationRegistry.get_suite_method_mapping() + # Create and return the dynamic method 
- validation happens in _create_expectation_method + return self._create_expectation_method(name) - # Check if this method exists in the registry - if name not in mapping: - available = list(mapping.keys()) - raise AttributeError( - f"Unknown expectation method '{name}'. " - f"Available methods: {', '.join(available[:5])}..." - ) - - expectation_name = mapping[name] - - # Create and return the dynamic method - return self._create_expectation_method(expectation_name, name) - - def _create_expectation_method(self, expectation_name: str, method_name: str): + def _create_expectation_method(self, suite_method_name: str): """ Create a dynamic expectation method. - Returns a closure that captures the expectation_name and self. + Returns a closure that captures the suite_method_name and self. """ def dynamic_method(**kwargs): """Dynamically generated expectation method.""" - expectation = DataFrameExpectationRegistry.get_expectation( - expectation_name=expectation_name, **kwargs - ) + try: + expectation = DataFrameExpectationRegistry.get_expectation_by_suite_method( + suite_method_name=suite_method_name, **kwargs + ) + except ValueError as e: + raise AttributeError(str(e)) from e logger.info(f"Adding expectation: {expectation}") - # Add to internal list self.__expectations.append(expectation) return self # Set helpful name for debugging - dynamic_method.__name__ = method_name + dynamic_method.__name__ = suite_method_name return dynamic_method diff --git a/dataframe_expectations/expectations_suite.pyi b/dataframe_expectations/suite.pyi similarity index 100% rename from dataframe_expectations/expectations_suite.pyi rename to dataframe_expectations/suite.pyi diff --git a/docs/source/adding_expectations.rst b/docs/source/adding_expectations.rst index 74d86b5..9755d9f 100644 --- a/docs/source/adding_expectations.rst +++ b/docs/source/adding_expectations.rst @@ -28,12 +28,12 @@ Defining Your Expectations Most use cases that involve validating a single column in the dataframe can 
be covered by initialising the ``DataFrameColumnExpectation`` class with the correct parameters. Expectations implemented by initialising -``DataFrameColumnExpectation`` can be found in the ``column_expectations`` module, categorised based on the data-type of +``DataFrameColumnExpectation`` can be found in the ``expectations/column`` module, categorised based on the data-type of the column value. If you want to go ahead with implementing ``DataFrameColumnExpectation``, you first need to identify the data-type of the column value. Existing expectations are already categorised into ``string``, ``numerical`` or ``any_value`` -expectations. Create a new category in column_expectations if you think existing categories don't fit your use case. +expectations. Create a new category in ``expectations/column`` if you think existing categories don't fit your use case. Once you have decided where the expectation needs to be added, you can define it as follows: .. code-block:: python @@ -75,7 +75,7 @@ Once you have decided where the expectation needs to be added, you can define it ) For additional guidance, you can refer to the implementation of ``ExpectationValueGreaterThan`` and -``ExpectationValueLessThan`` in the ``column_expectations`` module. These examples demonstrate how to initialise the +``ExpectationValueLessThan`` in the ``expectations/column`` module. These examples demonstrate how to initialise the ``DataFrameColumnExpectation`` class with the right parameters and define filtering logic for different dataframes. 
The ``@register_expectation`` decorator is required and has the following mandatory parameters: @@ -93,7 +93,7 @@ The ``@requires_params`` decorator is a utility that helps you validate the inpu Adding Aggregation-Based Expectations -------------------------------------- -Just like the column expectations, you can find the aggregation-based expectations in the ``aggregation_expectations`` +Just like the column expectations, you can find the aggregation-based expectations in the ``expectations/aggregation`` module. For expectations that require aggregation operations (such as row counts, distinct value counts, null percentages, etc.), you should implement custom expectation classes by inheriting from ``DataFrameAggregationExpectation``. These types of expectations cannot be easily covered @@ -337,7 +337,7 @@ Examples of aggregation-based expectations include: - ``ExpectationColumnMeanBetween``: Validate that column mean falls within a range - ``ExpectationColumnQuantileBetween``: Validate that column quantiles fall within ranges -For more examples, check the aggregation_expectations module. +For more examples, check the ``expectations/aggregation`` module. Custom Expectations with Full Control -------------------------------------- @@ -424,7 +424,7 @@ To provide IDE autocomplete and type hints for all expect methods, run the stub uv run python scripts/generate_suite_stubs.py -This creates ``expectations_suite.pyi`` with type hints for all registered expectations. The stub file is automatically +This creates ``suite.pyi`` with type hints for all registered expectations. The stub file is automatically validated by the sanity check script and pre-commit hooks. Adding Unit Tests @@ -532,7 +532,7 @@ Run the stub generator to create IDE autocomplete support: uv run python scripts/generate_suite_stubs.py -This updates ``dataframe_expectations/expectations_suite.pyi`` with type hints for your new expectation method. 
+This updates ``dataframe_expectations/suite.pyi`` with type hints for your new expectation method. **2. Build Documentation** diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst index 8c9c9b5..38f9230 100644 --- a/docs/source/api_reference.rst +++ b/docs/source/api_reference.rst @@ -9,7 +9,7 @@ Core Infrastructure Base Expectation Classes ~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: dataframe_expectations.expectations +.. automodule:: dataframe_expectations.core.expectation :members: :undoc-members: :show-inheritance: @@ -17,7 +17,7 @@ Base Expectation Classes Column Expectations ~~~~~~~~~~~~~~~~~~~ -.. automodule:: dataframe_expectations.expectations.column_expectation +.. automodule:: dataframe_expectations.core.column_expectation :members: :undoc-members: :show-inheritance: @@ -25,7 +25,7 @@ Column Expectations Aggregation Expectations ~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: dataframe_expectations.expectations.aggregation_expectation +.. automodule:: dataframe_expectations.core.aggregation_expectation :members: :undoc-members: :show-inheritance: @@ -33,7 +33,7 @@ Aggregation Expectations Expectation Registry -------------------- -.. automodule:: dataframe_expectations.expectations.expectation_registry +.. automodule:: dataframe_expectations.registry :members: :undoc-members: :show-inheritance: @@ -49,7 +49,7 @@ Result Messages Utilities --------- -.. automodule:: dataframe_expectations.expectations.utils +.. automodule:: dataframe_expectations.core.utils :members: :undoc-members: :show-inheritance: @@ -57,6 +57,6 @@ Utilities Exception Classes ----------------- -.. automodule:: dataframe_expectations.expectations_suite +.. 
automodule:: dataframe_expectations.suite :members: DataFrameExpectationsSuiteFailure :show-inheritance: diff --git a/docs/source/expectations.rst b/docs/source/expectations.rst index 8c82b3b..600e1c0 100644 --- a/docs/source/expectations.rst +++ b/docs/source/expectations.rst @@ -6,6 +6,6 @@ This page provides comprehensive documentation for all available DataFrame expec The expectations are automatically categorized and organized for easy browsing. .. expectations:: - :class: dataframe_expectations.expectations_suite.DataFrameExpectationsSuite + :class: dataframe_expectations.suite.DataFrameExpectationsSuite :show-summary: :show-cards: diff --git a/scripts/README.md b/scripts/README.md index 07ae7b3..9d3758a 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -17,7 +17,7 @@ uv run python scripts/generate_suite_stubs.py This will: 1. Read all expectation metadata from the registry 2. Generate method signatures with full docstrings and type hints -3. Create/update `dataframe_expectations/expectations_suite.pyi` +3. Create/update `dataframe_expectations/suite.pyi` The `.pyi` file is automatically discovered by IDEs (VS Code, PyCharm, etc.) and type checkers (mypy, pyright). @@ -51,7 +51,7 @@ Run the script whenever you: The `.pyi` file contains type stubs that IDEs use for autocomplete: ```python -# expectations_suite.pyi +# suite.pyi class DataFrameExpectationsSuite: def expect_value_equals( self, @@ -127,6 +127,6 @@ The script is self-contained and requires no maintenance. When adding new expect 1. Register with metadata in your expectation file 2. Run `python scripts/generate_suite_stubs.py` -3. Commit the updated `expectations_suite.py` +3. Commit the updated `suite.pyi` That's it! 🎉 diff --git a/scripts/generate_suite_stubs.py b/scripts/generate_suite_stubs.py index 6c57fb6..5f15240 100755 --- a/scripts/generate_suite_stubs.py +++ b/scripts/generate_suite_stubs.py @@ -6,7 +6,7 @@ a .pyi stub file that provides IDE autocomplete for all expect_* methods. 
Usage: - python scripts/generate_suite_stubs.py # Generate expectations_suite.pyi + python scripts/generate_suite_stubs.py # Generate suite.pyi python scripts/generate_suite_stubs.py --check # Only check if stub file is up-to-date python scripts/generate_suite_stubs.py --print # Print generated stubs to stdout """ @@ -180,7 +180,7 @@ def generate_pyi_file() -> str: def update_pyi_file(dry_run: bool = False) -> bool: """ - Update the expectations_suite.pyi stub file. + Update the suite.pyi stub file. Args: dry_run: If True, only check if update is needed without writing @@ -188,7 +188,7 @@ def update_pyi_file(dry_run: bool = False) -> bool: Returns: True if file was updated (or would be updated in dry_run mode), False otherwise """ - pyi_file = Path(__file__).parent.parent / 'dataframe_expectations' / 'expectations_suite.pyi' + pyi_file = Path(__file__).parent.parent / 'dataframe_expectations' / 'suite.pyi' # Generate the new .pyi content new_content = generate_pyi_file() @@ -228,7 +228,7 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - # Generate expectations_suite.pyi + # Generate suite.pyi python scripts/generate_suite_stubs.py # Check if stub file is up-to-date (useful for CI) diff --git a/scripts/sanity_checks.py b/scripts/sanity_checks.py index 5f2bee8..d01157f 100644 --- a/scripts/sanity_checks.py +++ b/scripts/sanity_checks.py @@ -23,8 +23,8 @@ class ExpectationsSanityChecker: def __init__(self, project_root: Path): self.project_root = project_root self.expectations_dir = project_root / "dataframe_expectations" / "expectations" - self.suite_file = project_root / "dataframe_expectations" / "expectations_suite.py" - self.stub_file = project_root / "dataframe_expectations" / "expectations_suite.pyi" + self.suite_file = project_root / "dataframe_expectations" / "suite.py" + self.stub_file = project_root / "dataframe_expectations" / "suite.pyi" self.tests_dir = project_root / "tests" / "expectations_implemented" # 
Results storage diff --git a/uv.lock b/uv.lock index 4435aa0..efb50ba 100644 --- a/uv.lock +++ b/uv.lock @@ -308,7 +308,7 @@ toml = [ [[package]] name = "dataframe-expectations" -version = "0.2.0" +version = "0.3.0" source = { virtual = "." } dependencies = [ { name = "pandas" }, From 82ff3435c6b6ea904a1a58b71eb6a890d80991d6 Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 16:03:19 +0100 Subject: [PATCH 7/8] fix: return correct version when package is built --- dataframe_expectations/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dataframe_expectations/__init__.py b/dataframe_expectations/__init__.py index ab81620..0711927 100644 --- a/dataframe_expectations/__init__.py +++ b/dataframe_expectations/__init__.py @@ -1,7 +1,12 @@ """DataFrame Expectations - A validation library for pandas and PySpark DataFrames.""" -from importlib.metadata import version +try: + from importlib.metadata import version -__version__ = version("dataframe-expectations") + __version__ = version("dataframe-expectations") +except Exception: + # Package is not installed (e.g., during development or linting) + # Catch all exceptions to handle various edge cases in different environments + __version__ = "0.0.0.dev0" __all__ = [] From 276589da8c5fec5537427c2d9ea622a33f40642d Mon Sep 17 00:00:00 2001 From: ryanseq-gyg Date: Mon, 10 Nov 2025 16:25:33 +0100 Subject: [PATCH 8/8] docs: remove unused imports --- docs/source/_ext/expectations_autodoc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/_ext/expectations_autodoc.py b/docs/source/_ext/expectations_autodoc.py index 556f9a4..c79787f 100644 --- a/docs/source/_ext/expectations_autodoc.py +++ b/docs/source/_ext/expectations_autodoc.py @@ -14,7 +14,6 @@ from sphinx.util.docutils import SphinxDirective from dataframe_expectations.registry import DataFrameExpectationRegistry -from dataframe_expectations.suite import DataFrameExpectationsSuite def 
parse_metadata_from_docstring(docstring: str) -> Tuple[str, str]: @@ -86,7 +85,7 @@ class ExpectationsDirective(SphinxDirective): Usage: .. expectations:: - :class: dataframe_expectations.expectations_suite.DataFrameExpectationsSuite + :class: dataframe_expectations.suite.DataFrameExpectationsSuite :show-summary: true :show-cards: true """