diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..cc25291
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,34 @@
+version: 2
+updates:
+ # UV package manager support
+ - package-ecosystem: "uv"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ day: "sunday"
+ timezone: "Europe/Berlin"
+ open-pull-requests-limit: 10
+ # Group updates to reduce PR noise
+ groups:
+ major-updates:
+ patterns:
+ - "*"
+ update-types:
+ - "major"
+ minor-updates:
+ patterns:
+ - "*"
+ update-types:
+ - "minor"
+ patch-updates:
+ patterns:
+ - "*"
+ update-types:
+ - "patch"
+ # Ignore dependencies that need manual review
+ ignore:
+ - dependency-name: "pyspark"
+      # PySpark updates can break compatibility and need manual testing
+ - dependency-name: "pandas"
+ update-types: ["version-update:semver-major"]
+ # Only allow minor/patch updates for pandas to avoid breaking changes
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..55fdb4b
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,17 @@
+## Description
+
+
+## Checklist
+
+
+- [ ] Tests have been added in the prescribed format
+- [ ] `CHANGELOG.md` has been updated to reflect changes
+- [ ] Version has been updated in `pyproject.toml`
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
new file mode 100644
index 0000000..a799fb6
--- /dev/null
+++ b/.github/workflows/main.yaml
@@ -0,0 +1,71 @@
+name: CI
+
+on: [push]
+
+permissions:
+ contents: write
+
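+# Cancel superseded in-progress runs on feature branches; runs on the default
+# branch get a unique group (keyed by run_id) and are never cancelled.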
+concurrency:
+ group: ${{ github.workflow }}${{ github.ref_name != github.event.repository.default_branch && github.ref || github.run_id }}
+ cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch }}
+
+jobs:
+ test:
+ runs-on: [ self-hosted, python-small ]
+ strategy:
+ matrix:
+ python-version: ["3.10", "3.11", "3.12"]
+ fail-fast: false # Don't cancel other jobs if one fails
+ name: Test (Python ${{ matrix.python-version }})
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install Python and UV
+ uses: astral-sh/setup-uv@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ uv sync --group dev
+ - name: Run sanity check
+ run: |
+ uv run python sanity_checks.py
+ working-directory: dataframe_expectations
+ - name: Run tests
+ run: |
+ uv run pytest tests/ --cov=dataframe_expectations
+
+ lint:
+ runs-on: [ self-hosted, python-small ]
+ env:
+ PYTHON_VERSION: "3.11" # Use a single version for linting
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install Python and UV
+ uses: astral-sh/setup-uv@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+ - name: Install dependencies
+ run: |
+ uv sync --group dev
+ - name: Pre-commit
+ run: |
+ uv run pre-commit run --all-files --show-diff-on-failure
+
+ docs:
+ runs-on: [ self-hosted, python-small ]
+ env:
+ PYTHON_VERSION: "3.11" # Use a single version for docs
+ needs: [test, lint]
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install Python and UV
+ uses: astral-sh/setup-uv@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+ - name: Install dependencies
+ run: |
+ uv sync --group docs
+ - name: Build docs
+ run: |
+ uv run sphinx-build source build/html
+ working-directory: docs
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2372d6a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,227 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+# Pipfile.lock
+
+# sphinx
+# Sphinx documentation build output
+build/
+
+# Sphinx cache and temporary files
+source/.doctrees/
+.doctrees/
+
+# Auto-generated API documentation (if using sphinx-apidoc)
+source/_autosummary/
+source/_generated/
+source/api/
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+# poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+# pdm.lock
+# pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+# pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# Redis
+*.rdb
+*.aof
+*.pid
+
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+
+# ActiveMQ
+activemq-data/
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+# .idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Streamlit
+.streamlit/secrets.toml
+
+
+# Ignore generated documentation
+docs/build/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..8d5dd55
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,61 @@
+fail_fast: false
+default_stages: [pre-commit]
+
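+# Install the hooks locally with `uv run pre-commit install`; CI runs
+# `uv run pre-commit run --all-files` (see .github/workflows/main.yaml).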
+repos:
+ # ============================================================================
+ # General checks
+ # ============================================================================
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v6.0.0
+ hooks:
+ - id: trailing-whitespace
+ stages: [pre-commit, manual]
+ - id: end-of-file-fixer
+ stages: [pre-commit, manual]
+ - id: check-yaml
+ stages: [pre-commit, manual]
+ - id: check-added-large-files
+ stages: [pre-commit, manual]
+ - id: check-docstring-first
+ files: \.py$
+ stages: [pre-commit, manual]
+ - id: check-merge-conflict
+ stages: [pre-commit, manual]
+ - id: check-case-conflict
+ stages: [pre-commit, manual]
+
+ # ============================================================================
+ # Ruff - Modern Python linter and formatter
+ # ============================================================================
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.14.1
+ hooks:
+ # Ruff linter with auto-fix
+ - id: ruff
+ args: [--fix]
+ files: ^(dataframe_expectations|tests)/.*\.py$
+ stages: [pre-commit, manual]
+
+ # Ruff formatter (replaces black)
+ - id: ruff-format
+ files: ^(dataframe_expectations|tests)/.*\.py$
+ stages: [pre-commit, manual]
+
+ # ============================================================================
+ # Type checking - mypy
+ # ============================================================================
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.18.2
+ hooks:
+ - id: mypy
+ files: ^(dataframe_expectations|tests)/.*\.py$
+ args: [--config-file, ./pyproject.toml]
+ additional_dependencies: [
+ types-tabulate,
+ pandas-stubs,
+ pyspark-stubs,
+ types-PyYAML,
+ pytest,
+ pytest-cov,
+ ]
+ stages: [pre-commit, manual]
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..2c07333
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.11
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..3ab705c
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Changelog dataframe-expectations
+
+## Version 0.1.0
+- Initial commit contains all the basic functionality for the library
+- Added documentation
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..0582197
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2025 GetYourGuide
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md
index 45f630e..f95bbfc 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,143 @@
-# dataframe-expectations
+## 🎯 DataFrameExpectations
+
+**DataFrameExpectations** is a Python library designed to validate **Pandas** and **PySpark** DataFrames using customizable, reusable expectations. It simplifies testing in data pipelines and end-to-end workflows by providing a standardized framework for DataFrame validation.
+
+Instead of maintaining ad-hoc validation logic in every pipeline, this library provides a
+single standardized framework. As a result, any contribution made here, such as a new expectation, can be leveraged by all users of the library.
+You can find the complete list of expectations [here](docs/build/html/expectations.html).
+
+
+### Installation
+```bash
+pip install dataframe-expectations
+```
+
+### Development setup
+
+To set up the development environment:
+
+```bash
+# 1. Clone the repository
+git clone https://github.com/getyourguide/dataframe-expectations.git
+cd dataframe-expectations
+
+# 2. Install UV package manager
+pip install uv
+
+# 3. Install development dependencies (this will automatically create a virtual environment)
+uv sync --group dev
+
+# 4. (Optional) To explicitly activate the virtual environment:
+source .venv/bin/activate # On Windows: .venv\Scripts\activate
+
+# 5. Run tests (this will run the tests in the virtual environment)
+uv run pytest tests/ --cov=dataframe_expectations
+```
+
+### Using the library
+
+**Pandas example:**
+```python
+from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+suite = (
+ DataFrameExpectationsSuite()
+ .expect_value_greater_than("age", 18)
+ .expect_value_less_than("age", 10)
+)
+
+# Create a Pandas DataFrame
+import pandas as pd
+test_pandas_df = pd.DataFrame({"age": [20, 15, 30], "name": ["Alice", "Bob", "Charlie"]})
+
+suite.run(test_pandas_df)
+
+```
+
+
+**PySpark example:**
+```python
+from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+suite = (
+ DataFrameExpectationsSuite()
+ .expect_value_greater_than("age", 18)
+ .expect_value_less_than("age", 40)
+)
+
+# Create a PySpark DataFrame
+test_spark_df = spark.createDataFrame(
+ [
+ {"name": "Alice", "age": 20},
+ {"name": "Bob", "age": 15},
+ {"name": "Charlie", "age": 30},
+ ]
+)
+
+suite.run(test_spark_df)
+
+```
+
+**Output:**
+```python
+========================== Running expectations suite ==========================
+ExpectationValueGreaterThan ('age' greater than 18) ... FAIL
+ExpectationValueLessThan ('age' less than 40) ... OK
+============================ 1 success, 1 failures =============================
+
+ExpectationSuiteFailure: (1/2) expectations failed.
+
+================================================================================
+List of violations:
+--------------------------------------------------------------------------------
+[Failed 1/1] ExpectationValueGreaterThan ('age' greater than 18): Found 1 row(s) where 'age' is not greater than 18.
+Some examples of violations:
++-----+------+
+| age | name |
++-----+------+
+| 15 | Bob |
++-----+------+
+================================================================================
+
+```
+
+### How to contribute?
+Contributions are welcome! You can enhance the library by adding new expectations, refining existing ones, or improving the testing framework.
+
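+A new expectation typically subclasses one of the expectation base classes and exposes a
+registered factory function. The sketch below is illustrative only: the class name, the
+column logic, and the factory name are invented for this example, and the PySpark branch
+is omitted for brevity. It follows the `DataFrameAggregationExpectation` pattern used in
+`dataframe_expectations/expectations/aggregation_expectations/`:
+
+```python
+from typing import cast
+
+from pandas import DataFrame as PandasDataFrame
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations.aggregation_expectation import (
+    DataFrameAggregationExpectation,
+)
+from dataframe_expectations.expectations.expectation_registry import register_expectation
+from dataframe_expectations.expectations.utils import requires_params
+from dataframe_expectations.result_message import (
+    DataFrameExpectationFailureMessage,
+    DataFrameExpectationResultMessage,
+    DataFrameExpectationSuccessMessage,
+)
+
+
+class ExpectationColumnNotAllNull(DataFrameAggregationExpectation):
+    """Hypothetical expectation: the column contains at least one non-null value."""
+
+    def __init__(self, column_name: str):
+        self.column_name = column_name
+        super().__init__(
+            expectation_name="ExpectationColumnNotAllNull",
+            column_names=[column_name],  # column existence is checked by the base class
+            description=f"column '{column_name}' has at least one non-null value",
+        )
+
+    def aggregate_and_validate_pandas(
+        self, data_frame: DataFrameLike, **kwargs
+    ) -> DataFrameExpectationResultMessage:
+        # Cast to PandasDataFrame for type safety, mirroring the built-in expectations
+        pandas_df = cast(PandasDataFrame, data_frame)
+        if pandas_df[self.column_name].notna().any():
+            return DataFrameExpectationSuccessMessage(
+                expectation_name=self.get_expectation_name()
+            )
+        return DataFrameExpectationFailureMessage(
+            expectation_str=str(self),
+            data_frame_type=DataFrameType.PANDAS,
+            message=f"Column '{self.column_name}' contains only null values.",
+        )
+
+    def aggregate_and_validate_pyspark(
+        self, data_frame: DataFrameLike, **kwargs
+    ) -> DataFrameExpectationResultMessage:
+        # See the existing aggregation expectations for the F.count / F.when
+        # patterns used on the PySpark side.
+        raise NotImplementedError
+
+
+@register_expectation("ExpectationColumnNotAllNull")
+@requires_params("column_name", types={"column_name": str})
+def create_expectation_column_not_all_null(**kwargs) -> ExpectationColumnNotAllNull:
+    return ExpectationColumnNotAllNull(column_name=kwargs["column_name"])
+```
+
+Remember to add tests for both backends and to update `CHANGELOG.md` (see the pull request checklist).
+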
+### Versioning
+
+This project follows [Semantic Versioning](https://semver.org/) (SemVer):
+- **MAJOR** version for incompatible API changes
+- **MINOR** version for backward-compatible functionality additions
+- **PATCH** version for backward-compatible bug fixes
+
+To update the version, manually edit the version field in `pyproject.toml`:
+
+```toml
+[project]
+version = "0.2.0" # Update this line
+```
+
+Alternatively, you can use uv to bump the version number:
+
+```bash
+# Bump patch version (0.1.0 -> 0.1.1)
+uv version --bump patch
+
+# Bump minor version (0.1.0 -> 0.2.0)
+uv version --bump minor
+
+# Bump major version (0.1.0 -> 1.0.0)
+uv version --bump major
+
+# Set specific version
+uv version 0.2.0
+```
+
+Don't forget to update the [CHANGELOG.md](CHANGELOG.md) file with a detailed description of the changes you've introduced.
+
+### Security
+For security issues, please contact security@getyourguide.com.
+
+### Legal
+dataframe-expectations is licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE.txt) for the full text.
diff --git a/dataframe_expectations/__init__.py b/dataframe_expectations/__init__.py
new file mode 100644
index 0000000..a7c7e58
--- /dev/null
+++ b/dataframe_expectations/__init__.py
@@ -0,0 +1,16 @@
+from enum import Enum
+from typing import Union
+
+from pandas import DataFrame as PandasDataFrame
+from pyspark.sql import DataFrame as PySparkDataFrame
+
+DataFrameLike = Union[PySparkDataFrame, PandasDataFrame]
+
+
+class DataFrameType(str, Enum):
+ """
+ Enum for DataFrame types.
+ """
+
+ PANDAS = "pandas"
+ PYSPARK = "pyspark"
diff --git a/dataframe_expectations/expectations/__init__.py b/dataframe_expectations/expectations/__init__.py
new file mode 100644
index 0000000..09233c2
--- /dev/null
+++ b/dataframe_expectations/expectations/__init__.py
@@ -0,0 +1,110 @@
+from abc import ABC, abstractmethod
+from typing import cast
+
+from pandas import DataFrame as PandasDataFrame
+from pyspark.sql import DataFrame as PySparkDataFrame
+
+# Import the connect DataFrame type for Spark Connect
+try:
+ from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame
+except ImportError:
+ # Fallback for older PySpark versions that don't have connect
+ PySparkConnectDataFrame = None # type: ignore[misc,assignment]
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.result_message import (
+ DataFrameExpectationResultMessage,
+)
+
+
+class DataFrameExpectation(ABC):
+ """
+ Base class for DataFrame expectations.
+ """
+
+ def get_expectation_name(self) -> str:
+ """
+ Returns the class name as the expectation name.
+ """
+ return type(self).__name__
+
+ @abstractmethod
+ def get_description(self) -> str:
+ """
+ Returns a description of the expectation.
+ """
+ raise NotImplementedError(
+ f"description method must be implemented for {self.__class__.__name__}"
+ )
+
+ def __str__(self):
+ """
+ Returns a string representation of the expectation.
+ """
+ return f"{self.get_expectation_name()} ({self.get_description()})"
+
+ @classmethod
+ def infer_data_frame_type(cls, data_frame: DataFrameLike) -> DataFrameType:
+ """
+ Infer the DataFrame type based on the provided DataFrame.
+ """
+ if isinstance(data_frame, PandasDataFrame):
+ return DataFrameType.PANDAS
+ elif isinstance(data_frame, PySparkDataFrame):
+ return DataFrameType.PYSPARK
+ elif PySparkConnectDataFrame is not None and isinstance(
+ data_frame, PySparkConnectDataFrame
+ ):
+ return DataFrameType.PYSPARK
+ else:
+ raise ValueError(f"Unsupported DataFrame type: {type(data_frame)}")
+
+ def validate(self, data_frame: DataFrameLike, **kwargs):
+ """
+ Validate the DataFrame against the expectation.
+ """
+ data_frame_type = self.infer_data_frame_type(data_frame)
+
+ if data_frame_type == DataFrameType.PANDAS:
+ return self.validate_pandas(data_frame=data_frame, **kwargs)
+ elif data_frame_type == DataFrameType.PYSPARK:
+ return self.validate_pyspark(data_frame=data_frame, **kwargs)
+ else:
+ raise ValueError(f"Unsupported DataFrame type: {data_frame_type}")
+
+ @abstractmethod
+ def validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Validate a pandas DataFrame against the expectation.
+ """
+ raise NotImplementedError(
+ f"validate_pandas method must be implemented for {self.__class__.__name__}"
+ )
+
+ @abstractmethod
+ def validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Validate a PySpark DataFrame against the expectation.
+ """
+ raise NotImplementedError(
+ f"validate_pyspark method must be implemented for {self.__class__.__name__}"
+ )
+
+ @classmethod
+ def num_data_frame_rows(cls, data_frame: DataFrameLike) -> int:
+ """
+ Count the number of rows in the DataFrame.
+ """
+ data_frame_type = cls.infer_data_frame_type(data_frame)
+ if data_frame_type == DataFrameType.PANDAS:
+ # Cast to PandasDataFrame since we know it's a Pandas DataFrame at this point
+ return len(cast(PandasDataFrame, data_frame))
+ elif data_frame_type == DataFrameType.PYSPARK:
+ # Cast to PySparkDataFrame since we know it's a PySpark DataFrame at this point
+ return cast(PySparkDataFrame, data_frame).count()
+ else:
+ raise ValueError(f"Unsupported DataFrame type: {data_frame_type}")
diff --git a/dataframe_expectations/expectations/aggregation_expectation.py b/dataframe_expectations/expectations/aggregation_expectation.py
new file mode 100644
index 0000000..b6a1b2e
--- /dev/null
+++ b/dataframe_expectations/expectations/aggregation_expectation.py
@@ -0,0 +1,130 @@
+from abc import abstractmethod
+from typing import List, Union
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations import DataFrameExpectation
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+)
+
+
+class DataFrameAggregationExpectation(DataFrameExpectation):
+ """
+ Base class for DataFrame aggregation expectations.
+ This class is designed to first aggregate data and then validate the aggregation results.
+ """
+
+ def __init__(
+ self,
+ expectation_name: str,
+ column_names: List[str],
+ description: str,
+ ):
+ """
+ Template for implementing DataFrame aggregation expectations, where data is first aggregated
+ and then the aggregation results are validated.
+
+ :param expectation_name: The name of the expectation. This will be used during logging.
+ :param column_names: The list of column names to aggregate on.
+ :param description: A description of the expectation used in logging.
+ """
+ self.expectation_name = expectation_name
+ self.column_names = column_names
+ self.description = description
+
+ def get_expectation_name(self) -> str:
+ """
+ Returns the expectation name.
+ """
+ return self.expectation_name
+
+ def get_description(self) -> str:
+ """
+ Returns a description of the expectation.
+ """
+ return self.description
+
+ @abstractmethod
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Aggregate and validate a pandas DataFrame against the expectation.
+
+ Note: This method should NOT check for column existence - that's handled
+ automatically by the validate_pandas method.
+ """
+ raise NotImplementedError(
+ f"aggregate_and_validate_pandas method must be implemented for {self.__class__.__name__}"
+ )
+
+ @abstractmethod
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Aggregate and validate a PySpark DataFrame against the expectation.
+
+ Note: This method should NOT check for column existence - that's handled
+ automatically by the validate_pyspark method.
+ """
+ raise NotImplementedError(
+ f"aggregate_and_validate_pyspark method must be implemented for {self.__class__.__name__}"
+ )
+
+ def validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Validate a pandas DataFrame against the expectation.
+ Automatically checks column existence before calling the implementation.
+ """
+ # Check if all required columns exist
+ column_error = self._check_columns_exist(data_frame)
+ if column_error:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=column_error,
+ )
+
+ # Call the implementation-specific validation
+ return self.aggregate_and_validate_pandas(data_frame, **kwargs)
+
+ def validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Validate a PySpark DataFrame against the expectation.
+ Automatically checks column existence before calling the implementation.
+ """
+ # Check if all required columns exist
+ column_error = self._check_columns_exist(data_frame)
+ if column_error:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=column_error,
+ )
+
+ # Call the implementation-specific validation
+ return self.aggregate_and_validate_pyspark(data_frame, **kwargs)
+
+ def _check_columns_exist(self, data_frame: DataFrameLike) -> Union[str, None]:
+ """
+ Check if all required columns exist in the DataFrame.
+ Returns error message if columns are missing, None otherwise.
+ """
+ # Skip column check if no columns are required (e.g., for DataFrame-level expectations)
+ if not self.column_names:
+ return None
+
+ missing_columns = [col for col in self.column_names if col not in data_frame.columns]
+ if missing_columns:
+ if len(missing_columns) == 1:
+ return f"Column '{missing_columns[0]}' does not exist in the DataFrame."
+ else:
+ missing_columns_str = ", ".join([f"'{col}'" for col in missing_columns])
+ return f"Columns [{missing_columns_str}] do not exist in the DataFrame."
+ return None
diff --git a/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py b/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py
new file mode 100644
index 0000000..c428b4b
--- /dev/null
+++ b/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py
@@ -0,0 +1,490 @@
+from typing import cast
+
+from pandas import DataFrame as PandasDataFrame
+from pyspark.sql import DataFrame as PySparkDataFrame
+from pyspark.sql import functions as F
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations.aggregation_expectation import (
+ DataFrameAggregationExpectation,
+)
+from dataframe_expectations.expectations.expectation_registry import (
+ register_expectation,
+)
+from dataframe_expectations.expectations.utils import requires_params
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+class ExpectationMinRows(DataFrameAggregationExpectation):
+ """
+ Expectation that validates a DataFrame has at least a minimum number of rows.
+
+ This expectation counts the total number of rows in the DataFrame and checks if it
+ meets or exceeds the specified minimum threshold.
+
+ Examples:
+ DataFrame with 100 rows:
+ - ExpectationMinRows(min_rows=50) → PASS
+ - ExpectationMinRows(min_rows=150) → FAIL
+ """
+
+ def __init__(self, min_rows: int):
+ """
+ Initialize the minimum rows expectation.
+
+ Args:
+ min_rows (int): Minimum number of rows required (inclusive).
+ """
+ if min_rows < 0:
+ raise ValueError(f"min_rows must be non-negative, got {min_rows}")
+
+ description = f"DataFrame contains at least {min_rows} rows"
+
+ self.min_rows = min_rows
+
+ super().__init__(
+ expectation_name="ExpectationMinRows",
+ column_names=[], # No specific columns required
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate minimum rows in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ row_count = len(pandas_df)
+
+ if row_count >= self.min_rows:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"DataFrame has {row_count} rows, expected at least {self.min_rows}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error counting DataFrame rows: {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate minimum rows in a PySpark DataFrame."""
+ try:
+            # Cast to PySparkDataFrame for type safety, as in the other expectations
+            pyspark_df = cast(PySparkDataFrame, data_frame)
+            row_count = pyspark_df.count()
+
+ if row_count >= self.min_rows:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"DataFrame has {row_count} rows, expected at least {self.min_rows}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error counting DataFrame rows: {str(e)}",
+ )
+
+
+class ExpectationMaxRows(DataFrameAggregationExpectation):
+ """
+ Expectation that validates a DataFrame has at most a maximum number of rows.
+
+ This expectation counts the total number of rows in the DataFrame and checks if it
+ does not exceed the specified maximum threshold.
+
+ Examples:
+ DataFrame with 100 rows:
+ - ExpectationMaxRows(max_rows=150) → PASS
+ - ExpectationMaxRows(max_rows=50) → FAIL
+ """
+
+ def __init__(self, max_rows: int):
+ """
+ Initialize the maximum rows expectation.
+
+ Args:
+ max_rows (int): Maximum number of rows allowed (inclusive).
+ """
+ if max_rows < 0:
+ raise ValueError(f"max_rows must be non-negative, got {max_rows}")
+
+ description = f"DataFrame contains at most {max_rows} rows"
+
+ self.max_rows = max_rows
+
+ super().__init__(
+ expectation_name="ExpectationMaxRows",
+ column_names=[], # No specific columns required
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate maximum rows in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ row_count = len(pandas_df)
+
+ if row_count <= self.max_rows:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"DataFrame has {row_count} rows, expected at most {self.max_rows}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error counting DataFrame rows: {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate maximum rows in a PySpark DataFrame."""
+ try:
+            # Cast to PySparkDataFrame for type safety, as in the other expectations
+            pyspark_df = cast(PySparkDataFrame, data_frame)
+            row_count = pyspark_df.count()
+
+ if row_count <= self.max_rows:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"DataFrame has {row_count} rows, expected at most {self.max_rows}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error counting DataFrame rows: {str(e)}",
+ )
+
+
+class ExpectationMaxNullPercentage(DataFrameAggregationExpectation):
+ """
+    Expectation that validates that the percentage of null/NaN values in a specific
+    column does not exceed a specified threshold.
+
+    This expectation counts null values (including NaN for pandas) in the specified column,
+    computes the percentage relative to the total number of rows, and checks that it does
+    not exceed the specified maximum threshold.
+
+ Examples:
+ Column with 100 rows and 5 null values (5% null):
+ - ExpectationMaxNullPercentage(column_name="age", max_percentage=10.0) → PASS
+ - ExpectationMaxNullPercentage(column_name="age", max_percentage=3.0) → FAIL
+
+ Note: The percentage is expressed as a value between 0.0 and 100.0 (e.g., 5.5 for 5.5%).
+ """
+
+ def __init__(self, column_name: str, max_percentage: float):
+ """
+ Initialize the maximum null percentage expectation.
+
+ Args:
+ column_name (str): Name of the column to check for null percentage.
+ max_percentage (float): Maximum percentage of null values allowed (0.0-100.0).
+ """
+ if not 0 <= max_percentage <= 100:
+ raise ValueError(f"max_percentage must be between 0.0 and 100.0, got {max_percentage}")
+
+ description = f"column '{column_name}' null percentage is at most {max_percentage}%"
+
+ self.column_name = column_name
+ self.max_percentage = max_percentage
+
+ super().__init__(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_names=[column_name], # Specify the required column
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate maximum null percentage in a pandas DataFrame column."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Get total number of rows
+ total_rows = len(pandas_df)
+
+ if total_rows == 0:
+ # Empty DataFrame has 0% null values
+ actual_percentage = 0.0
+ else:
+ # Count null and NaN values in the specific column using isnull() which handles both
+ null_count = pandas_df[self.column_name].isnull().sum()
+ actual_percentage = (null_count / total_rows) * 100
+
+ if actual_percentage <= self.max_percentage:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' has {actual_percentage:.2f}% null values, expected at most {self.max_percentage:.2f}%.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error calculating null percentage for column '{self.column_name}': {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate maximum null percentage in a PySpark DataFrame column."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+
+ # Get total number of rows
+ total_rows = pyspark_df.count()
+
+ if total_rows == 0:
+ # Empty DataFrame has 0% null values
+ actual_percentage = 0.0
+ else:
+ # Count null values in the specific column
+ null_count_result = pyspark_df.select(
+ F.sum(F.when(F.col(self.column_name).isNull(), 1).otherwise(0)).alias(
+ "null_count"
+ )
+ ).collect()
+
+ null_count = null_count_result[0]["null_count"]
+ actual_percentage = (null_count / total_rows) * 100
+
+ if actual_percentage <= self.max_percentage:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' has {actual_percentage:.2f}% null values, expected at most {self.max_percentage:.2f}%.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error calculating null percentage for column '{self.column_name}': {str(e)}",
+ )
+
+
+class ExpectationMaxNullCount(DataFrameAggregationExpectation):
+ """
+    Expectation that validates that the absolute count of null/NaN values in a specific
+    column does not exceed a specified threshold.
+
+    This expectation counts null values (including NaN for pandas) in the specified column
+    and checks that the count does not exceed the specified maximum threshold.
+
+ Examples:
+ Column with 100 rows and 5 null values:
+ - ExpectationMaxNullCount(column_name="age", max_count=10) → PASS
+ - ExpectationMaxNullCount(column_name="age", max_count=3) → FAIL
+
+ Note: The count is the absolute number of null values, not a percentage.
+ """
+
+ def __init__(self, column_name: str, max_count: int):
+ """
+ Initialize the maximum null count expectation.
+
+ Args:
+ column_name (str): Name of the column to check for null count.
+ max_count (int): Maximum number of null values allowed.
+ """
+ if max_count < 0:
+ raise ValueError(f"max_count must be non-negative, got {max_count}")
+
+ description = f"column '{column_name}' has at most {max_count} null values"
+
+ self.column_name = column_name
+ self.max_count = max_count
+
+ super().__init__(
+ expectation_name="ExpectationMaxNullCount",
+ column_names=[column_name], # Specify the required column
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate maximum null count in a pandas DataFrame column."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Count null and NaN values in the specific column using isnull() which handles both
+ null_count = pandas_df[self.column_name].isnull().sum()
+
+ if null_count <= self.max_count:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' has {null_count} null values, expected at most {self.max_count}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error calculating null count for column '{self.column_name}': {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate maximum null count in a PySpark DataFrame column."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+
+ # Count null values in the specific column
+ null_count_result = pyspark_df.select(
+ F.sum(F.when(F.col(self.column_name).isNull(), 1).otherwise(0)).alias("null_count")
+ ).collect()
+
+ # Handle the case where null_count might be None (e.g., empty DataFrame)
+ null_count = null_count_result[0]["null_count"]
+ if null_count is None:
+ null_count = 0
+
+ if null_count <= self.max_count:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' has {null_count} null values, expected at most {self.max_count}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error calculating null count for column '{self.column_name}': {str(e)}",
+ )
+
+
+# Factory functions for the registry
+@register_expectation("ExpectationMinRows")
+@requires_params("min_rows", types={"min_rows": int})
+def create_expectation_min_rows(**kwargs) -> ExpectationMinRows:
+ """
+    Create an ExpectationMinRows instance.
+
+ Args:
+ min_rows (int): Minimum number of rows required.
+
+ Returns:
+ ExpectationMinRows: A configured expectation instance.
+ """
+ return ExpectationMinRows(min_rows=kwargs["min_rows"])
+
+
+@register_expectation("ExpectationMaxRows")
+@requires_params("max_rows", types={"max_rows": int})
+def create_expectation_max_rows(**kwargs) -> ExpectationMaxRows:
+ """
+ Create an ExpectationMaxRows instance.
+
+ Args:
+ max_rows (int): Maximum number of rows allowed.
+
+ Returns:
+ ExpectationMaxRows: A configured expectation instance.
+ """
+ return ExpectationMaxRows(max_rows=kwargs["max_rows"])
+
+
+@register_expectation("ExpectationMaxNullPercentage")
+@requires_params(
+ "column_name",
+ "max_percentage",
+ types={"column_name": str, "max_percentage": (int, float)},
+)
+def create_expectation_max_null_percentage(**kwargs) -> ExpectationMaxNullPercentage:
+ """
+ Create an ExpectationMaxNullPercentage instance.
+
+ Args:
+ column_name (str): Name of the column to check for null percentage.
+ max_percentage (float): Maximum percentage of null values allowed (0.0-100.0).
+
+ Returns:
+ ExpectationMaxNullPercentage: A configured expectation instance.
+ """
+ return ExpectationMaxNullPercentage(
+ column_name=kwargs["column_name"],
+ max_percentage=kwargs["max_percentage"],
+ )
+
+
+@register_expectation("ExpectationMaxNullCount")
+@requires_params(
+ "column_name",
+ "max_count",
+ types={"column_name": str, "max_count": int},
+)
+def create_expectation_max_null_count(**kwargs) -> ExpectationMaxNullCount:
+ """
+ Create an ExpectationMaxNullCount instance.
+
+ Args:
+ column_name (str): Name of the column to check for null count.
+ max_count (int): Maximum number of null values allowed.
+
+ Returns:
+ ExpectationMaxNullCount: A configured expectation instance.
+ """
+ return ExpectationMaxNullCount(
+ column_name=kwargs["column_name"],
+ max_count=kwargs["max_count"],
+ )
diff --git a/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py b/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py
new file mode 100644
index 0000000..b9e8b53
--- /dev/null
+++ b/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py
@@ -0,0 +1,472 @@
+from typing import Union, cast
+
+import pandas as pd
+from pandas import DataFrame as PandasDataFrame
+from pyspark.sql import DataFrame as PySparkDataFrame
+from pyspark.sql import functions as F
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations.aggregation_expectation import (
+ DataFrameAggregationExpectation,
+)
+from dataframe_expectations.expectations.expectation_registry import (
+ register_expectation,
+)
+from dataframe_expectations.expectations.utils import requires_params
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+class ExpectationColumnQuantileBetween(DataFrameAggregationExpectation):
+ """
+ Expectation that validates a quantile value of a column falls within a specified range.
+
+ This expectation computes the specified quantile of the column and checks if it
+ falls between the provided minimum and maximum bounds (inclusive).
+
+ Quantile values:
+ - 0.0 = minimum value
+ - 0.5 = median value
+ - 1.0 = maximum value
+ - Any value between 0.0 and 1.0 for custom quantiles
+
+ Examples:
+ Column 'age' with values [20, 25, 30, 35]:
+ - quantile=0.5 (median) = 27.5
+ - ExpectationColumnQuantileBetween(column_name="age", quantile=0.5, min_value=25, max_value=30) → PASS
+ - ExpectationColumnQuantileBetween(column_name="age", quantile=1.0, min_value=30, max_value=40) → PASS (max=35)
+ - ExpectationColumnQuantileBetween(column_name="age", quantile=0.0, min_value=15, max_value=25) → PASS (min=20)
+ """
+
+ def __init__(
+ self,
+ column_name: str,
+ quantile: float,
+ min_value: Union[int, float],
+ max_value: Union[int, float],
+ ):
+ """
+ Initialize the column quantile between expectation.
+
+ Args:
+ column_name (str): Name of the column to check.
+ quantile (float): Quantile to compute (0.0 to 1.0, where 0.0=min, 0.5=median, 1.0=max).
+ min_value (Union[int, float]): Minimum allowed value for the column quantile (inclusive).
+ max_value (Union[int, float]): Maximum allowed value for the column quantile (inclusive).
+
+ Raises:
+ ValueError: If quantile is not between 0.0 and 1.0.
+ """
+ if not (0.0 <= quantile <= 1.0):
+ raise ValueError(f"Quantile must be between 0.0 and 1.0, got {quantile}")
+
+ # Create descriptive names for common quantiles
+ quantile_names = {
+ 0.0: "minimum",
+ 0.25: "25th percentile",
+ 0.5: "median",
+ 0.75: "75th percentile",
+ 1.0: "maximum",
+ }
+ self.quantile_desc = quantile_names.get(quantile, f"{quantile} quantile")
+
+ description = (
+ f"column '{column_name}' {self.quantile_desc} value between {min_value} and {max_value}"
+ )
+
+ self.column_name = column_name
+ self.quantile = quantile
+ self.min_value = min_value
+ self.max_value = max_value
+
+ super().__init__(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_names=[column_name],
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate column quantile in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Calculate quantile
+ if self.quantile == 0.0:
+ quantile_val = pandas_df[self.column_name].min()
+ elif self.quantile == 1.0:
+ quantile_val = pandas_df[self.column_name].max()
+ elif self.quantile == 0.5:
+ quantile_val = pandas_df[self.column_name].median()
+ else:
+ quantile_val = pandas_df[self.column_name].quantile(self.quantile)
+
+ # Handle case where all values are null
+ if pd.isna(quantile_val):
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' contains only null values.",
+ )
+
+ # Check if quantile is within bounds
+ if self.min_value <= quantile_val <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=(
+ f"Column '{self.column_name}' {self.quantile_desc} value {quantile_val} is not between "
+ f"{self.min_value} and {self.max_value}."
+ ),
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error calculating {self.quantile} quantile for column '{self.column_name}': {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate column quantile in a PySpark DataFrame."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+ # First check if all values are null to avoid edge cases
+ non_null_count = pyspark_df.select(F.count(self.column_name)).collect()[0][0]
+ if non_null_count == 0:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' contains only null values.",
+ )
+
+ # Calculate quantile
+ if self.quantile == 0.0:
+ result = pyspark_df.select(F.min(self.column_name).alias("quantile_val")).collect()
+ elif self.quantile == 1.0:
+ result = pyspark_df.select(F.max(self.column_name).alias("quantile_val")).collect()
+ elif self.quantile == 0.5:
+ result = pyspark_df.select(
+ F.median(self.column_name).alias("quantile_val") # type: ignore
+ ).collect()
+ else:
+ # Use percentile_approx for other quantiles
+ result = pyspark_df.select(
+ F.percentile_approx(F.col(self.column_name), F.lit(self.quantile)).alias( # type: ignore
+ "quantile_val"
+ )
+ ).collect()
+
+ quantile_val = result[0]["quantile_val"]
+
+ # Defensive check: quantile_val should not be None after the non-null count check above,
+ # but we keep this for extra safety in case of unexpected Spark behavior or schema issues.
+ if quantile_val is None:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' contains only null values.",
+ )
+
+ # Check if quantile is within bounds
+ if self.min_value <= quantile_val <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' {self.quantile_desc} value {quantile_val} is not between {self.min_value} and {self.max_value}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error calculating {self.quantile} quantile for column '{self.column_name}': {str(e)}",
+ )
+
+
+class ExpectationColumnMeanBetween(DataFrameAggregationExpectation):
+ """
+ Expectation that validates the mean value of a column falls within a specified range.
+
+ This expectation computes the mean (average) value of the specified column and checks if it
+ falls between the provided minimum and maximum bounds (inclusive).
+
+ Note: Mean is implemented separately since it's not a quantile operation.
+
+ Examples:
+ Column 'age' with values [20, 25, 30, 35]:
+ - mean_value = 27.5
+ - ExpectationColumnMeanBetween(column_name="age", min_value=25, max_value=30) → PASS
+ - ExpectationColumnMeanBetween(column_name="age", min_value=30, max_value=35) → FAIL
+ """
+
+ def __init__(
+ self,
+ column_name: str,
+ min_value: Union[int, float],
+ max_value: Union[int, float],
+ ):
+ """
+ Initialize the column mean between expectation.
+
+ Args:
+ column_name (str): Name of the column to check.
+ min_value (Union[int, float]): Minimum allowed value for the column mean (inclusive).
+ max_value (Union[int, float]): Maximum allowed value for the column mean (inclusive).
+ """
+ description = f"column '{column_name}' mean value between {min_value} and {max_value}"
+
+ self.column_name = column_name
+ self.min_value = min_value
+ self.max_value = max_value
+
+ super().__init__(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_names=[column_name],
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate column mean in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Calculate mean
+ mean_val = pandas_df[self.column_name].mean()
+
+ # Handle case where all values are null
+ if pd.isna(mean_val):
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' contains only null values.",
+ )
+
+ # Check if mean is within bounds
+ if self.min_value <= mean_val <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error calculating mean for column '{self.column_name}': {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate column mean in a PySpark DataFrame."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+ # Calculate mean
+ mean_result = pyspark_df.select(F.avg(self.column_name).alias("mean_val")).collect()
+ mean_val = mean_result[0]["mean_val"]
+
+ # Handle case where all values are null
+ if mean_val is None:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' contains only null values.",
+ )
+
+ # Check if mean is within bounds
+ if self.min_value <= mean_val <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error calculating mean for column '{self.column_name}': {str(e)}",
+ )
+
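+# Illustrative usage sketch (not part of the module; in practice expectations
+# are typically created via the registry, registered below):
+#
+#     import pandas as pd
+#
+#     exp = ExpectationColumnMeanBetween(column_name="age", min_value=25, max_value=30)
+#     result = exp.aggregate_and_validate_pandas(pd.DataFrame({"age": [20, 25, 30, 35]}))
+#     # The mean is 27.5, so `result` is a DataFrameExpectationSuccessMessage.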
+
+# Register the main expectation
+@register_expectation("ExpectationColumnQuantileBetween")
+@requires_params(
+ "column_name",
+ "quantile",
+ "min_value",
+ "max_value",
+ types={
+ "column_name": str,
+ "quantile": float,
+ "min_value": (int, float),
+ "max_value": (int, float),
+ },
+)
+def create_expectation_column_quantile_to_be_between(
+ **kwargs,
+) -> ExpectationColumnQuantileBetween:
+ """
+ Create an ExpectationColumnQuantileBetween instance.
+
+ Args:
+ column_name (str): Name of the column to check.
+ quantile (float): Quantile to compute (0.0 to 1.0).
+ min_value (Union[int, float]): Minimum allowed value for the column quantile.
+ max_value (Union[int, float]): Maximum allowed value for the column quantile.
+
+ Returns:
+ ExpectationColumnQuantileBetween: A configured expectation instance.
+ """
+ return ExpectationColumnQuantileBetween(
+ column_name=kwargs["column_name"],
+ quantile=kwargs["quantile"],
+ min_value=kwargs["min_value"],
+ max_value=kwargs["max_value"],
+ )
+
+
+# Convenience functions for common quantiles
+@register_expectation("ExpectationColumnMaxBetween")
+@requires_params(
+ "column_name",
+ "min_value",
+ "max_value",
+ types={"column_name": str, "min_value": (int, float), "max_value": (int, float)},
+)
+def create_expectation_column_max_to_be_between(
+ **kwargs,
+) -> ExpectationColumnQuantileBetween:
+ """
+ Create an ExpectationColumnQuantileBetween instance for maximum values (quantile=1.0).
+
+ Args:
+ column_name (str): Name of the column to check.
+ min_value (Union[int, float]): Minimum allowed value for the column maximum.
+ max_value (Union[int, float]): Maximum allowed value for the column maximum.
+
+ Returns:
+ ExpectationColumnQuantileBetween: A configured expectation instance for maximum values.
+ """
+ return ExpectationColumnQuantileBetween(
+ column_name=kwargs["column_name"],
+ quantile=1.0,
+ min_value=kwargs["min_value"],
+ max_value=kwargs["max_value"],
+ )
+
+
+@register_expectation("ExpectationColumnMinBetween")
+@requires_params(
+ "column_name",
+ "min_value",
+ "max_value",
+ types={"column_name": str, "min_value": (int, float), "max_value": (int, float)},
+)
+def create_expectation_column_min_to_be_between(
+ **kwargs,
+) -> ExpectationColumnQuantileBetween:
+ """
+ Create an ExpectationColumnQuantileBetween instance for minimum values (quantile=0.0).
+
+ Args:
+ column_name (str): Name of the column to check.
+ min_value (Union[int, float]): Minimum allowed value for the column minimum.
+ max_value (Union[int, float]): Maximum allowed value for the column minimum.
+
+ Returns:
+ ExpectationColumnQuantileBetween: A configured expectation instance for minimum values.
+ """
+ return ExpectationColumnQuantileBetween(
+ column_name=kwargs["column_name"],
+ quantile=0.0,
+ min_value=kwargs["min_value"],
+ max_value=kwargs["max_value"],
+ )
+
+
+@register_expectation("ExpectationColumnMeanBetween")
+@requires_params(
+ "column_name",
+ "min_value",
+ "max_value",
+ types={"column_name": str, "min_value": (int, float), "max_value": (int, float)},
+)
+def create_expectation_column_mean_to_be_between(
+ **kwargs,
+) -> ExpectationColumnMeanBetween:
+ """
+    Create an ExpectationColumnMeanBetween instance for mean values.
+
+    Note: This uses a separate implementation since mean is not a quantile.
+
+ Args:
+ column_name (str): Name of the column to check.
+ min_value (Union[int, float]): Minimum allowed value for the column mean.
+ max_value (Union[int, float]): Maximum allowed value for the column mean.
+
+ Returns:
+ ExpectationColumnMeanBetween: A configured expectation instance for mean values.
+ """
+ # For mean, we need a separate class since it's not a quantile
+ return ExpectationColumnMeanBetween(
+ column_name=kwargs["column_name"],
+ min_value=kwargs["min_value"],
+ max_value=kwargs["max_value"],
+ )
+
+
+@register_expectation("ExpectationColumnMedianBetween")
+@requires_params(
+ "column_name",
+ "min_value",
+ "max_value",
+ types={"column_name": str, "min_value": (int, float), "max_value": (int, float)},
+)
+def create_expectation_column_median_to_be_between(
+ **kwargs,
+) -> ExpectationColumnQuantileBetween:
+ """
+ Create an ExpectationColumnQuantileBetween instance for median values (quantile=0.5).
+
+ Args:
+ column_name (str): Name of the column to check.
+ min_value (Union[int, float]): Minimum allowed value for the column median.
+ max_value (Union[int, float]): Maximum allowed value for the column median.
+
+ Returns:
+ ExpectationColumnQuantileBetween: A configured expectation instance for median values.
+ """
+ return ExpectationColumnQuantileBetween(
+ column_name=kwargs["column_name"],
+ quantile=0.5,
+ min_value=kwargs["min_value"],
+ max_value=kwargs["max_value"],
+ )
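+
+
+# Illustrative sketch of creating these expectations through the registry
+# (see expectation_registry.py later in this diff); the parameter values are
+# hypothetical:
+#
+#     from dataframe_expectations.expectations.expectation_registry import (
+#         DataFrameExpectationRegistry,
+#     )
+#
+#     exp = DataFrameExpectationRegistry.get_expectation(
+#         "ExpectationColumnMedianBetween", column_name="age", min_value=20, max_value=40
+#     )
+#     # Equivalent to ExpectationColumnQuantileBetween(column_name="age",
+#     # quantile=0.5, min_value=20, max_value=40).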
diff --git a/dataframe_expectations/expectations/aggregation_expectations/unique.py b/dataframe_expectations/expectations/aggregation_expectations/unique.py
new file mode 100644
index 0000000..5583494
--- /dev/null
+++ b/dataframe_expectations/expectations/aggregation_expectations/unique.py
@@ -0,0 +1,667 @@
+from typing import List, cast
+
+import pandas as pd
+from pandas import DataFrame as PandasDataFrame
+from pyspark.sql import DataFrame as PySparkDataFrame
+from pyspark.sql import functions as F
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations.aggregation_expectation import (
+ DataFrameAggregationExpectation,
+)
+from dataframe_expectations.expectations.expectation_registry import (
+ register_expectation,
+)
+from dataframe_expectations.expectations.utils import requires_params
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+class ExpectationUniqueRows(DataFrameAggregationExpectation):
+ """
+    Expectation that checks if there are no duplicate rows for the given column names.
+    If the column list is empty, checks for duplicates across all columns.
+
+ For example:
+ For column_names ["col1", "col2"]
+
+ Given the following DataFrame:
+
+ | col1 | col2 | col3 |
+ |------|------|------|
+ | 1 | 10 | 100 |
+ | 2 | 20 | 100 |
+ | 3 | 30 | 100 |
+ | 1 | 20 | 100 |
+
+    All rows are unique for columns ["col1", "col2"], so there are no violations.
+
+    For the same column_names and the following DataFrame:
+
+ | col1 | col2 | col3 |
+ |------|------|------|
+ | 1 | 10 | 100 |
+ | 2 | 20 | 100 |
+ | 3 | 30 | 100 |
+ | 1 | 10 | 100 |
+
+    The first and last rows are duplicates for columns ["col1", "col2"], so the
+    expectation fails: one duplicate group containing 2 duplicate rows.
+
+ """
+
+ def __init__(self, column_names: List[str]):
+ """
+ Initialize the unique expectation.
+
+ :param column_names: List of column names to check for uniqueness.
+            If empty, checks all columns.
+ """
+ description = (
+ f"all rows unique for columns {column_names}"
+ if column_names
+ else "all rows unique across all columns"
+ )
+
+ self.column_names = column_names
+
+ super().__init__(
+ expectation_name="ExpectationUniqueRows",
+ column_names=column_names,
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Validate uniqueness in a pandas DataFrame.
+ """
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+
+ # If columns list is empty, use all columns
+ check_columns = self.column_names if self.column_names else list(pandas_df.columns)
+
+        # Find duplicates - keep=False marks every row that belongs to a duplicate group.
+        # pandas treats null values as equal here, so rows with nulls can be duplicates of each other.
+ duplicates = pandas_df[pandas_df.duplicated(subset=check_columns, keep=False)]
+
+ if len(duplicates) == 0:
+ return DataFrameExpectationSuccessMessage(expectation_name=self.get_expectation_name())
+
+ # Add duplicate count column and keep only one row per duplicate group
+ duplicate_counts = (
+ pandas_df.groupby(check_columns, dropna=False).size().reset_index(name="#duplicates")
+ )
+ # Filter to only keep groups with duplicates (count > 1)
+ duplicate_counts = duplicate_counts[duplicate_counts["#duplicates"] > 1]
+
+ # Order by #duplicates, then by the specified columns
+ sort_columns = ["#duplicates"] + check_columns
+ duplicates_with_counts = duplicate_counts.sort_values(sort_columns)
+
+ # Replace NaN with None
+ duplicates_with_counts = duplicates_with_counts.map(lambda x: None if pd.isna(x) else x)
+
+ # Calculate total number of duplicate rows (not groups)
+ total_duplicate_rows = duplicates_with_counts["#duplicates"].sum()
+
+ # Generate dynamic error message
+ error_msg = (
+ f"duplicate rows found for columns {self.column_names}"
+ if self.column_names
+ else "duplicate rows found"
+ )
+
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=duplicates_with_counts,
+ message=f"Found {total_duplicate_rows} duplicate row(s). {error_msg}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Validate uniqueness in a PySpark DataFrame.
+ """
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+
+ # If columns list is empty, use all columns
+ check_columns = self.column_names if self.column_names else pyspark_df.columns
+
+ # Group by the specified columns and count duplicates
+ duplicates_df = (
+ pyspark_df.groupBy(*check_columns)
+ .count()
+ .filter(F.col("count") > 1)
+ .withColumnRenamed("count", "#duplicates")
+ .orderBy(F.col("#duplicates"), *check_columns)
+ )
+
+ duplicate_count = duplicates_df.count()
+
+ if duplicate_count == 0:
+ return DataFrameExpectationSuccessMessage(expectation_name=self.get_expectation_name())
+
+ # Calculate total number of duplicate rows (not groups)
+ total_duplicate_rows = duplicates_df.agg(F.sum("#duplicates")).collect()[0][0]
+
+ # Generate dynamic error message
+ error_msg = (
+ f"duplicate rows found for columns {self.column_names}"
+ if self.column_names
+ else "duplicate rows found"
+ )
+
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=duplicates_df,
+ message=f"Found {total_duplicate_rows} duplicate row(s). {error_msg}",
+ )
+
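+# Illustrative usage sketch (the data is hypothetical):
+#
+#     import pandas as pd
+#
+#     exp = ExpectationUniqueRows(column_names=["col1", "col2"])
+#     df = pd.DataFrame(
+#         {"col1": [1, 2, 1], "col2": [10, 20, 10], "col3": [100, 100, 100]}
+#     )
+#     result = exp.aggregate_and_validate_pandas(df)
+#     # Fails: rows 0 and 2 share (col1, col2) = (1, 10); the violations
+#     # DataFrame reports that group with "#duplicates" = 2.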
+
+class ExpectationDistinctColumnValuesEquals(DataFrameAggregationExpectation):
+ """
+ Expectation that validates a column has exactly a specified number of distinct values.
+
+ This expectation counts the number of unique/distinct values in a specified column
+ and checks if it equals the expected count.
+
+ Examples:
+ Column with values [1, 2, 3, 2, 1] has 3 distinct values:
+ - ExpectationDistinctColumnValuesEquals(column_name="col1", expected_value=3) → PASS
+ - ExpectationDistinctColumnValuesEquals(column_name="col1", expected_value=5) → FAIL
+
+    Note: The comparison is exact equality.
+ """
+
+ def __init__(self, column_name: str, expected_value: int):
+ """
+ Initialize the distinct values equals expectation.
+
+ Args:
+ column_name (str): Name of the column to check.
+ expected_value (int): Expected number of distinct values (exact match).
+ """
+ if expected_value < 0:
+ raise ValueError(f"expected_value must be non-negative, got {expected_value}")
+
+ description = f"column '{column_name}' has exactly {expected_value} distinct values"
+
+ self.column_name = column_name
+ self.expected_value = expected_value
+
+ super().__init__(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_names=[column_name],
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Count distinct values (dropna=False includes NaN as a distinct value)
+ actual_count = pandas_df[self.column_name].nunique(dropna=False)
+
+ if actual_count == self.expected_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected exactly {self.expected_value}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a PySpark DataFrame."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+ # Count distinct values including nulls
+ actual_count = pyspark_df.select(self.column_name).distinct().count()
+
+ if actual_count == self.expected_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected exactly {self.expected_value}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+
+class ExpectationDistinctColumnValuesLessThan(DataFrameAggregationExpectation):
+ """
+ Expectation that validates a column has fewer than a specified number of distinct values.
+
+ This expectation counts the number of unique/distinct values in a specified column
+ and checks if it's less than the specified threshold.
+
+ Examples:
+ Column with values [1, 2, 3, 2, 1] has 3 distinct values:
+ - ExpectationDistinctColumnValuesLessThan(column_name="col1", threshold=5) → PASS (3 < 5)
+ - ExpectationDistinctColumnValuesLessThan(column_name="col1", threshold=3) → FAIL (3 is not < 3)
+
+ Note: The threshold is exclusive (actual_count < threshold).
+ """
+
+ def __init__(self, column_name: str, threshold: int):
+ """
+ Initialize the distinct values less than expectation.
+
+ Args:
+ column_name (str): Name of the column to check.
+ threshold (int): Threshold for distinct values count (exclusive upper bound).
+ """
+ if threshold < 0:
+ raise ValueError(f"threshold must be non-negative, got {threshold}")
+
+ description = f"column '{column_name}' has fewer than {threshold} distinct values"
+
+ self.column_name = column_name
+ self.threshold = threshold
+
+ super().__init__(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_names=[column_name],
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Count distinct values (dropna=False includes NaN as a distinct value)
+ actual_count = pandas_df[self.column_name].nunique(dropna=False)
+
+ if actual_count < self.threshold:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected fewer than {self.threshold}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a PySpark DataFrame."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+ # Count distinct values including nulls
+ actual_count = pyspark_df.select(self.column_name).distinct().count()
+
+ if actual_count < self.threshold:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected fewer than {self.threshold}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+
+class ExpectationDistinctColumnValuesGreaterThan(DataFrameAggregationExpectation):
+ """
+ Expectation that validates a column has more than a specified number of distinct values.
+
+ This expectation counts the number of unique/distinct values in a specified column
+ and checks if it's greater than the specified threshold.
+
+ Examples:
+ Column with values [1, 2, 3, 2, 1] has 3 distinct values:
+ - ExpectationDistinctColumnValuesGreaterThan(column_name="col1", threshold=2) → PASS (3 > 2)
+ - ExpectationDistinctColumnValuesGreaterThan(column_name="col1", threshold=3) → FAIL (3 is not > 3)
+
+ Note: The threshold is exclusive (actual_count > threshold).
+ """
+
+ def __init__(self, column_name: str, threshold: int):
+ """
+ Initialize the distinct values greater than expectation.
+
+ Args:
+ column_name (str): Name of the column to check.
+ threshold (int): Threshold for distinct values count (exclusive lower bound).
+ """
+ if threshold < 0:
+ raise ValueError(f"threshold must be non-negative, got {threshold}")
+
+ description = f"column '{column_name}' has more than {threshold} distinct values"
+
+ self.column_name = column_name
+ self.threshold = threshold
+
+ super().__init__(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_names=[column_name],
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Count distinct values (dropna=False includes NaN as a distinct value)
+ actual_count = pandas_df[self.column_name].nunique(dropna=False)
+
+ if actual_count > self.threshold:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected more than {self.threshold}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a PySpark DataFrame."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+ # Count distinct values including nulls
+ actual_count = pyspark_df.select(self.column_name).distinct().count()
+
+ if actual_count > self.threshold:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected more than {self.threshold}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+
+class ExpectationDistinctColumnValuesBetween(DataFrameAggregationExpectation):
+ """
+ Expectation that validates a column has a number of distinct values within a specified range.
+
+ This expectation counts the number of unique/distinct values in a specified column
+ and checks if it's between the specified minimum and maximum values.
+
+ Examples:
+ Column with values [1, 2, 3, 2, 1] has 3 distinct values:
+ - ExpectationDistinctColumnValuesBetween(column_name="col1", min_value=2, max_value=5) → PASS (2 ≤ 3 ≤ 5)
+ - ExpectationDistinctColumnValuesBetween(column_name="col1", min_value=4, max_value=6) → FAIL (3 is not ≥ 4)
+
+ Note: Both bounds are inclusive (min_value ≤ actual_count ≤ max_value).
+ """
+
+ def __init__(self, column_name: str, min_value: int, max_value: int):
+ """
+ Initialize the distinct values between expectation.
+
+ Args:
+ column_name (str): Name of the column to check.
+ min_value (int): Minimum number of distinct values (inclusive lower bound).
+ max_value (int): Maximum number of distinct values (inclusive upper bound).
+ """
+ if min_value < 0:
+ raise ValueError(f"min_value must be non-negative, got {min_value}")
+ if max_value < 0:
+ raise ValueError(f"max_value must be non-negative, got {max_value}")
+ if min_value > max_value:
+ raise ValueError(f"min_value ({min_value}) must be <= max_value ({max_value})")
+
+ description = (
+ f"column '{column_name}' has between {min_value} and {max_value} distinct values"
+ )
+
+ self.column_name = column_name
+ self.min_value = min_value
+ self.max_value = max_value
+
+ super().__init__(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_names=[column_name],
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a pandas DataFrame."""
+ try:
+ # Cast to PandasDataFrame for type safety
+ pandas_df = cast(PandasDataFrame, data_frame)
+ # Count distinct values (dropna=False includes NaN as a distinct value)
+ actual_count = pandas_df[self.column_name].nunique(dropna=False)
+
+ if self.min_value <= actual_count <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected between {self.min_value} and {self.max_value}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate distinct values count in a PySpark DataFrame."""
+ try:
+ # Cast to PySparkDataFrame for type safety
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+ # Count distinct values including nulls
+ actual_count = pyspark_df.select(self.column_name).distinct().count()
+
+ if self.min_value <= actual_count <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' has {actual_count} distinct values, expected between {self.min_value} and {self.max_value}.",
+ )
+
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error counting distinct values: {str(e)}",
+ )
+
+
+# Register the expectations
+@register_expectation("ExpectationUniqueRows")
+@requires_params("column_names", types={"column_names": list})
+def create_expectation_unique(**kwargs) -> ExpectationUniqueRows:
+ """
+ Create an ExpectationUniqueRows instance.
+
+ :param column_names: List of column names to check for uniqueness. If empty, checks all columns.
+ :return: ExpectationUniqueRows instance
+ """
+ column_names = kwargs["column_names"]
+ return ExpectationUniqueRows(column_names=column_names)
+
+
+@register_expectation("ExpectationDistinctColumnValuesEquals")
+@requires_params(
+ "column_name",
+ "expected_value",
+ types={"column_name": str, "expected_value": int},
+)
+def create_expectation_distinct_column_values_equals(
+ **kwargs,
+) -> ExpectationDistinctColumnValuesEquals:
+ """
+ Create an ExpectationDistinctColumnValuesEquals instance.
+
+ Args:
+ column_name (str): Name of the column to check.
+ expected_value (int): Expected number of distinct values.
+
+ Returns:
+ ExpectationDistinctColumnValuesEquals: A configured expectation instance.
+ """
+ return ExpectationDistinctColumnValuesEquals(
+ column_name=kwargs["column_name"],
+ expected_value=kwargs["expected_value"],
+ )
+
+
+@register_expectation("ExpectationDistinctColumnValuesLessThan")
+@requires_params(
+ "column_name",
+ "threshold",
+ types={"column_name": str, "threshold": int},
+)
+def create_expectation_distinct_column_values_less_than(
+ **kwargs,
+) -> ExpectationDistinctColumnValuesLessThan:
+ """
+ Create an ExpectationDistinctColumnValuesLessThan instance.
+
+ Args:
+ column_name (str): Name of the column to check.
+ threshold (int): Threshold for distinct values count (exclusive upper bound).
+
+ Returns:
+ ExpectationDistinctColumnValuesLessThan: A configured expectation instance.
+ """
+ return ExpectationDistinctColumnValuesLessThan(
+ column_name=kwargs["column_name"],
+ threshold=kwargs["threshold"],
+ )
+
+
+@register_expectation("ExpectationDistinctColumnValuesGreaterThan")
+@requires_params(
+ "column_name",
+ "threshold",
+ types={"column_name": str, "threshold": int},
+)
+def create_expectation_distinct_column_values_greater_than(
+ **kwargs,
+) -> ExpectationDistinctColumnValuesGreaterThan:
+ """
+ Create an ExpectationDistinctColumnValuesGreaterThan instance.
+
+ Args:
+ column_name (str): Name of the column to check.
+ threshold (int): Threshold for distinct values count (exclusive lower bound).
+
+ Returns:
+ ExpectationDistinctColumnValuesGreaterThan: A configured expectation instance.
+ """
+ return ExpectationDistinctColumnValuesGreaterThan(
+ column_name=kwargs["column_name"],
+ threshold=kwargs["threshold"],
+ )
+
+
+@register_expectation("ExpectationDistinctColumnValuesBetween")
+@requires_params(
+ "column_name",
+ "min_value",
+ "max_value",
+ types={"column_name": str, "min_value": int, "max_value": int},
+)
+def create_expectation_distinct_column_values_between(
+ **kwargs,
+) -> ExpectationDistinctColumnValuesBetween:
+ """
+ Create an ExpectationDistinctColumnValuesBetween instance.
+
+ Args:
+ column_name (str): Name of the column to check.
+ min_value (int): Minimum number of distinct values (inclusive lower bound).
+ max_value (int): Maximum number of distinct values (inclusive upper bound).
+
+ Returns:
+ ExpectationDistinctColumnValuesBetween: A configured expectation instance.
+ """
+ return ExpectationDistinctColumnValuesBetween(
+ column_name=kwargs["column_name"],
+ min_value=kwargs["min_value"],
+ max_value=kwargs["max_value"],
+ )
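+
+
+# Illustrative PySpark usage sketch (the SparkSession setup is assumed and not
+# part of this module):
+#
+#     from pyspark.sql import SparkSession
+#
+#     spark = SparkSession.builder.getOrCreate()
+#     sdf = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+#     exp = ExpectationDistinctColumnValuesBetween(column_name="col1", min_value=2, max_value=5)
+#     result = exp.aggregate_and_validate_pyspark(sdf)
+#     # Passes: 'col1' has 3 distinct values and 2 <= 3 <= 5.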
diff --git a/dataframe_expectations/expectations/column_expectation.py b/dataframe_expectations/expectations/column_expectation.py
new file mode 100644
index 0000000..af63b08
--- /dev/null
+++ b/dataframe_expectations/expectations/column_expectation.py
@@ -0,0 +1,113 @@
+from typing import Callable
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations import DataFrameExpectation
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+class DataFrameColumnExpectation(DataFrameExpectation):
+ """
+    Base class for DataFrame column expectations.
+
+    This class validates a specific column in a DataFrame against a condition defined by
+    the `fn_violations_pandas` and `fn_violations_pyspark` functions.
+    """
+
+ def __init__(
+ self,
+ expectation_name: str,
+ column_name: str,
+ fn_violations_pandas: Callable,
+ fn_violations_pyspark: Callable,
+ description: str,
+ error_message: str,
+ ):
+ """
+ Template for implementing DataFrame column expectations, where a column value is tested against a
+ condition. The conditions are defined by the `fn_violations_pandas` and `fn_violations_pyspark` functions.
+
+ :param expectation_name: The name of the expectation. This will be used during logging.
+ :param column_name: The name of the column to check.
+ :param fn_violations_pandas: Function to find violations in a pandas DataFrame.
+ :param fn_violations_pyspark: Function to find violations in a PySpark DataFrame.
+ :param description: A description of the expectation used in logging.
+ :param error_message: The error message to return if the expectation fails.
+ """
+ self.column_name = column_name
+ self.expectation_name = expectation_name
+ self.fn_violations_pandas = fn_violations_pandas
+ self.fn_violations_pyspark = fn_violations_pyspark
+ self.description = description
+ self.error_message = error_message
+
+ def get_expectation_name(self) -> str:
+ """
+ Returns the expectation name.
+ """
+ return self.expectation_name
+
+ def get_description(self) -> str:
+ """
+ Returns a description of the expectation.
+ """
+ return self.description
+
+ def row_validation(
+ self,
+ data_frame_type: DataFrameType,
+ data_frame: DataFrameLike,
+ fn_violations: Callable,
+ **kwargs,
+ ) -> DataFrameExpectationResultMessage:
+ """
+ Validate the DataFrame against the expectation.
+
+ :param data_frame_type: The type of DataFrame (Pandas or PySpark).
+ :param data_frame: The DataFrame to validate.
+ :param fn_violations: The function to find violations.
+ :return: ExpectationResultMessage indicating success or failure.
+ """
+
+ if self.column_name not in data_frame.columns:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=data_frame_type,
+ message=f"Column '{self.column_name}' does not exist in the DataFrame.",
+ )
+
+ violations = fn_violations(data_frame)
+
+ # calculate number of violations based on DataFrame type
+ num_violations = self.num_data_frame_rows(violations)
+
+ if num_violations == 0:
+ return DataFrameExpectationSuccessMessage(expectation_name=self.get_expectation_name())
+
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=data_frame_type,
+ violations_data_frame=violations,
+ message=f"Found {num_violations} row(s) where {self.error_message}",
+ )
+
+ def validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ return self.row_validation(
+ data_frame_type=DataFrameType.PANDAS,
+ data_frame=data_frame,
+ fn_violations=self.fn_violations_pandas,
+ **kwargs,
+ )
+
+ def validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ return self.row_validation(
+ data_frame_type=DataFrameType.PYSPARK,
+ data_frame=data_frame,
+ fn_violations=self.fn_violations_pyspark,
+ **kwargs,
+ )
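+
+
+# Illustrative sketch of defining a new column expectation with this template;
+# the "is positive" check is a hypothetical example, not part of the library:
+#
+#     from pyspark.sql import functions as F
+#
+#     positive_check = DataFrameColumnExpectation(
+#         expectation_name="ExpectationValuePositive",
+#         column_name="amount",
+#         fn_violations_pandas=lambda df: df[df["amount"] <= 0],
+#         fn_violations_pyspark=lambda df: df.filter(F.col("amount") <= 0),
+#         description="'amount' is positive",
+#         error_message="'amount' is not positive.",
+#     )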
diff --git a/dataframe_expectations/expectations/column_expectations/__init__.py b/dataframe_expectations/expectations/column_expectations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dataframe_expectations/expectations/column_expectations/any_value_expectations.py b/dataframe_expectations/expectations/column_expectations/any_value_expectations.py
new file mode 100644
index 0000000..b3c47db
--- /dev/null
+++ b/dataframe_expectations/expectations/column_expectations/any_value_expectations.py
@@ -0,0 +1,97 @@
+from pyspark.sql import functions as F
+
+from dataframe_expectations.expectations.column_expectation import (
+ DataFrameColumnExpectation,
+)
+from dataframe_expectations.expectations.expectation_registry import (
+ register_expectation,
+)
+from dataframe_expectations.expectations.utils import requires_params
+
+
+@register_expectation("ExpectationValueEquals")
+@requires_params("column_name", "value", types={"column_name": str, "value": object})
+def create_expectation_value_equals(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ value = kwargs["value"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueEquals",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name] != value],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name) != value),
+ description=f"'{column_name}' equals {value}",
+ error_message=f"'{column_name}' is not equal to {value}.",
+ )
+
+
+@register_expectation("ExpectationValueNotEquals")
+@requires_params("column_name", "value", types={"column_name": str, "value": object})
+def create_expectation_value_not_equals(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ value = kwargs["value"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name] == value],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name) == value),
+ description=f"'{column_name}' is not equal to {value}",
+ error_message=f"'{column_name}' is equal to {value}.",
+ )
+
+
+@register_expectation("ExpectationValueNull")
+@requires_params("column_name", types={"column_name": str})
+def create_expectation_value_null(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueNull",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name].notnull()],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name).isNotNull()),
+ description=f"'{column_name}' is null",
+ error_message=f"'{column_name}' is not null.",
+ )
+
+
+@register_expectation("ExpectationValueNotNull")
+@requires_params("column_name", types={"column_name": str})
+def create_expectation_value_not_null(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name].isnull()],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name).isNull()),
+ description=f"'{column_name}' is not null",
+ error_message=f"'{column_name}' is null.",
+ )
+
+
+@register_expectation("ExpectationValueIn")
+@requires_params("column_name", "values", types={"column_name": str, "values": list})
+def create_expectation_value_in(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ values = kwargs["values"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueIn",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[~df[column_name].isin(values)],
+ fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).isin(values)),
+ description=f"'{column_name}' is in {values}",
+ error_message=f"'{column_name}' is not in {values}.",
+ )
+
+
+@register_expectation("ExpectationValueNotIn")
+@requires_params("column_name", "values", types={"column_name": str, "values": list})
+def create_expectation_value_not_in(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ values = kwargs["values"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name].isin(values)],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name).isin(values)),
+ description=f"'{column_name}' is not in {values}",
+ error_message=f"'{column_name}' is in {values}.",
+ )
diff --git a/dataframe_expectations/expectations/column_expectations/numerical_expectations.py b/dataframe_expectations/expectations/column_expectations/numerical_expectations.py
new file mode 100644
index 0000000..e635964
--- /dev/null
+++ b/dataframe_expectations/expectations/column_expectations/numerical_expectations.py
@@ -0,0 +1,68 @@
+from pyspark.sql import functions as F
+
+from dataframe_expectations.expectations.column_expectation import (
+ DataFrameColumnExpectation,
+)
+from dataframe_expectations.expectations.expectation_registry import (
+ register_expectation,
+)
+from dataframe_expectations.expectations.utils import requires_params
+
+
+@register_expectation("ExpectationValueGreaterThan")
+@requires_params("column_name", "value", types={"column_name": str, "value": (int, float)})
+def create_expectation_value_greater_than(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ value = kwargs["value"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name] <= value],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name) <= value),
+ description=f"'{column_name}' is greater than {value}",
+ error_message=f"'{column_name}' is not greater than {value}.",
+ )
+
+
+@register_expectation("ExpectationValueLessThan")
+@requires_params("column_name", "value", types={"column_name": str, "value": (int, float)})
+def create_expectation_value_less_than(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ value = kwargs["value"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name] >= value],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name) >= value),
+ description=f"'{column_name}' is less than {value}",
+ error_message=f"'{column_name}' is not less than {value}.",
+ )
+
+
+@register_expectation("ExpectationValueBetween")
+@requires_params(
+ "column_name",
+ "min_value",
+ "max_value",
+ types={
+ "column_name": str,
+ "min_value": (int, float),
+ "max_value": (int, float),
+ },
+)
+def create_expectation_value_between(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ min_value = kwargs["min_value"]
+ max_value = kwargs["max_value"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationValueBetween",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[
+ (df[column_name] < min_value) | (df[column_name] > max_value)
+ ],
+ fn_violations_pyspark=lambda df: df.filter(
+ (F.col(column_name) < min_value) | (F.col(column_name) > max_value)
+ ),
+ description=f"'{column_name}' is between {min_value} and {max_value}",
+ error_message=f"'{column_name}' is not between {min_value} and {max_value}.",
+ )
diff --git a/dataframe_expectations/expectations/column_expectations/string_expectations.py b/dataframe_expectations/expectations/column_expectations/string_expectations.py
new file mode 100644
index 0000000..204729b
--- /dev/null
+++ b/dataframe_expectations/expectations/column_expectations/string_expectations.py
@@ -0,0 +1,142 @@
+from pyspark.sql import functions as F
+
+from dataframe_expectations.expectations.column_expectation import (
+ DataFrameColumnExpectation,
+)
+from dataframe_expectations.expectations.expectation_registry import (
+ register_expectation,
+)
+from dataframe_expectations.expectations.utils import requires_params
+
+
+@register_expectation("ExpectationStringContains")
+@requires_params("column_name", "substring", types={"column_name": str, "substring": str})
+def create_expectation_string_contains(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ substring = kwargs["substring"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringContains",
+ column_name=column_name,
+        # regex=False matches the substring literally, mirroring PySpark's Column.contains
+        fn_violations_pandas=lambda df: df[
+            ~df[column_name].str.contains(substring, regex=False, na=False)
+        ],
+ fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).contains(substring)),
+ description=f"'{column_name}' contains '{substring}'",
+ error_message=f"'{column_name}' does not contain '{substring}'.",
+ )
+
+
+@register_expectation("ExpectationStringNotContains")
+@requires_params("column_name", "substring", types={"column_name": str, "substring": str})
+def create_expectation_string_not_contains(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ substring = kwargs["substring"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name=column_name,
+        # regex=False matches the substring literally, mirroring PySpark's Column.contains
+        fn_violations_pandas=lambda df: df[
+            df[column_name].str.contains(substring, regex=False, na=False)
+        ],
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name).contains(substring)),
+ description=f"'{column_name}' does not contain '{substring}'",
+ error_message=f"'{column_name}' contains '{substring}'.",
+ )
+
+
+@register_expectation("ExpectationStringStartsWith")
+@requires_params("column_name", "prefix", types={"column_name": str, "prefix": str})
+def create_expectation_string_starts_with(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ prefix = kwargs["prefix"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[~df[column_name].str.startswith(prefix, na=False)],
+ fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).startswith(prefix)),
+ description=f"'{column_name}' starts with '{prefix}'",
+ error_message=f"'{column_name}' does not start with '{prefix}'.",
+ )
+
+
+@register_expectation("ExpectationStringEndsWith")
+@requires_params("column_name", "suffix", types={"column_name": str, "suffix": str})
+def create_expectation_string_ends_with(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ suffix = kwargs["suffix"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[~df[column_name].str.endswith(suffix, na=False)],
+ fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).endswith(suffix)),
+ description=f"'{column_name}' ends with '{suffix}'",
+ error_message=f"'{column_name}' does not end with '{suffix}'.",
+ )
+
+
+@register_expectation("ExpectationStringLengthLessThan")
+@requires_params("column_name", "length", types={"column_name": str, "length": int})
+def create_expectation_string_length_less_than(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ length = kwargs["length"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name].str.len() >= length],
+ fn_violations_pyspark=lambda df: df.filter(F.length(column_name) >= length),
+ description=f"'{column_name}' length is less than {length}",
+ error_message=f"'{column_name}' length is not less than {length}.",
+ )
+
+
+@register_expectation("ExpectationStringLengthGreaterThan")
+@requires_params("column_name", "length", types={"column_name": str, "length": int})
+def create_expectation_string_length_greater_than(
+ **kwargs,
+) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ length = kwargs["length"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name].str.len() <= length],
+ fn_violations_pyspark=lambda df: df.filter(F.length(F.col(column_name)) <= length),
+ description=f"'{column_name}' length is greater than {length}",
+ error_message=f"'{column_name}' length is not greater than {length}.",
+ )
+
+
+@register_expectation("ExpectationStringLengthBetween")
+@requires_params(
+ "column_name",
+ "min_length",
+ "max_length",
+ types={"column_name": str, "min_length": int, "max_length": int},
+)
+def create_expectation_string_length_between(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ min_length = kwargs["min_length"]
+ max_length = kwargs["max_length"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[
+ (df[column_name].str.len() < min_length) | (df[column_name].str.len() > max_length)
+ ],
+ fn_violations_pyspark=lambda df: df.filter(
+ (F.length(F.col(column_name)) < min_length)
+ | (F.length(F.col(column_name)) > max_length)
+ ),
+ description=f"'{column_name}' length is between {min_length} and {max_length}",
+ error_message=f"'{column_name}' length is not between {min_length} and {max_length}.",
+ )
+
+
+@register_expectation("ExpectationStringLengthEquals")
+@requires_params("column_name", "length", types={"column_name": str, "length": int})
+def create_expectation_string_length_equals(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ length = kwargs["length"]
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name].str.len() != length],
+ fn_violations_pyspark=lambda df: df.filter(F.length(F.col(column_name)) != length),
+ description=f"'{column_name}' length equals {length}",
+ error_message=f"'{column_name}' length is not equal to {length}.",
+ )
diff --git a/dataframe_expectations/expectations/expectation_registry.py b/dataframe_expectations/expectations/expectation_registry.py
new file mode 100644
index 0000000..a83ceba
--- /dev/null
+++ b/dataframe_expectations/expectations/expectation_registry.py
@@ -0,0 +1,110 @@
+from typing import Callable, Dict
+
+from dataframe_expectations.expectations import DataFrameExpectation
+from dataframe_expectations.logging_utils import setup_logger
+
+logger = setup_logger(__name__)
+
+
+class DataFrameExpectationRegistry:
+ """Registry for dataframe expectations."""
+
+ _expectations: Dict[str, Callable[..., DataFrameExpectation]] = {}
+ _loaded: bool = False
+
+ @classmethod
+ def register(cls, name: str):
+ """Decorator to register an expectation factory function."""
+
+ def decorator(func: Callable[..., DataFrameExpectation]):
+ logger.debug(f"Registering expectation '{name}' with function {func.__name__}")
+
+ # check if the name is already registered
+ if name in cls._expectations:
+ error_message = f"Expectation '{name}' is already registered."
+ logger.error(error_message)
+ raise ValueError(error_message)
+
+ cls._expectations[name] = func
+ return func
+
+ return decorator
+
+ @classmethod
+ def _ensure_loaded(cls):
+ """Ensure all expectation modules are loaded (lazy loading)."""
+ if not cls._loaded:
+ cls._load_all_expectations()
+ cls._loaded = True
+
+ @classmethod
+ def _load_all_expectations(cls):
+ """Load all expectation modules to ensure their decorators are executed."""
+ import importlib
+
+        # Explicitly import expectation modules so that their registration
+        # decorators are executed
+ modules_to_import = [
+ "dataframe_expectations.expectations.column_expectations.null_expectation",
+ "dataframe_expectations.expectations.column_expectations.type_expectation",
+ "dataframe_expectations.expectations.column_expectations.any_value_expectations",
+ "dataframe_expectations.expectations.column_expectations.numerical_expectations",
+ "dataframe_expectations.expectations.column_expectations.string_expectations",
+ "dataframe_expectations.expectations.aggregation_expectations.count_expectation",
+ "dataframe_expectations.expectations.aggregation_expectations.sum_expectation",
+ "dataframe_expectations.expectations.aggregation_expectations.any_value_expectations",
+ "dataframe_expectations.expectations.aggregation_expectations.numerical_expectations",
+ "dataframe_expectations.expectations.aggregation_expectations.unique",
+ # Add more modules as needed
+ ]
+
+ for module_name in modules_to_import:
+ try:
+ importlib.import_module(module_name)
+ logger.debug(f"Loaded expectation module: {module_name}")
+ except ImportError as e:
+ logger.warning(f"Failed to import expectation module {module_name}: {e}")
+
+ @classmethod
+ def get_expectation(cls, expectation_name: str, **kwargs) -> DataFrameExpectation:
+ """Get an expectation instance by name."""
+ cls._ensure_loaded() # Lazy load expectations
+ logger.debug(f"Retrieving expectation '{expectation_name}' with arguments: {kwargs}")
+ if expectation_name not in cls._expectations:
+ available = cls.list_expectations()
+ error_message = (
+ f"Unknown expectation '{expectation_name}'. "
+ f"Available expectations: {', '.join(available)}"
+ )
+ logger.error(error_message)
+ raise ValueError(error_message)
+ return cls._expectations[expectation_name](**kwargs)
+
+ @classmethod
+ def list_expectations(cls) -> list:
+ """List all registered expectation names."""
+ cls._ensure_loaded() # Lazy load expectations
+ return list(cls._expectations.keys())
+
+ @classmethod
+ def remove_expectation(cls, expectation_name: str):
+ """Remove an expectation from the registry."""
+ cls._ensure_loaded() # Lazy load expectations
+ logger.debug(f"Removing expectation '{expectation_name}'")
+ if expectation_name in cls._expectations:
+ del cls._expectations[expectation_name]
+ else:
+ error_message = f"Expectation '{expectation_name}' not found."
+ logger.error(error_message)
+ raise ValueError(error_message)
+
+ @classmethod
+ def clear_expectations(cls):
+ """Clear all registered expectations."""
+ logger.debug(f"Clearing {len(cls._expectations)} expectations from the registry")
+ cls._expectations.clear()
+ cls._loaded = False # Allow reloading
+
+
+# Convenience decorator
+register_expectation = DataFrameExpectationRegistry.register
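+
+# Illustrative sketch of registering a custom expectation factory; the name
+# and factory body are hypothetical:
+#
+#     @register_expectation("ExpectationMyCustomCheck")
+#     def create_expectation_my_custom_check(**kwargs):
+#         return MyCustomExpectation(column_name=kwargs["column_name"])
+#
+#     exp = DataFrameExpectationRegistry.get_expectation(
+#         "ExpectationMyCustomCheck", column_name="col1"
+#     )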
diff --git a/dataframe_expectations/expectations/utils.py b/dataframe_expectations/expectations/utils.py
new file mode 100644
index 0000000..5c54ebb
--- /dev/null
+++ b/dataframe_expectations/expectations/utils.py
@@ -0,0 +1,81 @@
+from functools import wraps
+from typing import Any, Callable, Dict, Optional, Tuple, Type, Union, get_args
+
+from dataframe_expectations.expectations import DataFrameExpectation
+
+
+def requires_params(
+ *required_params, types: Optional[Dict[str, Union[Type, Tuple[Type, ...]]]] = None
+):
+ """
+ Decorator that validates required parameters and optionally checks their types.
+
+ :param required_params: Required parameter names
+ :param types: Optional dict mapping parameter names to expected types
+
+ Usage:
+ @requires_params("column_name", "value")
+ def func(**kwargs): ...
+
+ @requires_params("column_name", "value", types={"column_name": str, "value": int})
+ def func(**kwargs): ...
+ """
+
+ def decorator(func: Callable[..., DataFrameExpectation]):
+ @wraps(func)
+ def wrapper(**kwargs):
+ func_name = func.__name__
+
+ # Check for missing parameters
+ missing_params = [param for param in required_params if param not in kwargs]
+ if missing_params:
+ param_list = ", ".join(required_params)
+ raise ValueError(
+ f"{func_name} missing required parameters: {', '.join(missing_params)}. "
+ f"Required: [{param_list}]"
+ )
+
+ # Type checking if types dict is provided
+ if types:
+ type_errors = []
+ for param_name, expected_type in types.items():
+ if param_name in kwargs:
+ actual_value = kwargs[param_name]
+ if not _is_instance_of_type(actual_value, expected_type):
+ type_errors.append(
+ f"'{param_name}' expected {_get_type_name(expected_type)}, "
+ f"got {type(actual_value).__name__}"
+ )
+
+ if type_errors:
+ raise TypeError(f"{func_name} type validation errors: {'; '.join(type_errors)}")
+
+ return func(**kwargs)
+
+ return wrapper
+
+ return decorator
+
+
+def _is_instance_of_type(value: Any, expected_type: Type) -> bool:
+ """Helper function to check if value is instance of expected_type, handling Union types."""
+ # Handle Union types (like Optional[str] which is Union[str, None])
+ if hasattr(expected_type, "__origin__") and expected_type.__origin__ is Union:
+ # For Union types, check if value matches any of the union members
+ union_args = get_args(expected_type)
+ return any(isinstance(value, arg) for arg in union_args if arg is not type(None)) or (
+ type(None) in union_args and value is None
+ )
+
+ # Handle regular types
+ return isinstance(value, expected_type)
+
+
+def _get_type_name(type_hint: Type) -> str:
+ """Helper function to get a readable name for type hints."""
+ if hasattr(type_hint, "__origin__") and type_hint.__origin__ is Union:
+ union_args = get_args(type_hint)
+ arg_names = [arg.__name__ if hasattr(arg, "__name__") else str(arg) for arg in union_args]
+ return f"Union[{', '.join(arg_names)}]"
+
+ return getattr(type_hint, "__name__", str(type_hint))
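+
+# Illustrative sketch (hypothetical factory) of what the decorator raises:
+#
+#   @requires_params("column_name", types={"column_name": str})
+#   def make_check(**kwargs) -> DataFrameExpectation:
+#       ...
+#
+#   make_check()                # ValueError: make_check missing required parameters:
+#                               #   column_name. Required: [column_name]
+#   make_check(column_name=42)  # TypeError: make_check type validation errors:
+#                               #   'column_name' expected str, got int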
diff --git a/dataframe_expectations/expectations_suite.py b/dataframe_expectations/expectations_suite.py
new file mode 100644
index 0000000..e468378
--- /dev/null
+++ b/dataframe_expectations/expectations_suite.py
@@ -0,0 +1,961 @@
+from typing import List, Union, cast
+
+from dataframe_expectations.expectations import DataFrameLike
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.logging_utils import setup_logger
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+logger = setup_logger(__name__)
+
+
+class DataFrameExpectationsSuiteFailure(Exception):
+ """Raised when one or more expectations in the suite fail."""
+
+ def __init__(
+ self,
+ total_expectations: int,
+ failures: List[DataFrameExpectationFailureMessage],
+ *args,
+ ):
+ self.failures = failures
+ self.total_expectations = total_expectations
+ super().__init__(*args)
+
+ def __str__(self):
+ margin_len = 80
+ lines = [
+ f"({len(self.failures)}/{self.total_expectations}) expectations failed.",
+ "\n" + "=" * margin_len,
+ "List of violations:",
+ "-" * margin_len,
+ ]
+
+ for index, failure in enumerate(self.failures):
+ lines.append(f"[Failed {index + 1}/{len(self.failures)}] {failure}")
+ if index < len(self.failures) - 1:
+ lines.append("-" * margin_len)
+
+ lines.append("=" * margin_len)
+ return "\n".join(lines)
+
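+# The exception renders as a banner; shape only, counts are illustrative:
+#
+#   (2/5) expectations failed.
+#
+#   ================================================================================
+#   List of violations:
+#   --------------------------------------------------------------------------------
+#   [Failed 1/2] <failure message>
+#   --------------------------------------------------------------------------------
+#   [Failed 2/2] <failure message>
+#   ================================================================================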
+
+class DataFrameExpectationsSuite:
+ """
+ A suite of expectations for validating DataFrames.
+ """
+
+ def __init__(self):
+ """
+ Initialize the expectation suite.
+ """
+ self.__expectations = []
+
+ # Expectations for any data type
+
+ def expect_value_equals(
+ self,
+ column_name: str,
+ value: object,
+ ):
+ """
+ Add an expectation to check if the values in a column equal a specified value.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param value: The value to compare against.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueEquals",
+ column_name=column_name,
+ value=value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_value_not_equals(
+ self,
+ column_name: str,
+ value: object,
+ ):
+ """
+ Add an expectation to check if the values in a column do not equal a specified value.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param value: The value to compare against.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name=column_name,
+ value=value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_value_null(
+ self,
+ column_name: str,
+ ):
+ """
+ Add an expectation to check if the values in a column are null.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNull",
+ column_name=column_name,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_value_not_null(
+ self,
+ column_name: str,
+ ):
+ """
+ Add an expectation to check if the values in a column are not null.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name=column_name,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_value_in(
+ self,
+ column_name: str,
+ values: List[object],
+ ):
+ """
+ Add an expectation to check if the values in a column are in a specified list of values.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param values: The list of values to compare against.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueIn",
+ column_name=column_name,
+ values=values,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_value_not_in(
+ self,
+ column_name: str,
+ values: List[object],
+ ):
+ """
+ Add an expectation to check if the values in a column are not in a specified list of values.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param values: The list of values to compare against.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name=column_name,
+ values=values,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ # Expectations for numerical data types
+
+ def expect_value_greater_than(
+ self,
+ column_name: str,
+ value: float,
+ ):
+ """
+ Add an expectation to check if the values in a column are greater than a specified value.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param value: The value to compare against.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name=column_name,
+ value=value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_value_less_than(
+ self,
+ column_name: str,
+ value: float,
+ ):
+ """
+ Add an expectation to check if the values in a column are less than a specified value.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param value: The value to compare against.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name=column_name,
+ value=value,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_value_between(
+ self,
+ column_name: str,
+ min_value: float,
+ max_value: float,
+ ):
+ """
+ Add an expectation to check if the values in a column are between two specified values.
+
+ Categories:
+ category: Column Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param min_value: The minimum value for the range.
+ :param max_value: The maximum value for the range.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueBetween",
+ column_name=column_name,
+ min_value=min_value,
+ max_value=max_value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ # Expectations for string data types
+
+ def expect_string_contains(
+ self,
+ column_name: str,
+ substring: str,
+ ):
+ """
+ Add an expectation to check if the values in a string column contain a specified substring.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param substring: The substring to search for.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringContains",
+ column_name=column_name,
+ substring=substring,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_string_not_contains(
+ self,
+ column_name: str,
+ substring: str,
+ ):
+ """
+ Add an expectation to check if the values in a string column do not contain a specified substring.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param substring: The substring to search for.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name=column_name,
+ substring=substring,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_string_starts_with(
+ self,
+ column_name: str,
+ prefix: str,
+ ):
+ """
+ Add an expectation to check if the values in a string column start with a specified prefix.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param prefix: The prefix to search for.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name=column_name,
+ prefix=prefix,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_string_ends_with(
+ self,
+ column_name: str,
+ suffix: str,
+ ):
+ """
+ Add an expectation to check if the values in a string column end with a specified suffix.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param suffix: The suffix to search for.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name=column_name,
+ suffix=suffix,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_string_length_less_than(
+ self,
+ column_name: str,
+ length: int,
+ ):
+ """
+ Add an expectation to check if the length of the values in a string column is less than a specified length.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param length: The length that the values should be less than.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name=column_name,
+ length=length,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_string_length_greater_than(
+ self,
+ column_name: str,
+ length: int,
+ ):
+ """
+ Add an expectation to check if the length of the values in a string column is greater than a specified length.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param length: The length that the values should be greater than.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name=column_name,
+ length=length,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_string_length_between(
+ self,
+ column_name: str,
+ min_length: int,
+ max_length: int,
+ ):
+ """
+ Add an expectation to check if the length of the values in a string column is between two specified lengths.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param min_length: The minimum length that the values should be.
+ :param max_length: The maximum length that the values should be.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name=column_name,
+ min_length=min_length,
+ max_length=max_length,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_string_length_equals(
+ self,
+ column_name: str,
+ length: int,
+ ):
+ """
+ Add an expectation to check if the length of the values in a string column equals a specified length.
+
+ Categories:
+ category: Column Expectations
+ subcategory: String
+
+ :param column_name: The name of the column to check.
+ :param length: The length that the values should equal.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name=column_name,
+ length=length,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+    # Expectations for aggregated values
+
+ def expect_min_rows(
+ self,
+ min_rows: int,
+ ):
+ """
+ Add an expectation to check if the DataFrame has at least a minimum number of rows.
+
+        Categories:
+            category: DataFrame Aggregation Expectations
+            subcategory: Any Value
+
+ :param min_rows: The minimum number of rows expected.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=min_rows,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_max_rows(
+ self,
+ max_rows: int,
+ ):
+ """
+ Add an expectation to check if the DataFrame has at most a maximum number of rows.
+
+        Categories:
+            category: DataFrame Aggregation Expectations
+            subcategory: Any Value
+
+ :param max_rows: The maximum number of rows expected.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=max_rows,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_max_null_percentage(
+ self,
+ column_name: str,
+ max_percentage: float,
+ ):
+ """
+        Add an expectation to check that the percentage of null/NaN values in a column does not exceed a threshold.
+
+        Categories:
+            category: Column Aggregation Expectations
+            subcategory: Any Value
+
+ :param column_name: The name of the column to check for null percentage.
+ :param max_percentage: The maximum allowed percentage of null/NaN values (0.0 to 100.0).
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name=column_name,
+ max_percentage=max_percentage,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_max_null_count(
+ self,
+ column_name: str,
+ max_count: int,
+ ):
+ """
+        Add an expectation to check that the count of null/NaN values in a column does not exceed a threshold.
+
+        Categories:
+            category: Column Aggregation Expectations
+            subcategory: Any Value
+
+ :param column_name: The name of the column to check for null count.
+ :param max_count: The maximum allowed count of null/NaN values.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name=column_name,
+ max_count=max_count,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_unique_rows(
+ self,
+ column_names: List[str],
+ ):
+ """
+ Add an expectation to check if the rows in the DataFrame are unique based on specified columns.
+
+ Categories:
+ category: Column Aggregation Expectations
+ subcategory: Any Value
+
+ :param column_names: The list of column names to check for uniqueness.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=column_names,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_distinct_column_values_equals(
+ self,
+ column_name: str,
+ expected_value: int,
+ ):
+ """
+ Add an expectation to check if the number of distinct values in a column equals an expected count.
+
+        Categories:
+            category: Column Aggregation Expectations
+            subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param expected_value: The expected number of distinct values (exact match).
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name=column_name,
+ expected_value=expected_value,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_distinct_column_values_less_than(
+ self,
+ column_name: str,
+ threshold: int,
+ ):
+ """
+ Add an expectation to check if the number of distinct values in a column is less than a threshold.
+
+        Categories:
+            category: Column Aggregation Expectations
+            subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param threshold: The threshold for distinct values count (exclusive upper bound).
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name=column_name,
+ threshold=threshold,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_distinct_column_values_greater_than(
+ self,
+ column_name: str,
+ threshold: int,
+ ):
+ """
+ Add an expectation to check if the number of distinct values in a column is greater than a threshold.
+
+        Categories:
+            category: Column Aggregation Expectations
+            subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param threshold: The threshold for distinct values count (exclusive lower bound).
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name=column_name,
+ threshold=threshold,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_distinct_column_values_between(
+ self,
+ column_name: str,
+ min_value: int,
+ max_value: int,
+ ):
+ """
+ Add an expectation to check if the number of distinct values in a column falls within a range.
+
+        Categories:
+            category: Column Aggregation Expectations
+            subcategory: Any Value
+
+ :param column_name: The name of the column to check.
+ :param min_value: The minimum number of distinct values (inclusive lower bound).
+ :param max_value: The maximum number of distinct values (inclusive upper bound).
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name=column_name,
+ min_value=min_value,
+ max_value=max_value,
+ )
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_column_quantile_between(
+ self,
+ column_name: str,
+ quantile: float,
+ min_value: Union[int, float],
+ max_value: Union[int, float],
+ ):
+ """
+ Add an expectation to check if a quantile of a column falls within a specified range.
+
+ Categories:
+ category: Column Aggregation Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param quantile: The quantile to compute (0.0 to 1.0, where 0.0=min, 0.5=median, 1.0=max).
+ :param min_value: The minimum allowed value for the quantile.
+ :param max_value: The maximum allowed value for the quantile.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name=column_name,
+ quantile=quantile,
+ min_value=min_value,
+ max_value=max_value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_column_max_between(
+ self,
+ column_name: str,
+ min_value: Union[int, float],
+ max_value: Union[int, float],
+ ):
+ """
+ Add an expectation to check if the maximum value of a column falls within a specified range.
+
+ Categories:
+ category: Column Aggregation Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param min_value: The minimum allowed value for the column maximum.
+ :param max_value: The maximum allowed value for the column maximum.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name=column_name,
+ min_value=min_value,
+ max_value=max_value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_column_min_between(
+ self,
+ column_name: str,
+ min_value: Union[int, float],
+ max_value: Union[int, float],
+ ):
+ """
+ Add an expectation to check if the minimum value of a column falls within a specified range.
+
+ Categories:
+ category: Column Aggregation Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param min_value: The minimum allowed value for the column minimum.
+ :param max_value: The maximum allowed value for the column minimum.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name=column_name,
+ min_value=min_value,
+ max_value=max_value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_column_mean_between(
+ self,
+ column_name: str,
+ min_value: Union[int, float],
+ max_value: Union[int, float],
+ ):
+ """
+ Add an expectation to check if the mean value of a column falls within a specified range.
+
+ Categories:
+ category: Column Aggregation Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param min_value: The minimum allowed value for the column mean.
+ :param max_value: The maximum allowed value for the column mean.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name=column_name,
+ min_value=min_value,
+ max_value=max_value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def expect_column_median_between(
+ self,
+ column_name: str,
+ min_value: Union[int, float],
+ max_value: Union[int, float],
+ ):
+ """
+ Add an expectation to check if the median value of a column falls within a specified range.
+
+ Categories:
+ category: Column Aggregation Expectations
+ subcategory: Numerical
+
+ :param column_name: The name of the column to check.
+ :param min_value: The minimum allowed value for the column median.
+ :param max_value: The maximum allowed value for the column median.
+ :return: an instance of DataFrameExpectationsSuite.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name=column_name,
+ min_value=min_value,
+ max_value=max_value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
+
+ def run(
+ self,
+ data_frame: DataFrameLike,
+ ) -> None:
+ """
+        Run all expectations on the provided DataFrame. PySpark DataFrames are
+        cached for the duration of the run and unpersisted afterwards, unless
+        they were already cached.
+
+        :param data_frame: The DataFrame to validate.
+        :raises DataFrameExpectationsSuiteFailure: if any expectation fails.
+ """
+ from dataframe_expectations import DataFrameType
+ from dataframe_expectations.expectations import DataFrameExpectation
+
+ successes = []
+ failures = []
+ margin_len = 80
+
+ header_message = "Running expectations suite"
+ header_prefix = "=" * ((margin_len - len(header_message) - 2) // 2)
+ header_suffix = "=" * (
+            (margin_len - len(header_message) - 2) // 2 + len(header_message) % 2
+ )
+ logger.info(f"{header_prefix} {header_message} {header_suffix}")
+
+ # PySpark caching optimization
+ data_frame_type = DataFrameExpectation.infer_data_frame_type(data_frame)
+ was_already_cached = False
+
+ if data_frame_type == DataFrameType.PYSPARK:
+ # Import PySpark DataFrame for type casting
+ from pyspark.sql import DataFrame as PySparkDataFrame
+
+ # Cast to PySpark DataFrame since we know it's PySpark at this point
+ pyspark_df = cast(PySparkDataFrame, data_frame)
+
+ # Check if DataFrame is already cached
+ was_already_cached = pyspark_df.is_cached
+
+ # Cache the DataFrame if it wasn't already cached
+ if not was_already_cached:
+ logger.debug("Caching PySpark DataFrame for expectations suite execution")
+ pyspark_df.cache()
+ # Update the original reference for subsequent operations
+ data_frame = pyspark_df
+
+ try:
+ # Run all expectations
+ for expectation in self.__expectations:
+ result = expectation.validate(data_frame=data_frame)
+ if isinstance(result, DataFrameExpectationSuccessMessage):
+ logger.info(
+ f"{expectation.get_expectation_name()} ({expectation.get_description()}) ... OK"
+ )
+ successes.append(result)
+ elif isinstance(result, DataFrameExpectationFailureMessage):
+ logger.info(
+ f"{expectation.get_expectation_name()} ({expectation.get_description()}) ... FAIL"
+ )
+ failures.append(result)
+ else:
+ raise ValueError(
+ f"Unexpected result type: {type(result)} for expectation: {expectation.get_expectation_name()}"
+ )
+ finally:
+ # Uncache the DataFrame if we cached it (and it wasn't already cached)
+ if data_frame_type == DataFrameType.PYSPARK and not was_already_cached:
+ from pyspark.sql import DataFrame as PySparkDataFrame
+
+ logger.debug("Uncaching PySpark DataFrame after expectations suite execution")
+ cast(PySparkDataFrame, data_frame).unpersist()
+
+ footer_message = f"{len(successes)} success, {len(failures)} failures"
+ footer_prefix = "=" * ((margin_len - len(footer_message) - 2) // 2)
+ footer_suffix = "=" * (
+ (margin_len - len(footer_message) - 2) // 2 + len(footer_message) % 2
+ )
+ logger.info(f"{footer_prefix} {footer_message} {footer_suffix}")
+
+ if len(failures) > 0:
+ raise DataFrameExpectationsSuiteFailure(
+ total_expectations=len(self.__expectations), failures=failures
+ )
+
+
+if __name__ == "__main__":
+ # Example usage
+ suite = DataFrameExpectationsSuite()
+ suite.expect_value_greater_than(column_name="age", value=18)
+ suite.expect_value_less_than(column_name="salary", value=100000)
+ suite.expect_unique_rows(column_names=["id"])
+ suite.expect_column_mean_between(column_name="age", min_value=20, max_value=40)
+ suite.expect_column_max_between(column_name="salary", min_value=80000, max_value=150000)
+
+ import pandas as pd
+
+    # Create a sample DataFrame that satisfies every expectation above
+ df = pd.DataFrame(
+ {
+ "id": [1, 2, 3, 4],
+ "age": [20, 25, 30, 35],
+ "salary": [50000, 120000, 80000, 90000],
+ }
+ )
+
+ suite.run(data_frame=df)
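+
+    # A failing suite raises rather than returning. A hedged sketch of how a
+    # caller might handle that (bad_df is hypothetical):
+    #
+    #   try:
+    #       suite.run(data_frame=bad_df)
+    #   except DataFrameExpectationsSuiteFailure as failure:
+    #       print(failure)  # banner with the numbered list of violations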
diff --git a/dataframe_expectations/logging_utils.py b/dataframe_expectations/logging_utils.py
new file mode 100644
index 0000000..f2f74e6
--- /dev/null
+++ b/dataframe_expectations/logging_utils.py
@@ -0,0 +1,30 @@
+import logging
+
+
+def setup_logger(name=None):
+ """Sets up the logger for the entire run."""
+ # Suppress verbose logs from py4j
+ logging.getLogger("py4j").setLevel(logging.ERROR)
+ logging.getLogger("py4j.java_gateway").setLevel(logging.ERROR)
+
+ # Create or get a logger
+ logger = logging.getLogger(name)
+ logger.setLevel(logging.INFO) # Set the default log level
+ logger.propagate = False # Disable logger propagation to prevent duplicate logs
+ DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
+ MSG_FORMAT = "%(asctime)s %(levelname)-8s [%(filename)s:%(funcName)s():%(lineno)d] %(message)s"
+
+ # Check if the logger already has handlers to avoid duplicate logs
+ if not logger.hasHandlers():
+ # Create a console handler
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(logging.INFO)
+
+ # Create a formatter and set it for the handler
+ formatter = logging.Formatter(MSG_FORMAT, DATE_FORMAT)
+ console_handler.setFormatter(formatter)
+
+ # Add the handler to the logger
+ logger.addHandler(console_handler)
+
+ return logger
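+
+# Typical module-level usage:
+#
+#   logger = setup_logger(__name__)
+#   logger.info("goes to a single console handler, no duplicate records")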
diff --git a/dataframe_expectations/result_message.py b/dataframe_expectations/result_message.py
new file mode 100644
index 0000000..34e5f56
--- /dev/null
+++ b/dataframe_expectations/result_message.py
@@ -0,0 +1,76 @@
+from abc import ABC
+from typing import Optional
+
+from tabulate import tabulate # type: ignore
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+
+
+class DataFrameExpectationResultMessage(ABC):
+ """
+ Base class for expectation result message.
+ """
+
+ message: str = ""
+
+ def __str__(self):
+ """
+        Return the expectation result message.
+ """
+ return self.message
+
+ def dataframe_to_str(self, data_frame_type: DataFrameType, data_frame, rows: int) -> str:
+ """
+        Render up to `rows` rows of the DataFrame as a tabulated string.
+ """
+
+ if data_frame_type == DataFrameType.PANDAS:
+ data_frame = data_frame.head(rows)
+ elif data_frame_type == DataFrameType.PYSPARK:
+ data_frame = data_frame.limit(rows).toPandas()
+ else:
+ raise ValueError(f"Unsupported DataFrame type: {data_frame_type}")
+
+ return tabulate(data_frame, headers="keys", tablefmt="pretty", showindex=False)
+
+
+class DataFrameExpectationSuccessMessage(DataFrameExpectationResultMessage):
+ def __init__(self, expectation_name: str, message: Optional[str] = None):
+ """
+ Initialize the expectation success message.
+ """
+ self.message = f"{expectation_name} succeeded."
+ if message is not None:
+ self.message = f"{self.message}: {message}"
+
+
+class DataFrameExpectationFailureMessage(DataFrameExpectationResultMessage):
+ def __init__(
+ self,
+ expectation_str: str,
+ data_frame_type: DataFrameType,
+ violations_data_frame: Optional[DataFrameLike] = None,
+ message: Optional[str] = None,
+ limit_violations: int = 5,
+ ):
+ self.message = expectation_str
+ if message is not None:
+ self.message = f"{self.message}: {message}"
+ if violations_data_frame is not None:
+ self.data_frame_type = data_frame_type
+
+ self.violations_data_frame = violations_data_frame
+ violations_dataframe_str = self.dataframe_to_str(
+ data_frame_type=data_frame_type,
+ data_frame=violations_data_frame,
+ rows=limit_violations,
+ )
+ self.message = (
+ f"{self.message} \nSome examples of violations: \n{violations_dataframe_str}"
+ )
+
+ def get_violations_data_frame(self) -> Optional[DataFrameLike]:
+ """
+ Get the DataFrame with violations.
+ """
+ return self.violations_data_frame if hasattr(self, "violations_data_frame") else None
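+
+# Illustrative sketch (hypothetical violations frame): constructing a failure
+# message. __str__ appends a tabulated preview of up to `limit_violations` rows.
+#
+#   failure = DataFrameExpectationFailureMessage(
+#       expectation_str="ExpectationValueGreaterThan(column_name='age', value=18)",
+#       data_frame_type=DataFrameType.PANDAS,
+#       violations_data_frame=violations_df,
+#       message="2 rows violate the expectation",
+#   )
+#   print(failure)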
diff --git a/dataframe_expectations/sanity_checks.py b/dataframe_expectations/sanity_checks.py
new file mode 100644
index 0000000..ec26529
--- /dev/null
+++ b/dataframe_expectations/sanity_checks.py
@@ -0,0 +1,392 @@
+"""
+DataFrame Expectations Framework Sanity Check Script
+
+This script validates consistency across the entire expectations framework by checking:
+1. All expectations implemented in the expectations/ directory are registered in the registry
+2. All registered expectations have corresponding expect_* methods in DataFrameExpectationsSuite
+3. All registered expectations have corresponding unit tests in tests/expectations_implemented/
+
+Usage:
+    python sanity_checks.py
+"""
+
+import ast
+import re
+import sys
+from pathlib import Path
+from typing import Dict, List, Optional, Set
+
+
+class ExpectationsSanityChecker:
+ """Validates consistency across the expectations framework."""
+
+ def __init__(self, project_root: Path):
+ self.project_root = project_root
+ self.expectations_dir = project_root / "dataframe_expectations" / "expectations"
+ self.suite_file = project_root / "dataframe_expectations" / "expectations_suite.py"
+ self.tests_dir = project_root / "tests" / "expectations_implemented"
+
+ # Results storage
+ self.registered_expectations: Dict[str, str] = {} # expectation_name -> file_path
+ self.suite_methods: Set[str] = set() # expect_* method names
+ self.test_files: Dict[str, str] = {} # expectation_name -> test_file_path
+
+ # Issues tracking
+ self.issues: List[str] = []
+
+ def run_full_check(self) -> bool:
+ """Run all consistency checks and return True if all pass."""
+ print("🔍 Starting DataFrame Expectations Framework Sanity Check...")
+ print("=" * 70)
+
+ # Step 1: Discover registered expectations
+ print("\n📋 Step 1: Discovering registered expectations...")
+ self._discover_registered_expectations()
+ print(f" Found {len(self.registered_expectations)} registered expectations")
+
+ # Step 2: Discover suite methods
+ print("\n🎯 Step 2: Discovering suite methods...")
+ self._discover_suite_methods()
+ print(f" Found {len(self.suite_methods)} expect_* methods in suite")
+
+ # Step 3: Discover test files
+ print("\n🧪 Step 3: Discovering test files...")
+ self._discover_test_files()
+ print(f" Found {len(self.test_files)} test files")
+
+ # Step 4: Validate consistency
+ print("\n✅ Step 4: Validating consistency...")
+ self._validate_registry_to_suite_mapping()
+ self._validate_registry_to_tests_mapping()
+ self._validate_orphaned_suite_methods()
+ self._validate_orphaned_test_files()
+
+ # Report results
+ self._print_results()
+
+ return len(self.issues) == 0
+
+ def _discover_registered_expectations(self):
+ """Find all @register_expectation decorators in expectation files."""
+ expectation_files = list(self.expectations_dir.rglob("*.py"))
+
+ for file_path in expectation_files:
+ if file_path.name == "__init__.py":
+ continue
+
+ try:
+ with open(file_path, "r") as f:
+ content = f.read()
+
+ # Parse AST to find @register_expectation decorators
+ tree = ast.parse(content)
+
+ for node in ast.walk(tree):
+ if isinstance(node, ast.FunctionDef):
+ for decorator in node.decorator_list:
+ if self._is_register_expectation_decorator(decorator):
+ expectation_name = self._extract_expectation_name(decorator)
+ if expectation_name:
+ self.registered_expectations[expectation_name] = str(file_path)
+
+ except Exception as e:
+ print(f" ⚠️ Warning: Could not parse {file_path}: {e}")
+
+ def _is_register_expectation_decorator(self, decorator) -> bool:
+ """Check if a decorator is @register_expectation."""
+ if isinstance(decorator, ast.Call):
+ if isinstance(decorator.func, ast.Name) and decorator.func.id == "register_expectation":
+ return True
+ return False
+
+ def _extract_expectation_name(self, decorator) -> Optional[str]:
+ """Extract expectation name from @register_expectation("Name") decorator."""
+ if isinstance(decorator, ast.Call) and decorator.args:
+ first_arg = decorator.args[0]
+ if isinstance(first_arg, ast.Constant):
+ return str(first_arg.value)
+ return None
+
+ def _discover_suite_methods(self):
+ """Find all expect_* methods in DataFrameExpectationsSuite."""
+ if not self.suite_file.exists():
+ self.issues.append(f"❌ Suite file not found: {self.suite_file}")
+ return
+
+ try:
+ with open(self.suite_file, "r") as f:
+ content = f.read()
+
+ # Use regex to find expect_* method definitions
+ method_pattern = r"def\s+(expect_[a-z_]+)\s*\("
+ matches = re.findall(method_pattern, content)
+ self.suite_methods = set(matches)
+
+ except Exception as e:
+ self.issues.append(f"❌ Could not parse suite file {self.suite_file}: {e}")
+
+ def _discover_test_files(self):
+ """Find all test files and map them to expectation names."""
+ if not self.tests_dir.exists():
+ self.issues.append(f"❌ Tests directory not found: {self.tests_dir}")
+ return
+
+ test_files = list(self.tests_dir.rglob("test_*.py"))
+
+ for test_file in test_files:
+ # Skip template files
+ if "template" in test_file.name.lower():
+ continue
+
+ # Extract potential expectation name from filename
+ # e.g., test_expect_value_equals.py -> ExpectationValueEquals
+ filename = test_file.stem
+ if filename.startswith("test_expect_"):
+ # Convert test_expect_value_equals -> ValueEquals
+ expectation_part = filename[12:] # Remove "test_expect_"
+ expectation_name = "Expectation" + self._snake_to_pascal_case(expectation_part)
+ self.test_files[expectation_name] = str(test_file)
+
+ def _snake_to_pascal_case(self, snake_str: str) -> str:
+ """Convert snake_case to PascalCase."""
+ components = snake_str.split("_")
+ return "".join(word.capitalize() for word in components)
+
+ def _validate_registry_to_suite_mapping(self):
+ """Check that all registered expectations have suite methods."""
+ print(" 🔗 Checking registry -> suite mapping...")
+
+ missing_suite_methods = []
+
+ for expectation_name in self.registered_expectations.keys():
+ # Convert expectation name to expected suite method name
+ expected_method = self._expectation_to_suite_method(expectation_name)
+
+ if expected_method not in self.suite_methods:
+ missing_suite_methods.append((expectation_name, expected_method))
+
+ if missing_suite_methods:
+ self.issues.append("❌ Registered expectations missing suite methods:")
+ for exp_name, method_name in missing_suite_methods:
+ self.issues.append(f" • {exp_name} -> missing {method_name}()")
+
+ def _validate_registry_to_tests_mapping(self):
+ """Check that all registered expectations have test files."""
+ print(" 🧪 Checking registry -> tests mapping...")
+
+ missing_tests = []
+
+ for expectation_name in self.registered_expectations.keys():
+ if expectation_name not in self.test_files:
+ missing_tests.append(expectation_name)
+
+ if missing_tests:
+ self.issues.append("❌ Registered expectations missing test files:")
+ for exp_name in missing_tests:
+ expected_test_file = self._expectation_to_test_filename(exp_name)
+ self.issues.append(f" • {exp_name} -> missing {expected_test_file}")
+
+ def _validate_orphaned_suite_methods(self):
+ """Check for suite methods without corresponding registered expectations."""
+ print(" 🔍 Checking for orphaned suite methods...")
+
+ orphaned_methods = []
+
+ for method_name in self.suite_methods:
+ expected_expectation = self._suite_method_to_expectation(method_name)
+
+ if expected_expectation not in self.registered_expectations:
+ orphaned_methods.append((method_name, expected_expectation))
+
+ if orphaned_methods:
+ self.issues.append("❌ Suite methods without registered expectations:")
+ for method_name, exp_name in orphaned_methods:
+ self.issues.append(f" • {method_name}() -> missing {exp_name}")
+
+ def _validate_orphaned_test_files(self):
+ """Check for test files without corresponding registered expectations."""
+ print(" 🧪 Checking for orphaned test files...")
+
+ orphaned_tests = []
+
+ for expectation_name, test_file in self.test_files.items():
+ if expectation_name not in self.registered_expectations:
+ orphaned_tests.append((expectation_name, test_file))
+
+ if orphaned_tests:
+ self.issues.append("❌ Test files without registered expectations:")
+ for exp_name, test_file in orphaned_tests:
+ self.issues.append(f" • {test_file} -> missing {exp_name}")
+
+ def _expectation_to_suite_method(self, expectation_name: str) -> str:
+ """Convert expectation name to expected suite method name."""
+ # Remove "Expectation" prefix if present
+ if expectation_name.startswith("Expectation"):
+ name_part = expectation_name[11:] # Remove "Expectation"
+ else:
+ name_part = expectation_name
+
+ # Convert PascalCase to snake_case and add "expect_" prefix
+ snake_case = re.sub("([A-Z])", r"_\1", name_part).lower().lstrip("_")
+ return f"expect_{snake_case}"
+
+ def _suite_method_to_expectation(self, method_name: str) -> str:
+ """Convert suite method name to expected expectation name."""
+ if method_name.startswith("expect_"):
+ name_part = method_name[7:] # Remove "expect_"
+ # Convert snake_case to PascalCase and add "Expectation" prefix
+ pascal_case = self._snake_to_pascal_case(name_part)
+ return f"Expectation{pascal_case}"
+ return method_name
+
+ def _expectation_to_test_filename(self, expectation_name: str) -> str:
+ """Convert expectation name to expected test filename."""
+ method_name = self._expectation_to_suite_method(expectation_name)
+ return f"test_{method_name}.py"
+
+ def _print_results(self):
+ """Print the final results of the sanity check."""
+ print("\n" + "=" * 70)
+ print("📊 SANITY CHECK RESULTS")
+ print("=" * 70)
+
+ print("\n📈 Summary:")
+ print(f" • Registered expectations: {len(self.registered_expectations)}")
+ print(f" • Suite methods: {len(self.suite_methods)}")
+ print(f" • Test files: {len(self.test_files)}")
+ print(f" • Issues found: {len(self.issues)}")
+
+ if self.issues:
+ print(f"\n❌ ISSUES FOUND ({len(self.issues)}):")
+ print("-" * 40)
+ for issue in self.issues:
+ print(issue)
+ else:
+ print("\n✅ ALL CHECKS PASSED!")
+ print(" The expectations framework is consistent across:")
+ print(" • Registry registrations")
+ print(" • Suite method implementations")
+ print(" • Unit test coverage")
+
+ print("\n" + "=" * 70)
+
+ def print_detailed_mappings(self):
+ """Print detailed mappings for debugging purposes."""
+ print("\n🔍 DETAILED MAPPINGS")
+ print("=" * 50)
+
+ print(f"\n📋 Registered Expectations ({len(self.registered_expectations)}):")
+ for name, file_path in sorted(self.registered_expectations.items()):
+ print(f" • {name} ({Path(file_path).name})")
+
+ print(f"\n🎯 Suite Methods ({len(self.suite_methods)}):")
+ for method in sorted(self.suite_methods):
+ print(f" • {method}()")
+
+ print(f"\n🧪 Test Files ({len(self.test_files)}):")
+ for name, file_path in sorted(self.test_files.items()):
+ print(f" • {name} -> {Path(file_path).name}")
+
+ def should_run_check(self) -> bool:
+ """Check if we should run based on changed files in the current branch."""
+ import subprocess
+
+ try:
+ # Try to get the default branch name (usually main or master)
+ try:
+ result = subprocess.run(
+ ["git", "symbolic-ref", "refs/remotes/origin/HEAD"],
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ default_branch = result.stdout.strip().split("/")[-1]
+ except subprocess.CalledProcessError:
+ # Fallback to common default branch names
+ for branch in ["main", "master"]:
+ try:
+ subprocess.run(
+ ["git", "rev-parse", f"origin/{branch}"],
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ default_branch = branch
+ break
+ except subprocess.CalledProcessError:
+ continue
+ else:
+ default_branch = "main" # Final fallback
+
+ # Get list of changed files compared to default branch
+ result = subprocess.run(
+ ["git", "diff", f"origin/{default_branch}...HEAD", "--name-only"],
+ capture_output=True,
+ text=True,
+ check=True,
+ )
+ changed_files = [f for f in result.stdout.strip().split("\n") if f]
+
+ if not changed_files:
+ print("🔍 No files changed, skipping sanity check.")
+ return False
+
+ # Check if any relevant files changed
+ relevant_patterns = [
+ "dataframe_expectations/",
+ "tests/dataframe_expectations/",
+ ]
+
+ changed_relevant_files = []
+ for file in changed_files:
+ for pattern in relevant_patterns:
+ if pattern in file:
+ changed_relevant_files.append(file)
+ break
+
+ if changed_relevant_files:
+ print("🔍 Relevant DataFrame expectations files changed:")
+ for file in changed_relevant_files:
+ print(f" • {file}")
+ return True
+ else:
+ print("🔍 No relevant DataFrame expectations files changed, skipping sanity check.")
+ return False
+
+ except subprocess.CalledProcessError as e:
+ print(f"⚠️ Git command failed: {e}")
+ print("🔍 Running sanity check anyway as a safety measure.")
+ return True
+ except Exception as e:
+ print(f"⚠️ Error checking changed files: {e}")
+ print("🔍 Running sanity check anyway as a safety measure.")
+ return True
+
+
+if __name__ == "__main__":
+ # Use relative path from the script location
+ script_dir = Path(__file__).parent
+ # Go up one level: sanity_checks.py is in dataframe_expectations/, project root is parent
+ project_root = script_dir.parent
+
+ # Validate directory structure
+ expected_dirs = ["dataframe_expectations", "tests", "pyproject.toml"]
+ missing_dirs = [d for d in expected_dirs if not (project_root / d).exists()]
+
+ if missing_dirs:
+ print(f"❌ Missing expected directories/files: {missing_dirs}")
+ print(f"Script location: {Path(__file__)}")
+ print(f"Project root: {project_root}")
+ sys.exit(1)
+
+ checker = ExpectationsSanityChecker(project_root)
+
+ # Run the checks
+ success = checker.run_full_check()
+
+ # Optionally print detailed mappings for debugging
+ if "--verbose" in sys.argv or "-v" in sys.argv:
+ checker.print_detailed_mappings()
+
+ # Exit with appropriate code
+ sys.exit(0 if success else 1)
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..f0637be
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,32 @@
+# Minimal makefile for Sphinx documentation with uv support
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= uv run sphinx-build
+SOURCEDIR = source
+BUILDDIR = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help clean livehtml install-docs build-fresh Makefile
+
+# Catch-all target: route all unknown targets to Sphinx-build using the O variable.
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
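+# For example, "make html" routes through the catch-all above and writes the
+# site to $(BUILDDIR)/html.
+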
+# Custom targets for development
+clean:
+ @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+livehtml:
+ uv run sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
+
+# Install documentation dependencies
+install-docs:
+ uv sync --group docs
+
+# Build docs with fresh install
+build-fresh: install-docs html
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..5492f92
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,3 @@
+sphinx>=4.0.0
+sphinx-rtd-theme>=1.0.0
+sphinx-autobuild>=2021.3.14
diff --git a/docs/source/_ext/expectations_autodoc.py b/docs/source/_ext/expectations_autodoc.py
new file mode 100644
index 0000000..ec3b0d8
--- /dev/null
+++ b/docs/source/_ext/expectations_autodoc.py
@@ -0,0 +1,365 @@
+"""
+Custom Sphinx extension for generating categorized DataFrame expectations documentation.
+"""
+
+import inspect
+import re
+from collections import defaultdict
+from typing import Any, Dict, List, Optional, Tuple
+
+from docutils import nodes
+from docutils.nodes import Node
+from docutils.parsers.rst import directives
+from sphinx.application import Sphinx
+from sphinx.util.docutils import SphinxDirective
+
+from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+
+def parse_metadata_from_docstring(docstring: str) -> Tuple[Optional[str], Optional[str]]:
+ """Parse metadata from docstring using YAML-style format."""
+ if not docstring:
+ return None, None
+
+ # Look for Categories section with YAML-style indentation
+ pattern = r"Categories:\s*\n\s*category:\s*(.+)\n\s*subcategory:\s*(.+)"
+ match = re.search(pattern, docstring, re.IGNORECASE)
+
+ if match:
+ return match.group(1).strip(), match.group(2).strip()
+
+ return None, None
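+
+# Expected docstring shape (what the regex above matches), as written in the
+# suite methods:
+#
+#     Categories:
+#         category: Column Expectations
+#         subcategory: String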
+
+
+def infer_category_from_method_name(method_name: str) -> Tuple[str, str]:
+ """Infer category and subcategory from method name as fallback."""
+ if any(
+ keyword in method_name
+ for keyword in ["quantile", "max", "min", "mean", "median", "unique_rows"]
+ ):
+ return "Column Aggregation Expectations", get_subcategory_from_name(method_name)
+ else:
+ return "Column Expectations", get_subcategory_from_name(method_name)
+
+
+def get_subcategory_from_name(method_name: str) -> str:
+ """Get subcategory from method name."""
+ if any(
+ keyword in method_name
+ for keyword in ["string", "length", "contains", "starts", "ends"]
+ ):
+ return "String"
+ elif any(
+ keyword in method_name
+ for keyword in [
+ "greater",
+ "less",
+ "between",
+ "quantile",
+ "max",
+ "min",
+ "mean",
+ "median",
+ ]
+ ):
+ return "Numerical"
+ else:
+ return "Any Value"
+
+
+def clean_docstring_from_metadata(docstring: str) -> str:
+ """Remove metadata section from docstring."""
+ if not docstring:
+ return ""
+
+ # Remove Categories section
+ pattern = r"Categories:\s*\n\s*category:.*\n\s*subcategory:.*\n?"
+ cleaned = re.sub(pattern, "", docstring, flags=re.IGNORECASE)
+
+ return cleaned.strip()
+
+
+class ExpectationsDirective(SphinxDirective):
+ """
+ Custom directive to generate categorized expectations documentation.
+
+ Usage:
+ .. expectations::
+ :class: dataframe_expectations.expectations_suite.DataFrameExpectationsSuite
+            :show-summary:
+            :show-cards:
+ """
+
+ has_content = False
+ required_arguments = 0
+ optional_arguments = 0
+ option_spec = {
+ 'class': directives.unchanged_required,
+ 'show-summary': directives.flag,
+ 'show-cards': directives.flag,
+ }
+
+ def run(self) -> List[Node]:
+ """Generate the expectations documentation."""
+ # Import the class
+ class_path = self.options.get('class', 'dataframe_expectations.expectations_suite.DataFrameExpectationsSuite')
+ module_name, class_name = class_path.rsplit('.', 1)
+
+ try:
+ module = __import__(module_name, fromlist=[class_name])
+ cls = getattr(module, class_name)
+ except (ImportError, AttributeError) as e:
+ error = f"Could not import {class_path}: {e}"
+ return [nodes.error("", nodes.paragraph("", error))]
+
+ # Collect expectations by category
+ expectations_by_category = defaultdict(lambda: defaultdict(list))
+ method_details = {}
+
+ for name, method in inspect.getmembers(cls, predicate=inspect.isfunction):
+ if name.startswith("_") or not name.startswith("expect_"):
+ continue
+
+ docstring = inspect.getdoc(method) or "No description provided."
+ category, subcategory = parse_metadata_from_docstring(docstring)
+ if not category:
+ category, subcategory = infer_category_from_method_name(name)
+
+ expectations_by_category[category][subcategory].append(name)
+ method_details[name] = {
+ "method": method,
+ "docstring": docstring,
+ "signature": inspect.signature(method),
+ "category": category,
+ "subcategory": subcategory,
+ }
+
+ # Generate nodes
+ nodes_list = []
+
+ # Add summary table if requested
+ if 'show-summary' in self.options:
+ nodes_list.extend(self._generate_summary_table(expectations_by_category, method_details))
+
+ # Add cards if requested
+ if 'show-cards' in self.options:
+ nodes_list.extend(self._generate_expectation_cards(expectations_by_category, method_details))
+
+ return nodes_list
+
+ def _generate_summary_table(self, expectations_by_category, method_details) -> List[Node]:
+ """Generate summary table nodes."""
+ nodes_list = []
+
+ # Add section with title and proper ID
+ summary_section = nodes.section()
+ summary_section['ids'] = ['expectations-summary']
+ summary_section['names'] = ['expectations-summary']
+ summary_title = nodes.title("", "Expectations Summary")
+ summary_section += summary_title
+
+ # Create table
+ table = nodes.table()
+ tgroup = nodes.tgroup(cols=3)
+ table += tgroup
+
+ # Add column specifications
+ for width in [30, 25, 45]:
+ colspec = nodes.colspec(colwidth=width)
+ tgroup += colspec
+
+ # Add table head
+ thead = nodes.thead()
+ tgroup += thead
+
+ row = nodes.row()
+ thead += row
+
+ for header in ["Category", "Subcategory", "Expectations"]:
+ entry = nodes.entry()
+ row += entry
+ entry += nodes.paragraph("", header)
+
+ # Add table body
+ tbody = nodes.tbody()
+ tgroup += tbody
+
+ for category in sorted(expectations_by_category.keys()):
+ for subcategory in sorted(expectations_by_category[category].keys()):
+ expectations = expectations_by_category[category][subcategory]
+
+ row = nodes.row()
+ tbody += row
+
+ # Category cell
+ entry = nodes.entry()
+ row += entry
+ entry += nodes.paragraph("", category)
+
+ # Subcategory cell
+ entry = nodes.entry()
+ row += entry
+ entry += nodes.paragraph("", subcategory)
+
+ # Expectations cell
+ entry = nodes.entry()
+ row += entry
+
+ exp_para = nodes.paragraph()
+ for i, exp in enumerate(sorted(expectations)):
+ if i > 0:
+ exp_para += nodes.Text(", ")
+
+ # Create clickable link to the card using raw HTML
+ raw_link = nodes.raw(
+                        '',
+                        f'<a href="#card-{exp}">{exp}</a>',
+ format='html'
+ )
+ exp_para += raw_link
+
+ entry += exp_para
+
+ summary_section += table
+ nodes_list.append(summary_section)
+ return nodes_list
+
+ def _generate_expectation_cards(self, expectations_by_category, method_details) -> List[Node]:
+ """Generate expectation cards in Great Expectations gallery style."""
+ nodes_list = []
+
+ for category in sorted(expectations_by_category.keys()):
+ # Category header - use proper heading for TOC inclusion as top-level section
+ cat_section = nodes.section()
+ cat_section['ids'] = [f"category-{category.lower().replace(' ', '-')}"]
+ cat_section['names'] = [category.lower().replace(' ', '-')]
+
+ cat_header = nodes.title("", category)
+ cat_header['classes'] = ['category-title']
+ cat_section += cat_header
+
+ # Create cards container for this category
+ cards_container = nodes.container()
+ cards_container['classes'] = ['expectations-gallery']
+
+ for subcategory in sorted(expectations_by_category[category].keys()):
+ # Subcategory header - use paragraph with special styling
+ subcat_header = nodes.paragraph()
+ subcat_header['classes'] = ['subcategory-title']
+ subcat_header += nodes.Text(subcategory)
+ cards_container += subcat_header
+
+ # Cards grid for this subcategory
+ cards_grid = nodes.container()
+ cards_grid['classes'] = ['cards-grid']
+
+ for method_name in sorted(expectations_by_category[category][subcategory]):
+ details = method_details[method_name]
+ card = self._create_expectation_card(method_name, details)
+ cards_grid += card
+
+ cards_container += cards_grid
+
+ cat_section += cards_container
+ nodes_list.append(cat_section)
+
+ return nodes_list
+
+ def _create_expectation_card(self, method_name: str, details: dict) -> Node:
+ """Create a single expectation card."""
+ # Create card container
+ card = nodes.container()
+ card['classes'] = ['expectation-card']
+ card['ids'] = [f"card-{method_name}"]
+
+ # Card header with method name
+ card_header = nodes.container()
+ card_header['classes'] = ['card-header']
+
+ method_title = nodes.paragraph()
+ method_title['classes'] = ['method-name']
+ method_title += nodes.Text(method_name)
+ card_header += method_title
+
+ card += card_header
+
+ # Card body
+ card_body = nodes.container()
+ card_body['classes'] = ['card-body']
+
+ # Description
+ clean_docstring = clean_docstring_from_metadata(details["docstring"])
+ if clean_docstring:
+ description = clean_docstring.split('\n')[0] # First line only
+ desc_para = nodes.paragraph()
+ desc_para['classes'] = ['card-description']
+ desc_para += nodes.Text(description)
+ card_body += desc_para
+
+ # Data quality issue tags (similar to Great Expectations)
+ tags_container = nodes.container()
+ tags_container['classes'] = ['tags-container']
+
+ # Add category as a tag
+ category_tag = nodes.inline()
+ category_tag['classes'] = ['tag', 'category-tag']
+ category_tag += nodes.Text(details['category'])
+ tags_container += category_tag
+
+ # Add subcategory as a tag
+ subcategory_tag = nodes.inline()
+ subcategory_tag['classes'] = ['tag', 'subcategory-tag']
+ subcategory_tag += nodes.Text(details['subcategory'])
+ tags_container += subcategory_tag
+
+ card_body += tags_container
+
+ # Parameters preview
+ params = [p for p in details["signature"].parameters.keys() if p != "self"]
+ if params:
+ params_container = nodes.container()
+ params_container['classes'] = ['params-preview']
+
+ params_title = nodes.paragraph()
+ params_title['classes'] = ['params-title']
+ params_title += nodes.Text("Parameters:")
+ params_container += params_title
+
+ params_list = nodes.paragraph()
+ params_list['classes'] = ['params-list']
+ params_text = ", ".join(params[:3]) # Show first 3 parameters
+ if len(params) > 3:
+ params_text += f", ... (+{len(params) - 3} more)"
+ params_list += nodes.Text(params_text)
+ params_container += params_list
+
+ card_body += params_container
+
+ card += card_body
+
+ # Card footer with actions - link to API reference
+ card_footer = nodes.container()
+ card_footer['classes'] = ['card-footer']
+
+ # Create link to API reference using raw HTML
+        api_link = nodes.raw(
+            '',
+            '<a href="api_reference.html" class="btn btn-details">View API Reference</a>',
+            format='html'
+        )
+ card_footer += api_link
+
+ card += card_footer
+
+ return card
+
+
+def setup(app: Sphinx) -> Dict[str, Any]:
+ """Setup function for the Sphinx extension."""
+ app.add_directive("expectations", ExpectationsDirective)
+
+ return {
+ 'version': '0.1',
+ 'parallel_read_safe': True,
+ 'parallel_write_safe': True,
+ }
diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css
new file mode 100644
index 0000000..55f6815
--- /dev/null
+++ b/docs/source/_static/custom.css
@@ -0,0 +1,647 @@
+/* Custom styling for expectations documentation with PyData Sphinx theme */
+
+/* Fix PyData theme page width constraint that causes header overlapping */
+.bd-page-width {
+ max-width: none !important;
+ width: 100% !important;
+}
+
+.bd-header__inner.bd-page-width {
+ max-width: none !important;
+ width: calc(100% - 2rem) !important;
+ margin: 0 1rem !important;
+}
+
+/* Improve header layout to prevent overlapping */
+.bd-header .bd-header__inner {
+ justify-content: space-between !important;
+ align-items: center !important;
+ flex-wrap: nowrap !important;
+}
+
+.col-lg-3.navbar-header-items__start {
+ flex: 0 0 auto !important;
+ max-width: 40% !important;
+ overflow: hidden !important;
+}
+
+.col-lg-9.navbar-header-items {
+ flex: 1 1 auto !important;
+ min-width: 0 !important;
+ display: flex !important;
+ justify-content: space-between !important;
+ align-items: center !important;
+}
+
+.navbar-brand.logo .title.logo__title {
+ font-size: 1rem !important;
+ white-space: nowrap !important;
+ overflow: hidden !important;
+ text-overflow: ellipsis !important;
+ max-width: 100% !important;
+}
+
+.navbar-header-items__center {
+ flex: 1 1 auto !important;
+ min-width: 0 !important;
+ overflow: hidden !important;
+}
+
+.navbar-header-items__end {
+ flex: 0 0 auto !important;
+}
+
+/* Fix sidebar layout - left navigation with section nav underneath */
+.bd-sidebar-primary {
+ display: block !important;
+ width: 280px !important;
+ position: sticky !important;
+ top: var(--pst-header-height) !important;
+ height: calc(100vh - var(--pst-header-height)) !important;
+ overflow-y: auto !important;
+}
+
+.bd-sidebar-secondary {
+ display: none !important;
+}
+
+/* Ensure main content adjusts for left sidebar only */
+.bd-main {
+ display: flex !important;
+}
+
+.bd-content {
+ flex: 1 !important;
+ min-width: 0 !important;
+ margin-left: 0 !important;
+ margin-right: 0 !important;
+}
+
+/* Make sure the article container is properly sized */
+.bd-article-container {
+ width: 100% !important;
+ max-width: none !important;
+ padding: 0 3rem !important;
+ min-width: 0 !important;
+}
+
+/* Style the "On this page" section in the left sidebar */
+.bd-sidebar-primary .bd-toc {
+ margin-top: 2rem !important;
+ padding-top: 1rem !important;
+ border-top: 1px solid var(--pst-color-border) !important;
+}
+
+.bd-sidebar-primary .bd-toc .toc-title {
+ font-weight: bold !important;
+ margin-bottom: 0.75rem !important;
+ color: var(--pst-color-text-base) !important;
+ font-size: 0.9rem !important;
+ text-transform: uppercase !important;
+ letter-spacing: 0.5px !important;
+}
+
+.bd-sidebar-primary .bd-toc ul {
+ list-style: none !important;
+ padding-left: 0 !important;
+ margin: 0 !important;
+}
+
+.bd-sidebar-primary .bd-toc ul ul {
+ padding-left: 1rem !important;
+ margin-top: 0.25rem !important;
+}
+
+.bd-sidebar-primary .bd-toc li {
+ margin-bottom: 0.25rem !important;
+}
+
+.bd-sidebar-primary .bd-toc a {
+ color: var(--pst-color-text-muted) !important;
+ text-decoration: none !important;
+ display: block !important;
+ padding: 0.25rem 0.5rem !important;
+ font-size: 0.85rem !important;
+ border-radius: 3px !important;
+ line-height: 1.4 !important;
+}
+
+.bd-sidebar-primary .bd-toc a:hover {
+ color: var(--pst-color-primary) !important;
+ background-color: var(--pst-color-primary-bg) !important;
+}
+
+.bd-sidebar-primary .bd-toc a.current {
+ color: var(--pst-color-primary) !important;
+ background-color: var(--pst-color-primary-bg) !important;
+ font-weight: 500 !important;
+}
+
+/* Mobile responsive fixes */
+@media (max-width: 991px) {
+ .col-lg-3.navbar-header-items__start {
+ max-width: 50% !important;
+ }
+
+ .navbar-brand.logo .title.logo__title {
+ font-size: 0.9rem !important;
+ }
+}
+
+@media (max-width: 768px) {
+ .bd-header .bd-header__inner {
+ flex-wrap: wrap !important;
+ gap: 0.5rem !important;
+ }
+
+ .col-lg-3.navbar-header-items__start,
+ .col-lg-9.navbar-header-items {
+ flex: 1 1 100% !important;
+ max-width: 100% !important;
+ }
+
+ .navbar-brand.logo .title.logo__title {
+ font-size: 1rem !important;
+ white-space: normal !important;
+ line-height: 1.2 !important;
+ }
+
+ .bd-sidebar-primary {
+ width: 100% !important;
+ position: relative !important;
+ height: auto !important;
+ }
+
+ .bd-main {
+ flex-direction: column !important;
+ }
+}
+
+/* Full-width layout for PyData theme */
+.bd-main .bd-content .bd-article-container {
+ max-width: none !important;
+}
+
+.bd-container-fluid {
+ max-width: none !important;
+}
+
+.bd-content {
+ padding-left: 3rem !important;
+ padding-right: 3rem !important;
+}
+
+/* Style all tables in the expectations documentation */
+table.docutils {
+ width: 100% !important;
+ table-layout: fixed !important;
+ border-collapse: collapse !important;
+ margin: 1em 0 !important;
+}
+
+table.docutils th,
+table.docutils td {
+ border: 1px solid #ddd !important;
+ padding: 8px !important;
+ text-align: left !important;
+ vertical-align: top !important;
+ word-wrap: break-word !important; /* Allow long words to break */
+ overflow-wrap: break-word !important; /* Modern browsers */
+ white-space: normal !important; /* Allow text wrapping */
+}
+
+/* Set specific column widths for the expectations summary table */
+table.docutils th:nth-child(1),
+table.docutils td:nth-child(1) {
+ width: 25% !important; /* Category column */
+}
+
+table.docutils th:nth-child(2),
+table.docutils td:nth-child(2) {
+ width: 20% !important; /* Subcategory column */
+}
+
+table.docutils th:nth-child(3),
+table.docutils td:nth-child(3) {
+ width: 55% !important; /* Expectations column */
+}
+
+table.docutils th {
+ background-color: #f5f5f5 !important;
+ font-weight: bold !important;
+}
+
+table.docutils tr:nth-child(even) {
+ background-color: #f9f9f9 !important;
+}
+
+/* Make expectation names in summary table clickable and styled */
+table.docutils td a,
+table.docutils td a.expectation-link {
+ word-break: break-word !important;
+ display: inline !important;
+ color: #007bff !important;
+ text-decoration: none !important;
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace !important;
+ font-size: 0.9em !important;
+ padding: 2px 4px !important;
+ background-color: #f8f9fa !important;
+ border-radius: 3px !important;
+ border: 1px solid #e9ecef !important;
+}
+
+table.docutils td a:hover,
+table.docutils td a.expectation-link:hover {
+ background-color: #e3f2fd !important;
+ border-color: #2196f3 !important;
+ text-decoration: none !important;
+}
+
+/* For very small screens, allow some responsiveness */
+@media (max-width: 768px) {
+ table.docutils {
+ font-size: 0.9em !important;
+ }
+
+ table.docutils th,
+ table.docutils td {
+ padding: 6px !important;
+ }
+}
+
+/* Style method signatures */
+.method-signature {
+ background-color: #f8f8f8;
+ border: 1px solid #e1e1e1;
+ border-radius: 3px;
+ padding: 10px;
+ font-family: 'Courier New', Courier, monospace;
+ margin: 10px 0;
+}
+
+/* Style expectation method headers */
+.expectation-method h5 {
+ color: #2e8b57;
+ border-bottom: 2px solid #2e8b57;
+ padding-bottom: 5px;
+}
+
+/* Style parameter lists */
+.parameter-list {
+ margin-left: 20px;
+}
+
+.parameter-list li {
+ margin-bottom: 5px;
+}
+
+/* Add some spacing to sections */
+.section {
+ margin-bottom: 2em;
+}
+
+/* Style code blocks */
+pre {
+ background-color: #f8f8f8;
+ border: 1px solid #e1e1e1;
+ border-radius: 4px;
+ padding: 10px;
+ overflow-x: auto;
+}
+
+/* Style inline code */
+code {
+ background-color: #f1f1f1;
+ padding: 2px 4px;
+ border-radius: 3px;
+ font-family: 'Courier New', Courier, monospace;
+}
+
+/* Style custom method documentation to match autodoc exactly */
+dl.py.method {
+ margin-bottom: 2em;
+}
+
+dt.sig.sig-object.py {
+ background-color: #f8f8f8;
+ border: 1px solid #d1d1d1;
+ border-radius: 3px;
+ padding: 10px;
+ font-family: 'SFMono-Regular', Menlo, 'Liberation Mono', Courier, monospace;
+ font-size: 0.9em;
+ margin-bottom: 0.5em;
+ font-weight: normal;
+}
+
+dt.sig.sig-object.py .sig-name {
+ font-weight: bold;
+}
+
+dt.sig.sig-object.py .sig-paren {
+ color: #666;
+}
+
+dd.field-list {
+ margin-left: 2em;
+}
+
+dd.field-list dt {
+ font-weight: bold;
+ margin-bottom: 0.5em;
+}
+
+dd.field-list dd {
+ margin-left: 1em;
+ margin-bottom: 0.5em;
+}
+
+/* Style the description content */
+dd.desc-content {
+ margin-left: 2em;
+}
+
+dd.desc-content p {
+ margin-bottom: 1em;
+}
+
+/* Style parameter lists */
+dl.field-list dt {
+ font-weight: bold;
+ color: #2980b9;
+}
+
+dl.field-list dd p strong {
+ color: #333;
+}
+
+/* Ensure our custom expectations use standard Sphinx autodoc styling */
+dl.py.method > dt {
+ background-color: #f8f8f8 !important;
+ border: 1px solid #d1d1d1 !important;
+ border-radius: 3px !important;
+ padding: 10px !important;
+ font-family: 'SFMono-Regular', Menlo, 'Liberation Mono', Courier, monospace !important;
+ font-size: 0.9em !important;
+ margin-bottom: 0.5em !important;
+ font-weight: normal !important;
+}
+
+dl.py.method > dd {
+ margin-left: 2em !important;
+}
+
+/* Override any custom formatting that interferes with autodoc */
+.expectation-method h5,
+.method-signature,
+.parameter-list {
+ display: none !important; /* Hide any custom formatting */
+}
+
+/* Make sure field lists look standard */
+dl.py.method dd dl.field-list {
+ margin-top: 1em !important;
+}
+
+dl.py.method dd dl.field-list dt {
+ font-weight: bold !important;
+ color: #2980b9 !important;
+ margin-bottom: 0.5em !important;
+}
+
+dl.py.method dd dl.field-list dd {
+ margin-left: 1em !important;
+ margin-bottom: 1em !important;
+}
+
+/* Great Expectations Gallery Style Cards */
+.expectations-gallery {
+ margin: 2rem 0 !important;
+ padding: 0 !important;
+ width: 100% !important;
+}
+
+/* Style category title headings for TOC inclusion */
+.category-title,
+h1.category-title,
+h2.category-title {
+ color: #2c3e50 !important;
+ border-bottom: 3px solid #3498db !important;
+ padding-bottom: 10px !important;
+ margin: 2em 0 1em 0 !important;
+ font-size: 1.4em !important;
+ font-weight: bold !important;
+}
+
+.subcategory-title {
+ color: #34495e !important;
+ margin: 1.5em 0 1em 0 !important;
+ font-size: 1.2em !important;
+ font-weight: bold !important;
+}
+
+.cards-grid {
+ display: grid;
+ grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
+ gap: 32px !important;
+ margin: 2rem 0 !important;
+ align-items: start;
+ padding: 1rem 0 !important;
+ width: 100% !important;
+ max-width: none !important;
+}
+
+.expectation-card {
+ border: 1px solid #e1e8ed !important;
+ border-radius: 8px !important;
+ background: white !important;
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
+ transition: all 0.3s ease !important;
+ overflow: hidden !important;
+ min-height: 320px !important;
+ display: flex !important;
+ flex-direction: column !important;
+ margin: 0 !important;
+ width: 100% !important;
+ min-width: 400px !important;
+}
+
+/* Ensure expectation cards have proper internal padding */
+.expectation-card .card-header,
+.expectation-card .card-body,
+.expectation-card .card-footer {
+ box-sizing: border-box !important;
+}
+
+.expectation-card .card-header {
+ padding: 20px 24px !important;
+}
+
+.expectation-card .card-body {
+ padding: 24px !important;
+}
+
+.expectation-card .card-footer {
+ padding: 20px 24px !important;
+}
+
+.expectation-card:hover {
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
+ transform: translateY(-2px);
+}
+
+.card-header {
+ background: #f8f9fa !important;
+ padding: 20px 24px !important;
+ border-bottom: 1px solid #e1e8ed !important;
+ min-height: 60px !important;
+ display: flex !important;
+ align-items: center !important;
+}
+
+.method-name {
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
+ font-size: 1.1em;
+ font-weight: bold;
+ color: #2c3e50;
+ margin: 0;
+ word-wrap: break-word;
+ word-break: break-word;
+ overflow-wrap: break-word;
+ hyphens: auto;
+ line-height: 1.3;
+}
+
+.card-body {
+ padding: 24px !important;
+ flex: 1 !important;
+ display: flex !important;
+ flex-direction: column !important;
+ justify-content: space-between !important;
+}
+
+.card-description {
+ color: #555;
+ margin-bottom: 15px;
+ line-height: 1.5;
+}
+
+.tags-container {
+ margin: 15px 0;
+}
+
+.tag {
+ display: inline-block;
+ padding: 4px 8px;
+ border-radius: 12px;
+ font-size: 0.8em;
+ margin-right: 8px;
+ margin-bottom: 5px;
+}
+
+.category-tag {
+ background: #e3f2fd;
+ color: #1976d2;
+ border: 1px solid #bbdefb;
+}
+
+.subcategory-tag {
+ background: #f3e5f5;
+ color: #7b1fa2;
+ border: 1px solid #ce93d8;
+}
+
+.params-preview {
+ margin-top: 15px;
+ padding: 10px;
+ background: #f8f9fa;
+ border-radius: 4px;
+}
+
+.params-title {
+ font-weight: bold;
+ color: #495057;
+ margin: 0 0 5px 0;
+ font-size: 0.9em;
+}
+
+.params-list {
+ font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
+ font-size: 0.85em;
+ color: #6c757d;
+ margin: 0;
+}
+
+.card-footer {
+ padding: 20px 24px !important;
+ background: #f8f9fa !important;
+ border-top: 1px solid #e1e8ed !important;
+ text-align: right !important;
+ margin-top: auto !important;
+}
+
+.btn {
+ display: inline-block;
+ padding: 8px 16px;
+ border-radius: 4px;
+ text-decoration: none;
+ font-size: 0.9em;
+ font-weight: 500;
+ transition: all 0.2s ease;
+}
+
+.btn-details {
+ background: #007bff;
+ color: white !important;
+ border: 1px solid #007bff;
+ text-decoration: none !important;
+}
+
+.btn-details:hover {
+ background: #0056b3;
+ border-color: #0056b3;
+ text-decoration: none !important;
+ color: white !important;
+}
+
+/* Responsive design */
+@media (max-width: 1400px) {
+ .cards-grid {
+ grid-template-columns: repeat(auto-fit, minmax(380px, 1fr)) !important;
+ gap: 28px !important;
+ }
+}
+
+@media (max-width: 1200px) {
+ .cards-grid {
+ grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)) !important;
+ gap: 24px !important;
+ }
+
+ .expectation-card {
+ min-width: 350px !important;
+ }
+}
+
+@media (max-width: 768px) {
+ .cards-grid {
+ grid-template-columns: 1fr !important;
+ gap: 24px !important;
+ margin: 1.5rem 0 !important;
+ padding: 0 !important;
+ }
+
+ .expectation-card {
+ margin: 0 !important;
+ min-height: 280px !important;
+ min-width: auto !important;
+ }
+
+ .bd-content {
+ padding-left: 1.5rem !important;
+ padding-right: 1.5rem !important;
+ }
+
+ .bd-article-container {
+ padding: 0 1.5rem !important;
+ }
+}
diff --git a/docs/source/_templates/expectations_summary.html b/docs/source/_templates/expectations_summary.html
new file mode 100644
index 0000000..d064bc8
--- /dev/null
+++ b/docs/source/_templates/expectations_summary.html
@@ -0,0 +1,27 @@
+<div class="expectations-summary">
+  <h2>Expectations Summary</h2>
+  <table class="docutils">
+    <thead>
+      <tr>
+        <th>Category</th>
+        <th>Subcategory</th>
+        <th>Expectations</th>
+      </tr>
+    </thead>
+    <tbody>
+      {% for category, subcategories in expectations_by_category.items() %}
+      {% for subcategory, expectations in subcategories.items() %}
+      <tr>
+        <td>{{ category }}</td>
+        <td>{{ subcategory }}</td>
+        <td>
+          {% for exp in expectations %}
+          <a href="#card-{{ exp }}" class="expectation-link">{{ exp }}</a>{% if not loop.last %}, {% endif %}
+          {% endfor %}
+        </td>
+      </tr>
+      {% endfor %}
+      {% endfor %}
+    </tbody>
+  </table>
+</div>
diff --git a/docs/source/adding_expectations.rst b/docs/source/adding_expectations.rst
new file mode 100644
index 0000000..99e6843
--- /dev/null
+++ b/docs/source/adding_expectations.rst
@@ -0,0 +1,493 @@
+Adding Your Expectations
+========================
+
+This guide will walk you through the process of creating custom expectations for DataFrame validation.
+There are three main approaches depending on your use case.
+
+Defining Your Expectations
+--------------------------
+
+Most use cases that involve validating a single column in the dataframe can be covered by initialising the
+``DataFrameColumnExpectation`` class with the correct parameters. Expectations implemented by initialising
+``DataFrameColumnExpectation`` can be found in the ``column_expectations`` module, categorised based on the data-type of
+the column value.
+
+If you want to go ahead with implementing ``DataFrameColumnExpectation``, you first need to identify the data-type of
+the column value. Existing expectations are already categorised into ``string``, ``numerical`` or ``any_value``
+expectations. Create a new category in ``column_expectations`` if you think existing categories don't fit your use case.
+Once you have decided where the expectation needs to be added, you can define it as follows:
+
+.. code-block:: python
+
+    from pyspark.sql import functions as F
+
+    from dataframe_expectations.expectations.column_expectation import (
+        DataFrameColumnExpectation,
+    )
+    from dataframe_expectations.expectations.expectation_registry import (
+        register_expectation,
+    )
+    from dataframe_expectations.expectations.utils import requires_params
+
+
+ @register_expectation("ExpectIsDivisible")
+ @requires_params("column_name", "value", types={"column_name": str, "value": int})
+    def create_expectation_is_divisible(**kwargs) -> DataFrameColumnExpectation:
+ column_name = kwargs["column_name"]
+ value = kwargs["value"]
+
+ return DataFrameColumnExpectation(
+ expectation_name="ExpectIsDivisible",
+ column_name=column_name,
+ fn_violations_pandas=lambda df: df[df[column_name] % value != 0], # function that finds violations
+ fn_violations_pyspark=lambda df: df.filter(F.col(column_name) % value != 0), # function that finds violations
+ description=f"'{column_name}' divisible by {value}",
+ error_message=f"'{column_name}' not divisible by {value}.",
+ )
+
+For additional guidance, you can refer to the implementation of ``ExpectationValueGreaterThan`` and
+``ExpectationValueLessThan`` in ``column_expectation_factory.py``. These examples demonstrate how to initialise the
+``DataFrameColumnExpectation`` class with the right parameters and define filtering logic for different dataframes.
+The ``@register_expectation`` decorator adds your expectation to the library's registry, and the
+``@requires_params`` decorator is a utility that validates the input parameters.
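+
+Once registered, the expectation can be retrieved from the registry and run on a DataFrame
+directly. A minimal sketch (the column name and data below are illustrative):
+
+.. code-block:: python
+
+    import pandas as pd
+
+    from dataframe_expectations.expectations.expectation_registry import (
+        DataFrameExpectationRegistry,
+    )
+
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        "ExpectIsDivisible", column_name="amount", value=3
+    )
+
+    df = pd.DataFrame({"amount": [3, 6, 9]})
+    result = expectation.validate(data_frame=df)  # returns a success message here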
+
+Adding Aggregation-Based Expectations
+--------------------------------------
+
+Just like the column expectations, you can find the aggregation-based expectations in the ``aggregation_expectations``
+module. For expectations that require aggregation operations (such as row counts, distinct value counts, null
+percentages, etc.), you should implement custom expectation classes by inheriting from
+``DataFrameAggregationExpectation``. These types of expectations cannot be easily covered
+by the ``DataFrameColumnExpectation`` class because they involve DataFrame-level or column-level aggregations rather
+than row-by-row validations.
+
+Existing expectations are already categorised into ``string``, ``numerical`` or ``any_value``
+expectations. Before you implement your aggregation-based expectation, decide which category fits
+the aggregation operation and add it there. Feel free to create a new category if needed.
+
+Here's an example of how to implement an aggregation-based expectation:
+
+.. code-block:: python
+
+ from dataframe_expectations import DataFrameLike, DataFrameType
+ from dataframe_expectations.expectations.aggregation_expectation import (
+ DataFrameAggregationExpectation,
+ )
+ from dataframe_expectations.expectations.expectation_registry import register_expectation
+ from dataframe_expectations.expectations.utils import requires_params
+ from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+ DataFrameExpectationSuccessMessage,
+ )
+ import pandas as pd
+ from pyspark.sql import functions as F
+
+
+ class ExpectationMinRows(DataFrameAggregationExpectation):
+ """
+ Expectation that validates the DataFrame has at least a minimum number of rows.
+ """
+
+ def __init__(self, min_count: int):
+ description = f"DataFrame has at least {min_count} row(s)"
+ self.min_count = min_count
+
+ super().__init__(
+ expectation_name="ExpectationMinRows",
+ column_names=[], # Empty list since this operates on entire DataFrame
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate minimum row count in a pandas DataFrame."""
+ # Note: Parent class already checks if the column is present when column_names is not empty
+ try:
+ row_count = len(data_frame)
+
+ if row_count >= self.min_count:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"DataFrame has {row_count} row(s), expected at least {self.min_count}.",
+ )
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error counting rows: {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate minimum row count in a PySpark DataFrame."""
+ # Note: Parent class already checks if the column is present when column_names is not empty
+ try:
+ row_count = data_frame.count()
+
+ if row_count >= self.min_count:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"DataFrame has {row_count} row(s), expected at least {self.min_count}.",
+ )
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error counting rows: {str(e)}",
+ )
+
+
+ @register_expectation("ExpectationMinRows")
+ @requires_params("min_count", types={"min_count": int})
+ def create_expectation_min_rows(**kwargs) -> ExpectationMinRows:
+ """
+ Create an ExpectationMinRows instance.
+
+ Args:
+ min_count (int): Minimum required number of rows.
+
+ Returns:
+ ExpectationMinRows: A configured expectation instance.
+ """
+ return ExpectationMinRows(min_count=kwargs["min_count"])
+
+Key differences for aggregation-based expectations:
+
+1. **Inherit from** ``DataFrameAggregationExpectation``: This base class provides the framework for aggregation operations and automatically handles column validation.
+
+2. **Implement** ``aggregate_and_validate_pandas`` **and** ``aggregate_and_validate_pyspark``: These methods are specifically designed for aggregation operations rather than the generic ``validate_pandas`` and ``validate_pyspark`` methods.
+
+3. **Call** ``super().__init__()``: Initialize the parent class with expectation metadata including ``expectation_name``, ``column_names``, and ``description``.
+
+4. **Automatic column validation**: The parent class automatically validates that required columns exist before calling your implementation methods. You don't need to manually check for column existence.
+
+5. **Error handling**: Wrap aggregation operations in try/except blocks since aggregations can fail due to data type issues or other DataFrame problems.
+
+Example of a column-based aggregation expectation:
+
+.. code-block:: python
+
+ class ExpectationColumnMeanBetween(DataFrameAggregationExpectation):
+ """
+ Expectation that validates the mean value of a column falls within a specified range.
+ """
+
+ def __init__(self, column_name: str, min_value: float, max_value: float):
+ description = f"column '{column_name}' mean value between {min_value} and {max_value}"
+
+ self.column_name = column_name
+ self.min_value = min_value
+ self.max_value = max_value
+
+ super().__init__(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_names=[column_name], # List of columns this expectation requires
+ description=description,
+ )
+
+ def aggregate_and_validate_pandas(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate column mean in a pandas DataFrame."""
+ # Column validation is automatically handled by the parent class
+ try:
+ mean_val = data_frame[self.column_name].mean()
+
+ if pd.isna(mean_val):
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' contains only null values.",
+ )
+
+ if self.min_value <= mean_val <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.",
+ )
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"Error calculating mean for column '{self.column_name}': {str(e)}",
+ )
+
+ def aggregate_and_validate_pyspark(
+ self, data_frame: DataFrameLike, **kwargs
+ ) -> DataFrameExpectationResultMessage:
+ """Validate column mean in a PySpark DataFrame."""
+ # Column validation is automatically handled by the parent class
+ try:
+ mean_result = data_frame.select(F.avg(self.column_name).alias("mean_val")).collect()
+ mean_val = mean_result[0]["mean_val"]
+
+ if mean_val is None:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' contains only null values.",
+ )
+
+ if self.min_value <= mean_val <= self.max_value:
+ return DataFrameExpectationSuccessMessage(
+ expectation_name=self.get_expectation_name()
+ )
+ else:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.",
+ )
+ except Exception as e:
+ return DataFrameExpectationFailureMessage(
+ expectation_str=str(self),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=f"Error calculating mean for column '{self.column_name}': {str(e)}",
+ )
+
+Key considerations for aggregation-based expectations:
+
+1. **Performance**: Aggregation operations can be expensive, especially on large datasets in PySpark. Consider the performance implications of your aggregation logic.
+
+2. **Different DataFrame types**: Ensure your implementation works correctly for both Pandas and PySpark DataFrames, as aggregation APIs may differ (e.g., ``df.mean()`` vs ``F.avg()``).
+
+3. **Error handling**: Include proper error handling for edge cases like empty DataFrames or all-null columns.
+
+4. **Message clarity**: Provide clear, informative error messages that help users understand what went wrong.
+
+5. **Automatic column validation**: The ``DataFrameAggregationExpectation`` base class automatically validates that required columns exist before calling your ``aggregate_and_validate_*`` methods. Simply specify the required columns in the ``column_names`` parameter during initialization.
+
+6. **Focus on aggregation logic**: Since column validation is handled automatically, you can focus purely on implementing your aggregation and validation logic without worrying about column existence checks.
+
+Examples of aggregation-based expectations include:
+
+- ``ExpectationMinRows`` / ``ExpectationMaxRows``: Validate row count limits
+- ``ExpectationDistinctColumnValuesEquals``: Validate the number of distinct values in a column
+- ``ExpectationMaxNullPercentage``: Validate the percentage of null values in a column
+- ``ExpectationUniqueRows``: Validate that rows are unique across specified columns
+- ``ExpectationColumnMeanBetween``: Validate that column mean falls within a range
+- ``ExpectationColumnQuantileBetween``: Validate that column quantiles fall within ranges
+
+For more examples, check the aggregation_expectations module.
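+
+As with column expectations, a registered aggregation expectation can be exercised on its own.
+A minimal sketch, assuming ``ExpectationMinRows`` was registered as shown above:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    from dataframe_expectations.expectations.expectation_registry import (
+        DataFrameExpectationRegistry,
+    )
+
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        "ExpectationMinRows", min_count=5
+    )
+    result = expectation.validate(data_frame=pd.DataFrame({"a": [1, 2, 3]}))
+    # result is a failure message: the DataFrame has 3 rows, expected at least 5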
+
+Custom Expectations with Full Control
+--------------------------------------
+
+While ``DataFrameColumnExpectation`` covers most use cases, there are instances where you need more control
+over the validation logic. In such cases, you can define a new expectation by inheriting from the
+``DataFrameExpectation`` class.
+
+To help you get started, here's a template you can customize to fit your specific use case:
+
+.. code-block:: python
+
+ from dataframe_expectations import DataFrameLike, DataFrameType
+ from dataframe_expectations.expectations import DataFrameExpectation
+ from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+ DataFrameExpectationSuccessMessage,
+ )
+
+ class ExpectTheUnexpected(DataFrameExpectation):
+ """
+ Description of the expectation
+ """
+
+        def __init__(self):
+            """
+            Initialize the expectation with the parameters it needs. For example:
+            - column_name: The name of the column to validate.
+            - value: The expected threshold for validation.
+            """
+            pass
+
+        def get_description(self) -> str:
+            """
+            Returns a description of the expectation.
+            """
+            return "description of the expectation"
+
+        def validate_pandas(
+            self, data_frame: DataFrameLike, **kwargs
+        ) -> DataFrameExpectationResultMessage:
+            """
+            Validate a pandas DataFrame against the expectation.
+            Return a success or failure message depending on the outcome.
+            """
+            raise NotImplementedError
+
+        def validate_pyspark(
+            self, data_frame: DataFrameLike, **kwargs
+        ) -> DataFrameExpectationResultMessage:
+            """
+            Validate a PySpark DataFrame against the expectation.
+            Return a success or failure message depending on the outcome.
+            """
+            raise NotImplementedError
+
+
+Adding to DataFrameExpectationsSuite
+-------------------------------------
+
+The ``DataFrameExpectationsSuite`` encapsulates all the expectations that are provided by this library.
+After defining and testing your expectation, integrate it into the ``DataFrameExpectationsSuite`` by creating a new
+method with a descriptive name starting with the prefix ``expect_`` (this is needed to generate documentation later).
+Here's an example:
+
+.. code-block:: python
+
+ class DataFrameExpectationsSuite:
+ """
+ A suite of expectations for validating DataFrames.
+ """
+ ...
+
+ def expect_is_divisible(
+ self,
+ column_name: str,
+ value: float,
+        # You can add more parameters here
+ ):
+ """
+ Define what the expectation does
+ :param column_name: The name of the column to check.
+ :param value: The value to compare against.
+ :return: An instance of DataFrameExpectationsSuite.
+ """
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectIsDivisible",
+ column_name=column_name,
+ value=value,
+ )
+
+ logger.info(f"Adding expectation: {expectation}")
+ self.__expectations.append(expectation)
+ return self
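+
+Once the method is in place, it can be chained like any built-in expectation. A hypothetical
+example using the ``expect_is_divisible`` method defined above:
+
+.. code-block:: python
+
+    import pandas as pd
+
+    from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+    suite = (
+        DataFrameExpectationsSuite()
+        .expect_is_divisible("amount", 3)
+        .expect_value_not_null("amount")
+    )
+    suite.run(pd.DataFrame({"amount": [3, 6, 9]}))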
+
+Adding Unit Tests
+-----------------
+
+To ensure your expectations work as expected (pun intended), make sure to add unit tests in the
+``tests/data_engineering/dataframe_expectations/expectations_implemented`` folder. Here's a template to get you started:
+
+.. code-block:: python
+
+ import pytest
+ import pandas as pd
+
+ from dataframe_expectations import DataFrameType
+ from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+ )
+ from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+ )
+
+
+ def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDoesSomeCheck",
+ column_name="col1",
+ value=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationDoesSomeCheck", f"Expected 'ExpectationDoesSomeCheck' but got: {expectation.get_expectation_name()}"
+
+ def test_expectation_pandas_success():
+ """
+ Test the expectation for pandas DataFrame with no violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def test_expectation_pandas_violations():
+ """
+ Test the expectation for pandas DataFrame with violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def test_expectation_pyspark_success(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def test_expectation_pyspark_violations(spark):
+ """
+ Test the expectation for PySpark DataFrame with violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def test_suite_pandas_success():
+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def test_suite_pandas_violations():
+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def test_suite_pyspark_success(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+ def test_suite_pyspark_violations(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ This method should be implemented in the subclass.
+ """
+ raise NotImplementedError("Subclasses should implement this method.")
+
+For concrete examples of unit tests, check the existing tests in the ``expectations_implemented`` folder.
+You can also use the template above as a starting point.
+
+Updating the Documentation
+--------------------------
+
+Once your expectation is ready for use, the final step is adding it to the documentation. The documentation is automatically generated by the CI pipeline using the ``uv`` package manager and is available at ``docs/build/html/expectations.html``.
+
+Make sure to add the docstring for the function you added to ``DataFrameExpectationsSuite`` before submitting your changes. The CI pipeline will automatically update the documentation using the make targets in the ``docs`` folder when your changes are merged.
+
+If you need to build the documentation locally for testing, you can run Sphinx directly:
+
+.. code-block:: bash
+
+ cd docs
+ uv run sphinx-build source build/html
+
+or use the make target:
+
+.. code-block:: bash
+
+ cd docs
+ make html
diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst
new file mode 100644
index 0000000..8c9c9b5
--- /dev/null
+++ b/docs/source/api_reference.rst
@@ -0,0 +1,62 @@
+API Reference
+=============
+
+This section provides detailed documentation for the core infrastructure classes and modules in the DataFrame Expectations library. For user-facing expectation methods, see :doc:`expectations`.
+
+Core Infrastructure
+-------------------
+
+Base Expectation Classes
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: dataframe_expectations.expectations
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Column Expectations
+~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: dataframe_expectations.expectations.column_expectation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Aggregation Expectations
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: dataframe_expectations.expectations.aggregation_expectation
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Expectation Registry
+--------------------
+
+.. automodule:: dataframe_expectations.expectations.expectation_registry
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Result Messages
+---------------
+
+.. automodule:: dataframe_expectations.result_message
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Utilities
+---------
+
+.. automodule:: dataframe_expectations.expectations.utils
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Exception Classes
+-----------------
+
+.. automodule:: dataframe_expectations.expectations_suite
+ :members: DataFrameExpectationsSuiteFailure
+ :show-inheritance:
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..f75ec19
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,83 @@
+import os
+import sys
+
+# Add the project root and extension directories to the path
+sys.path.insert(0, os.path.abspath('../../'))
+sys.path.insert(0, os.path.abspath('_ext'))
+
+# Project information
+project = 'DataFrame Expectations'
+copyright = '2024, Your Name'
+author = 'Your Name'
+release = '0.1.0'
+
+# Extensions
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.viewcode',
+ 'sphinx.ext.napoleon', # For Google/NumPy style docstrings
+ 'sphinx.ext.intersphinx',
+ 'expectations_autodoc', # Our custom extension
+]
+
+# Theme
+html_theme = 'pydata_sphinx_theme'
+
+# PyData theme options for modern, full-width usage
+html_theme_options = {
+ "use_edit_page_button": False,
+ "navigation_depth": 3,
+ "show_prev_next": True,
+ "navbar_persistent": ["search-button"],
+ "navbar_center": ["navbar-nav"],
+ "navbar_end": [],
+ "sidebar_includehidden": True,
+ "primary_sidebar_end": ["page-toc"],
+ "secondary_sidebar_items": [],
+ "show_toc_level": 3,
+}
+
+# Autodoc settings
+autodoc_default_options = {
+ 'members': True,
+ 'undoc-members': True,
+ 'show-inheritance': True,
+ 'special-members': '__init__',
+}
+
+# Napoleon settings for docstring parsing
+napoleon_google_docstring = True
+napoleon_numpy_docstring = True
+napoleon_include_init_with_doc = False
+napoleon_include_private_with_doc = False
+
+# Intersphinx mapping for cross-references
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3', None),
+ 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
+ 'pyspark': ('https://spark.apache.org/docs/latest/api/python/', None),
+}
+
+# HTML output options
+html_static_path = ['_static']
+html_css_files = [
+ 'custom.css',
+]
+
+# Configure HTML title and layout
+html_title = f"{project} v{release} Documentation"
+html_short_title = project
+
+# PyData theme context
+html_context = {
+ 'display_github': True,
+ 'github_user': 'getyourguide',
+ 'github_repo': 'dataframe-expectations',
+ 'github_version': 'main',
+ 'doc_path': 'docs/source/',
+}
+
+# Logo configuration
+html_logo = None # You can add a logo path here if needed
+html_favicon = None # You can add a favicon path here if needed
diff --git a/docs/source/expectations.rst b/docs/source/expectations.rst
new file mode 100644
index 0000000..8c82b3b
--- /dev/null
+++ b/docs/source/expectations.rst
@@ -0,0 +1,11 @@
+Expectation Gallery
+===================
+
+
+This page provides comprehensive documentation for all available DataFrame expectations.
+The expectations are automatically categorized and organized for easy browsing.
+
+.. expectations::
+ :class: dataframe_expectations.expectations_suite.DataFrameExpectationsSuite
+ :show-summary:
+ :show-cards:
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
new file mode 100644
index 0000000..322dd6a
--- /dev/null
+++ b/docs/source/getting_started.rst
@@ -0,0 +1,122 @@
+Getting Started
+===============
+
+Welcome to DataFrame Expectations! This guide will help you get up and running quickly with validating your Pandas and PySpark DataFrames.
+
+Installation
+------------
+
+Install DataFrame Expectations using pip:
+
+.. code-block:: bash
+
+ pip install dataframe-expectations
+
+Requirements
+~~~~~~~~~~~~
+
+* Python 3.10+
+* pandas >= 1.5.0
+* pyspark >= 3.3.0
+* tabulate >= 0.8.9
+
+Basic Usage
+-----------
+
+DataFrame Expectations provides a fluent API for building validation suites. Here's how to get started:
+
+Pandas Example
+~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+ import pandas as pd
+ from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+ # Create a sample DataFrame
+ df = pd.DataFrame({
+ "age": [25, 15, 45, 22],
+ "name": ["Alice", "Bob", "Charlie", "Diana"],
+ "salary": [50000, 60000, 80000, 45000]
+ })
+
+ # Build a validation suite
+ suite = (
+ DataFrameExpectationsSuite()
+ .expect_min_rows(3) # At least 3 rows
+ .expect_max_rows(10) # At most 10 rows
+ .expect_value_greater_than("age", 18) # All ages > 18
+ .expect_value_less_than("salary", 100000) # All salaries < 100k
+ .expect_value_not_null("name") # No null names
+ )
+
+ # Run validation
+ suite.run(df)
+
+
+PySpark Example
+~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+ from pyspark.sql import SparkSession
+ from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+ # Initialize Spark
+ spark = SparkSession.builder.appName("DataFrameExpectations").getOrCreate()
+
+ # Create a sample DataFrame
+ data = [
+ {"age": 25, "name": "Alice", "salary": 50000},
+ {"age": 15, "name": "Bob", "salary": 60000},
+ {"age": 45, "name": "Charlie", "salary": 80000},
+ {"age": 22, "name": "Diana", "salary": 45000}
+ ]
+ df = spark.createDataFrame(data)
+
+ # Build a validation suite (same API as Pandas!)
+ suite = (
+ DataFrameExpectationsSuite()
+ .expect_min_rows(3)
+ .expect_max_rows(10)
+ .expect_value_greater_than("age", 18)
+ .expect_value_less_than("salary", 100000)
+ .expect_value_not_null("name")
+ )
+
+ # Run validation
+ suite.run(df)
+
+Example Output
+~~~~~~~~~~~~~~
+
+When validations fail, you'll see detailed output like this:
+
+.. code-block:: text
+
+ ========================== Running expectations suite ==========================
+ ExpectationMinRows (DataFrame contains at least 3 rows) ... OK
+ ExpectationMaxRows (DataFrame contains at most 10 rows) ... OK
+ ExpectationValueGreaterThan ('age' is greater than 18) ... FAIL
+ ExpectationValueLessThan ('salary' is less than 100000) ... OK
+ ExpectationValueNotNull ('name' is not null) ... OK
+ ============================ 4 success, 1 failures =============================
+
+ ExpectationSuiteFailure: (1/5) expectations failed.
+
+ ================================================================================
+ List of violations:
+ --------------------------------------------------------------------------------
+ [Failed 1/1] ExpectationValueGreaterThan ('age' is greater than 18): Found 1 row(s) where 'age' is not greater than 18.
+ Some examples of violations:
+ +-----+------+--------+
+ | age | name | salary |
+ +-----+------+--------+
+ | 15 | Bob | 60000 |
+ +-----+------+--------+
+ ================================================================================
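+
+To handle failures programmatically, for example in a pipeline step, you can catch the suite
+failure. A minimal sketch, assuming ``run`` raises the ``DataFrameExpectationsSuiteFailure``
+exception (exposed by ``dataframe_expectations.expectations_suite``) when expectations fail, as
+the output above suggests:
+
+.. code-block:: python
+
+    from dataframe_expectations.expectations_suite import (
+        DataFrameExpectationsSuite,
+        DataFrameExpectationsSuiteFailure,
+    )
+
+    try:
+        suite.run(df)
+    except DataFrameExpectationsSuiteFailure as failure:
+        # Log the failure details, then re-raise to fail the pipeline step
+        print(f"Validation failed: {failure}")
+        raise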
+
+How to contribute?
+------------------
+Contributions are welcome! You can enhance the library by adding new expectations, refining existing
+ones, or improving the testing framework and documentation.
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..06143a2
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,22 @@
+DataFrame Expectations
+======================
+
+**DataFrameExpectations** is a Python library designed to validate **Pandas** and **PySpark** DataFrames using
+customizable, reusable expectations. It simplifies testing in data pipelines and end-to-end workflows by providing a
+standardized framework for DataFrame validation.
+
+Instead of using different validation approaches for DataFrames, this library provides a standardized solution for this
+use case. As a result, any contributions made here, such as adding new expectations, can be leveraged by all users of
+the library.
+
+See the starter guide :doc:`here <getting_started>`.
+See the complete list of expectations :doc:`here <expectations>`.
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ getting_started
+ adding_expectations
+ expectations
+ api_reference
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..c3ea1a7
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,41 @@
+[project]
+name = "dataframe-expectations"
+version = "0.1.0"
+description = "Python library designed to validate Pandas and PySpark DataFrames using customizable, reusable expectations"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "pandas>=1.5.0",
+ "pyspark>=3.3.0",
+ "tabulate>=0.8.9",
+]
+
+[dependency-groups]
+dev = [
+ "numpy>=1.21.0",
+ "pytest>=7.0.0",
+ "pre-commit>=2.20.0",
+ "ruff>=0.1.0",
+ "pytest-cov>=4.0.0",
+]
+docs = [
+ "sphinx>=4.0.0",
+ "pydata-sphinx-theme>=0.13.0",
+ "sphinx-autobuild>=2021.3.14",
+ "pyspark>=3.3.0",
+ "pandas>=1.5.0",
+ "tabulate>=0.8.9",
+]
+
+
+[tool.ruff]
+target-version = "py310" # Target the minimum supported version
+line-length = 100
+
+[tool.mypy]
+python_version = "3.10" # Target the minimum supported version
+warn_unused_configs = false
+disallow_untyped_defs = false
+warn_return_any = false
+ignore_missing_imports = true
+check_untyped_defs = false
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..833a626
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,26 @@
+import pytest
+from pyspark.sql import SparkSession
+import pandas as pd
+import pandas.testing as pdt
+
+
+@pytest.fixture(scope="module")
+def spark() -> SparkSession:
+ """create a spark session we can reuse for every test"""
+
+ return SparkSession.builder.master("local").appName("Test").getOrCreate()
+
+
+def assert_pandas_df_equal(df1: pd.DataFrame, df2: pd.DataFrame):
+ # Optional: sort and reset index to avoid false mismatches due to row order
+ df1_sorted = df1.sort_values(by=df1.columns.tolist()).reset_index(drop=True)
+ df2_sorted = df2.sort_values(by=df2.columns.tolist()).reset_index(drop=True)
+
+ pdt.assert_frame_equal(df1_sorted, df2_sorted, check_dtype=False)
+
+
+def assert_pyspark_df_equal(df1, df2):
+ df1_pd = df1.toPandas().sort_values(by=df1.columns).reset_index(drop=True)
+ df2_pd = df2.toPandas().sort_values(by=df2.columns).reset_index(drop=True)
+
+ pd.testing.assert_frame_equal(df1_pd, df2_pd, check_dtype=False)
diff --git a/tests/expectations_helper_classes/__init__.py b/tests/expectations_helper_classes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/expectations_helper_classes/test_column_expectations.py b/tests/expectations_helper_classes/test_column_expectations.py
new file mode 100644
index 0000000..7827b7c
--- /dev/null
+++ b/tests/expectations_helper_classes/test_column_expectations.py
@@ -0,0 +1,69 @@
+import pytest
+from unittest.mock import MagicMock
+
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.column_expectation import (
+ DataFrameColumnExpectation,
+)
+
+
+@pytest.fixture
+def expectation():
+ return DataFrameColumnExpectation(
+ expectation_name="MyColumnExpectation",
+ column_name="test_column",
+ fn_violations_pandas=lambda df: df,
+ fn_violations_pyspark=lambda df: df,
+ description="Test column expectation",
+ error_message="Test column expectation failed.",
+ )
+
+
+def test_get_expectation_name(expectation):
+ """
+ Test that the expectation name is the class name.
+ """
+ assert expectation.get_expectation_name() == "MyColumnExpectation", (
+ f"Expected 'MyColumnExpectation' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_validate_for_pandas_df(expectation):
+ """
+    Test whether row_validation() is called with the right parameters for pandas.
+ """
+
+ # Mock methods
+ expectation.row_validation = MagicMock(return_value="mock_result")
+
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+
+ # test validate_pandas called the right methods
+ _ = expectation.validate(data_frame=data_frame)
+
+ expectation.row_validation.assert_called_once_with(
+ data_frame_type=DataFrameType.PANDAS,
+ data_frame=data_frame,
+ fn_violations=expectation.fn_violations_pandas,
+ )
+
+
+def test_validate_for_pyspark_df(expectation, spark):
+ """
+    Test whether row_validation() is called with the right parameters for PySpark.
+ """
+
+ # Mock methods
+ expectation.row_validation = MagicMock(return_value="mock_result")
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+
+ # test validate_pyspark called the right methods
+ _ = expectation.validate(data_frame=data_frame)
+
+ expectation.row_validation.assert_called_once_with(
+ data_frame_type=DataFrameType.PYSPARK,
+ data_frame=data_frame,
+ fn_violations=expectation.fn_violations_pyspark,
+ )
diff --git a/tests/expectations_helper_classes/test_expectation_registry.py b/tests/expectations_helper_classes/test_expectation_registry.py
new file mode 100644
index 0000000..611ec5b
--- /dev/null
+++ b/tests/expectations_helper_classes/test_expectation_registry.py
@@ -0,0 +1,121 @@
+import pytest
+
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+
+
+class DummyExpectation:
+ def __init__(self, foo=None):
+ self.foo = foo
+
+
+@pytest.fixture(autouse=True)
+def cleanup_registry():
+ # Save the original state of the registry
+ original = set(DataFrameExpectationRegistry.list_expectations())
+
+ yield
+
+ # Remove any expectations added during the test
+ current = set(DataFrameExpectationRegistry.list_expectations())
+ for name in current - original:
+ DataFrameExpectationRegistry.remove_expectation(name)
+
+
+def test_register_and_get_expectation():
+ """
+ Test registering and retrieving an expectation.
+ """
+
+ @DataFrameExpectationRegistry.register("DummyExpectation")
+ def dummy_expectation_factory(foo=None):
+ return DummyExpectation(foo=foo)
+
+ instance = DataFrameExpectationRegistry.get_expectation("DummyExpectation", foo=123)
+ assert isinstance(instance, DummyExpectation), (
+ f"Expected DummyExpectation instance but got: {type(instance)}"
+ )
+ assert instance.foo == 123, f"Expected foo=123 but got: {instance.foo}"
+
+
+def test_duplicate_registration_raises():
+ """
+ Test that registering an expectation with the same name raises a ValueError.
+ """
+
+ @DataFrameExpectationRegistry.register("DuplicateExpectation")
+ def dummy1(foo=None):
+ return DummyExpectation(foo=foo)
+
+ with pytest.raises(ValueError) as context:
+
+ @DataFrameExpectationRegistry.register("DuplicateExpectation")
+ def dummy2(foo=None):
+ return DummyExpectation(foo=foo)
+
+ assert "already registered" in str(context.value), (
+ f"Expected 'already registered' in error message but got: {str(context.value)}"
+ )
+
+
+def test_get_unknown_expectation_raises():
+ """
+ Test that trying to get an unknown expectation raises a ValueError.
+ """
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation("NonExistent")
+ assert "Unknown expectation" in str(context.value), (
+ f"Expected 'Unknown expectation' in error message but got: {str(context.value)}"
+ )
+
+
+def test_list_expectations():
+ """
+ Test listing all registered expectations.
+ """
+
+ @DataFrameExpectationRegistry.register("First")
+ def dummy1(foo=None):
+ return DummyExpectation(foo=foo)
+
+ @DataFrameExpectationRegistry.register("Second")
+ def dummy2(foo=None):
+ return DummyExpectation(foo=foo)
+
+ names = DataFrameExpectationRegistry.list_expectations()
+ assert "First" in names, f"Expected 'First' in expectations list but got: {names}"
+ assert "Second" in names, f"Expected 'Second' in expectations list but got: {names}"
+
+
+def test_remove_expectation():
+ """
+ Test removing an expectation from the registry.
+ """
+
+ @DataFrameExpectationRegistry.register("ToRemove")
+ def dummy(foo=None):
+ return DummyExpectation(foo=foo)
+
+ names_before = DataFrameExpectationRegistry.list_expectations()
+ assert "ToRemove" in names_before, (
+ f"Expected 'ToRemove' in expectations list before removal but got: {names_before}"
+ )
+
+ DataFrameExpectationRegistry.remove_expectation("ToRemove")
+
+ names_after = DataFrameExpectationRegistry.list_expectations()
+ assert "ToRemove" not in names_after, (
+ f"Expected 'ToRemove' not in expectations list after removal but got: {names_after}"
+ )
+
+
+def test_remove_nonexistent_expectation_raises():
+ """
+ Test that trying to remove a non-existent expectation raises a ValueError.
+ """
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.remove_expectation("DefinitelyNotThere")
+ assert "not found" in str(context.value), (
+ f"Expected 'not found' in error message but got: {str(context.value)}"
+ )
diff --git a/tests/expectations_helper_classes/test_expectations.py b/tests/expectations_helper_classes/test_expectations.py
new file mode 100644
index 0000000..7826956
--- /dev/null
+++ b/tests/expectations_helper_classes/test_expectations.py
@@ -0,0 +1,302 @@
+import pytest
+from unittest.mock import MagicMock, patch
+
+import pandas as pd
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations import DataFrameExpectation
+
+
+class MyTestExpectation(DataFrameExpectation):
+ def validate_pandas(self, data_frame: DataFrameLike, **kwargs):
+ """
+ Mock implementation for pandas DataFrame validation.
+ """
+ return "pandas validation successful"
+
+ def validate_pyspark(self, data_frame: DataFrameLike, **kwargs):
+ """
+ Mock implementation for PySpark DataFrame validation.
+ """
+ return "pyspark validation successful"
+
+ def get_description(self):
+ return "This is a test expectation for unit testing purposes."
+
+
+class MockConnectDataFrame:
+ """Mock class to simulate pyspark.sql.connect.dataframe.DataFrame"""
+
+ def __init__(self):
+ pass
+
+
+def test_data_frame_type_enum():
+ """
+ Test that the DataFrameType enum has the correct values.
+ """
+ assert DataFrameType.PANDAS.value == "pandas", (
+ f"Expected 'pandas' but got: {DataFrameType.PANDAS.value}"
+ )
+ assert DataFrameType.PYSPARK.value == "pyspark", (
+ f"Expected 'pyspark' but got: {DataFrameType.PYSPARK.value}"
+ )
+
+ # Test string comparison (now works directly!)
+ assert DataFrameType.PANDAS == "pandas", "Expected DataFrameType.PANDAS == 'pandas' to be True"
+ assert DataFrameType.PYSPARK == "pyspark", (
+ "Expected DataFrameType.PYSPARK == 'pyspark' to be True"
+ )
+
+
+def test_get_expectation_name():
+ """
+ Test that the expectation name is the class name.
+ """
+ expectation = MyTestExpectation()
+ assert expectation.get_expectation_name() == "MyTestExpectation", (
+ f"Expected 'MyTestExpectation' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_validate_unsupported_dataframe_type():
+ """
+ Test that an error is raised for unsupported DataFrame types.
+ """
+ expectation = MyTestExpectation()
+ with pytest.raises(ValueError):
+ expectation.validate(None)
+
+
+def test_validate_pandas_called():
+ """
+    Test that the validate_pandas method is called with the right parameters.
+ """
+ expectation = MyTestExpectation()
+
+ # Mock the validate_pandas method
+ expectation.validate_pandas = MagicMock(return_value="mock_result")
+
+ # Assert that validate_pandas was called with the correct arguments
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ _ = expectation.validate(data_frame=data_frame)
+ expectation.validate_pandas.assert_called_once_with(data_frame=data_frame)
+
+ with pytest.raises(ValueError):
+ expectation.validate(None)
+
+
+def test_validate_pyspark_called(spark):
+ """
+    Test that the validate_pyspark method is called with the right parameters.
+ """
+ expectation = MyTestExpectation()
+
+ # Mock the validate_pyspark method
+ expectation.validate_pyspark = MagicMock(return_value="mock_result")
+
+ # Assert that validate_pyspark was called with the correct arguments
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ _ = expectation.validate(data_frame=data_frame)
+ expectation.validate_pyspark.assert_called_once_with(data_frame=data_frame)
+
+ with pytest.raises(ValueError):
+ expectation.validate(None)
+
+
+def test_num_data_frame_rows(spark):
+ """
+    Test that the number of rows in a DataFrame is counted correctly.
+ """
+ expectation = MyTestExpectation()
+
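+    # Row counting presumably maps to len(df) for pandas and df.count() for
+    # PySpark; both paths are exercised below, including empty DataFrames.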
+    # 1. Non-empty DataFrames
+    # Create a pandas DataFrame
+ pandas_df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ num_rows = expectation.num_data_frame_rows(pandas_df)
+ assert num_rows == 3, f"Expected 3 rows for pandas DataFrame but got: {num_rows}"
+
+    # Create a PySpark DataFrame
+ spark_df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ num_rows = expectation.num_data_frame_rows(spark_df)
+ assert num_rows == 3, f"Expected 3 rows for PySpark DataFrame but got: {num_rows}"
+
+ # Test unsupported DataFrame type
+ with pytest.raises(ValueError):
+ expectation.num_data_frame_rows(None)
+
+ # 2. Empty DataFrames
+    # Create an empty pandas DataFrame
+ empty_pandas_df = pd.DataFrame(columns=["col1", "col2"])
+ num_rows = expectation.num_data_frame_rows(empty_pandas_df)
+ assert num_rows == 0, f"Expected 0 rows for empty pandas DataFrame but got: {num_rows}"
+
+    # Create an empty PySpark DataFrame
+ empty_spark_df = spark.createDataFrame([], "col1 INT, col2 STRING")
+ num_rows = expectation.num_data_frame_rows(empty_spark_df)
+ assert num_rows == 0, f"Expected 0 rows for empty PySpark DataFrame but got: {num_rows}"
+
+
+def test_infer_data_frame_type(spark):
+ """
+ Test that the DataFrame type is inferred correctly for all supported DataFrame types.
+ """
+ expectation = MyTestExpectation()
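+    # Type inference presumably relies on isinstance checks against
+    # pandas.DataFrame and pyspark.sql.DataFrame (plus the optional Connect
+    # DataFrame), so look-alikes with count()/collect() must still be rejected.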
+
+ # Test pandas DataFrame
+ pandas_df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ data_frame_type = expectation.infer_data_frame_type(pandas_df)
+ assert data_frame_type == DataFrameType.PANDAS, (
+ f"Expected PANDAS type but got: {data_frame_type}"
+ )
+
+ # Test PySpark DataFrame
+ spark_df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ data_frame_type = expectation.infer_data_frame_type(spark_df)
+ assert data_frame_type == DataFrameType.PYSPARK, (
+ f"Expected PYSPARK type but got: {data_frame_type}"
+ )
+
+ # Test empty pandas DataFrame
+ empty_pandas_df = pd.DataFrame(columns=["col1", "col2"])
+ data_frame_type = expectation.infer_data_frame_type(empty_pandas_df)
+ assert data_frame_type == DataFrameType.PANDAS, (
+ f"Expected PANDAS type for empty DataFrame but got: {data_frame_type}"
+ )
+
+ # Test empty PySpark DataFrame
+ empty_spark_df = spark.createDataFrame([], "col1 INT, col2 STRING")
+ data_frame_type = expectation.infer_data_frame_type(empty_spark_df)
+ assert data_frame_type == DataFrameType.PYSPARK, (
+ f"Expected PYSPARK type for empty DataFrame but got: {data_frame_type}"
+ )
+
+ # Test unsupported DataFrame types
+ with pytest.raises(ValueError) as context:
+ expectation.infer_data_frame_type(None)
+ assert "Unsupported DataFrame type" in str(context.value), (
+ f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}"
+ )
+
+ with pytest.raises(ValueError) as context:
+ expectation.infer_data_frame_type("not_a_dataframe")
+ assert "Unsupported DataFrame type" in str(context.value), (
+ f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}"
+ )
+
+ with pytest.raises(ValueError) as context:
+ expectation.infer_data_frame_type([1, 2, 3])
+ assert "Unsupported DataFrame type" in str(context.value), (
+ f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}"
+ )
+
+ with pytest.raises(ValueError) as context:
+ expectation.infer_data_frame_type({"col1": [1, 2, 3]})
+ assert "Unsupported DataFrame type" in str(context.value), (
+ f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}"
+ )
+
+ # Test with objects that might have similar attributes but aren't DataFrames
+ class FakeDataFrame:
+ def count(self):
+ return 5
+
+ def collect(self):
+ return []
+
+ fake_df = FakeDataFrame()
+ with pytest.raises(ValueError):
+ expectation.infer_data_frame_type(fake_df)
+
+ # Test with numeric types
+ with pytest.raises(ValueError):
+ expectation.infer_data_frame_type(42)
+
+ # Test with boolean
+ with pytest.raises(ValueError):
+ expectation.infer_data_frame_type(True)
+
+
+def test_infer_data_frame_type_with_connect_dataframe_available():
+ """
+ Test that PySpark Connect DataFrame is correctly identified when available.
+ """
+ expectation = MyTestExpectation()
+
+ # Patch the PySparkConnectDataFrame import to be our mock class
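+    # (The module presumably imports PySparkConnectDataFrame in a try/except
+    # that falls back to None when Spark Connect is unavailable, so patching
+    # the module-level name is enough to simulate either case.)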
+ with patch(
+ "dataframe_expectations.expectations.PySparkConnectDataFrame",
+ MockConnectDataFrame,
+ ):
+ # Create an instance of our mock Connect DataFrame
+ mock_connect_df = MockConnectDataFrame()
+
+ # Test that Connect DataFrame is identified as PYSPARK type
+ data_frame_type = expectation.infer_data_frame_type(mock_connect_df)
+ assert data_frame_type == DataFrameType.PYSPARK, (
+ f"Expected PYSPARK type for Connect DataFrame but got: {data_frame_type}"
+ )
+
+
+@patch("dataframe_expectations.expectations.PySparkConnectDataFrame", None)
+def test_infer_data_frame_type_without_connect_support(spark):
+ """
+ Test that the method works correctly when PySpark Connect is not available.
+ """
+ expectation = MyTestExpectation()
+
+ # Test that regular DataFrames still work when Connect is not available
+ pandas_df = pd.DataFrame({"col1": [1, 2, 3]})
+ data_frame_type = expectation.infer_data_frame_type(pandas_df)
+ assert data_frame_type == DataFrameType.PANDAS, (
+ f"Expected PANDAS type but got: {data_frame_type}"
+ )
+
+ spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ data_frame_type = expectation.infer_data_frame_type(spark_df)
+ assert data_frame_type == DataFrameType.PYSPARK, (
+ f"Expected PYSPARK type but got: {data_frame_type}"
+ )
+
+
+def test_infer_data_frame_type_connect_import_behavior(spark):
+ """
+ Test that the Connect DataFrame import behavior works as expected.
+ """
+ expectation = MyTestExpectation()
+
+ # Test case 1: When PySparkConnectDataFrame is None (import failed)
+ with patch("dataframe_expectations.expectations.PySparkConnectDataFrame", None):
+ # Should still work with regular DataFrames
+ pandas_df = pd.DataFrame({"col1": [1, 2, 3]})
+ result_type = expectation.infer_data_frame_type(pandas_df)
+ assert result_type == DataFrameType.PANDAS, f"Expected PANDAS type but got: {result_type}"
+
+ spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ result_type = expectation.infer_data_frame_type(spark_df)
+ assert result_type == DataFrameType.PYSPARK, f"Expected PYSPARK type but got: {result_type}"
+
+ # Test case 2: When PySparkConnectDataFrame is available (mocked)
+ with patch(
+ "dataframe_expectations.expectations.PySparkConnectDataFrame",
+ MockConnectDataFrame,
+ ):
+ # Regular DataFrames should still work
+ pandas_df = pd.DataFrame({"col1": [1, 2, 3]})
+ result_type = expectation.infer_data_frame_type(pandas_df)
+ assert result_type == DataFrameType.PANDAS, f"Expected PANDAS type but got: {result_type}"
+
+ spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ result_type = expectation.infer_data_frame_type(spark_df)
+ assert result_type == DataFrameType.PYSPARK, f"Expected PYSPARK type but got: {result_type}"
+
+ # Mock Connect DataFrame should be identified as PYSPARK
+ mock_connect_df = MockConnectDataFrame()
+ result_type = expectation.infer_data_frame_type(mock_connect_df)
+ assert result_type == DataFrameType.PYSPARK, (
+ f"Expected PYSPARK type for Connect DataFrame but got: {result_type}"
+ )
diff --git a/tests/expectations_helper_classes/test_utils.py b/tests/expectations_helper_classes/test_utils.py
new file mode 100644
index 0000000..f435c4d
--- /dev/null
+++ b/tests/expectations_helper_classes/test_utils.py
@@ -0,0 +1,90 @@
+from typing import Union
+import pytest
+
+from dataframe_expectations.expectations.utils import requires_params
+
+
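+# A minimal sketch of the behavior these tests assume from requires_params
+# (hypothetical, for orientation only): the decorator inspects **kwargs for the
+# named parameters and raises ValueError mentioning "missing required
+# parameters" when one is absent; given a types mapping, it raises TypeError
+# mentioning "type validation errors" on mismatch, and Union types pass when
+# the value matches any member.
+
+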
+def test_requires_params_success():
+ """
+ Test that all required parameters are provided.
+ """
+
+ @requires_params("a", "b")
+ def func(**kwargs):
+ return kwargs["a"] + kwargs["b"]
+
+ result = func(a=1, b=2)
+ assert result == 3, f"Expected 3 but got: {result}"
+
+
+def test_requires_params_missing_param():
+ """
+ Test that a ValueError is raised when a required parameter is missing.
+ """
+
+ @requires_params("a", "b")
+ def func(**kwargs):
+ return kwargs["a"] + kwargs["b"]
+
+ with pytest.raises(ValueError) as context:
+ func(a=1)
+ assert "missing required parameters" in str(context.value), (
+ f"Expected 'missing required parameters' in error message but got: {str(context.value)}"
+ )
+
+
+def test_requires_params_type_success():
+ """
+ Test that type validation works correctly when types are specified.
+ """
+
+ @requires_params("a", "b", types={"a": int, "b": str})
+ def func(**kwargs):
+ return f"{kwargs['a']}-{kwargs['b']}"
+
+ result = func(a=5, b="hello")
+ assert result == "5-hello", f"Expected '5-hello' but got: {result}"
+
+
+def test_requires_params_type_error():
+    """
+    Test that a TypeError is raised when a parameter does not match the expected type.
+    """
+
+ @requires_params("a", "b", types={"a": int, "b": str})
+ def func(**kwargs):
+ return f"{kwargs['a']}-{kwargs['b']}"
+
+ with pytest.raises(TypeError) as context:
+ func(a="not-an-int", b="hello")
+ assert "type validation errors" in str(context.value), (
+ f"Expected 'type validation errors' in error message but got: {str(context.value)}"
+ )
+
+
+def test_requires_params_union_type_success():
+ """
+ Test that Union types are handled correctly.
+ """
+
+ @requires_params("a", types={"a": Union[int, str]})
+ def func(**kwargs):
+ return kwargs["a"]
+
+ result1 = func(a=5)
+ assert result1 == 5, f"Expected 5 but got: {result1}"
+
+ result2 = func(a="foo")
+ assert result2 == "foo", f"Expected 'foo' but got: {result2}"
+
+
+def test_requires_params_union_type_error():
+ """
+ Test that a TypeError is raised when a parameter does not match any type in a Union.
+ """
+
+ @requires_params("a", types={"a": Union[int, str]})
+ def func(**kwargs):
+ return kwargs["a"]
+
+ with pytest.raises(TypeError):
+ func(a=3.14)
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py
new file mode 100644
index 0000000..076b0cf
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py
@@ -0,0 +1,562 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
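+
+# These tests exercise ExpectationDistinctColumnValuesBetween two ways: via the
+# registry, where validate() returns a success or failure message object, and
+# via the suite API, where run() raises DataFrameExpectationsSuiteFailure on
+# violation and returns None otherwise.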
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesBetween", (
+ f"Expected 'ExpectationDistinctColumnValuesBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ # DataFrame with 3 distinct values [1, 2, 3] which is within range [2, 5]
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesBetween"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_with_nulls():
+ """
+ Test the expectation for pandas DataFrame with NaN values included in distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=4,
+ )
+ # DataFrame with 3 distinct values [1, 2, NaN] which is within range [3, 4]
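+    # (Counting NaN as distinct implies the implementation uses something like
+    # Series.nunique(dropna=False); pandas' default nunique() excludes NaN.)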
+ data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesBetween"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations_too_few():
+ """
+ Test the expectation for pandas DataFrame with too few distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=4,
+ max_value=6,
+ )
+ # DataFrame with 2 distinct values [1, 2] which is below range [4, 6]
+ data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 2 distinct values, expected between 4 and 6.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_violations_too_many():
+ """
+ Test the expectation for pandas DataFrame with too many distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=3,
+ )
+ # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is above range [2, 3]
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 5 distinct values, expected between 2 and 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_exact_boundaries():
+ """
+ Test the expectation for pandas DataFrame with distinct counts exactly at boundaries.
+ """
+ # Test exact minimum boundary
+ expectation_min = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=5,
+ )
+ data_frame_min = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values
+ result_min = expectation_min.validate(data_frame=data_frame_min)
+ assert isinstance(result_min, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result_min)}"
+ )
+
+ # Test exact maximum boundary
+ expectation_max = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=5,
+ )
+ data_frame_max = pd.DataFrame({"col1": [1, 2, 3, 4, 5, 1]}) # 5 distinct values
+ result_max = expectation_max.validate(data_frame=data_frame_max)
+ assert isinstance(result_max, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result_max)}"
+ )
+
+
+def test_expectation_pyspark_success(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ # DataFrame with 3 distinct values [1, 2, 3] which is within range [2, 5]
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesBetween"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_with_nulls(spark):
+ """
+ Test the expectation for PySpark DataFrame with null values included in distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=4,
+ )
+ # DataFrame with 3 distinct values [1, 2, null] which is within range [3, 4]
+ data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesBetween"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations_too_few(spark):
+ """
+ Test the expectation for PySpark DataFrame with too few distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=4,
+ max_value=6,
+ )
+ # DataFrame with 2 distinct values [1, 2] which is below range [4, 6]
+ data_frame = spark.createDataFrame([(1,), (2,), (1,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 2 distinct values, expected between 4 and 6.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_violations_too_many(spark):
+ """
+ Test the expectation for PySpark DataFrame with too many distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=3,
+ )
+ # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is above range [2, 3]
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 5 distinct values, expected between 2 and 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_exact_boundaries(spark):
+ """
+ Test the expectation for PySpark DataFrame with distinct counts exactly at boundaries.
+ """
+ # Test exact minimum boundary
+ expectation_min = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=5,
+ )
+ data_frame_min = spark.createDataFrame(
+ [(1,), (2,), (3,), (2,), (1,)], ["col1"]
+ ) # 3 distinct values
+ result_min = expectation_min.validate(data_frame=data_frame_min)
+ assert isinstance(result_min, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result_min)}"
+ )
+
+ # Test exact maximum boundary
+ expectation_max = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=5,
+ )
+ data_frame_max = spark.createDataFrame(
+ [(1,), (2,), (3,), (4,), (5,), (1,)], ["col1"]
+ ) # 5 distinct values
+ result_max = expectation_max.validate(data_frame=data_frame_max)
+ assert isinstance(result_max, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result_max)}"
+ )
+
+
+def test_column_missing_error():
+ """
+ Test that an error is raised when the specified column is missing.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_invalid_parameters():
+ """
+ Test that appropriate errors are raised for invalid parameters.
+ """
+ # Test negative min_value
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=-1,
+ max_value=5,
+ )
+ assert "min_value must be non-negative" in str(context.value), (
+ f"Expected 'min_value must be non-negative' in error message: {str(context.value)}"
+ )
+
+ # Test negative max_value
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=-1,
+ )
+ assert "max_value must be non-negative" in str(context.value), (
+ f"Expected 'max_value must be non-negative' in error message: {str(context.value)}"
+ )
+
+ # Test min_value > max_value
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=5,
+ max_value=2,
+ )
+ assert "min_value (5) must be <= max_value (2)" in str(context.value), (
+ f"Expected 'min_value (5) must be <= max_value (2)' in error message: {str(context.value)}"
+ )
+
+
+def test_edge_case_zero_range():
+ """
+ Test the expectation when min_value equals max_value (zero range).
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=3,
+ )
+ # DataFrame with exactly 3 distinct values
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+ # DataFrame with 2 distinct values (should fail)
+ data_frame_fail = pd.DataFrame({"col1": [1, 2, 1, 2, 1]})
+ result_fail = expectation.validate(data_frame=data_frame_fail)
+ assert isinstance(result_fail, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result_fail)}"
+ )
+
+
+def test_edge_case_empty_dataframe():
+ """
+ Test the expectation with an empty DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=0,
+ max_value=5,
+ )
+ # Empty DataFrame should have 0 distinct values
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_edge_case_single_value():
+ """
+ Test the expectation with a DataFrame containing a single distinct value.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=1,
+ max_value=1,
+ )
+ # DataFrame with 1 distinct value
+ data_frame = pd.DataFrame({"col1": [1, 1, 1, 1, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_suite_pandas_success():
+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between(
+ column_name="col1", min_value=4, max_value=6
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) # 2 distinct values, expected 4-6
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (3,), (2,), (1,)], ["col1"]
+ ) # 3 distinct values
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between(
+ column_name="col1", min_value=4, max_value=6
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (1,), (2,), (1,)], ["col1"]
+ ) # 2 distinct values, expected 4-6
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_string_column_with_mixed_values():
+ """
+ Test the expectation with a string column containing mixed values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=3,
+ max_value=5,
+ )
+ # String column with 4 distinct values ["A", "B", "C", None]
+ data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_column_with_floats():
+ """
+ Test the expectation with a numeric column containing floats.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=4,
+ )
+ # Float column with 3 distinct values [1.1, 2.2, 3.3]
+ data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_boolean_column():
+ """
+ Test the expectation with a boolean column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=2,
+ )
+ # Boolean column with 2 distinct values [True, False]
+ data_frame = pd.DataFrame({"col1": [True, False, True, False, True]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_datetime_column():
+ """
+ Test the expectation with a datetime column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=4,
+ )
+ # Datetime column with 3 distinct values
+ data_frame = pd.DataFrame(
+ {
+ "col1": pd.to_datetime(
+ [
+ "2023-01-01",
+ "2023-01-02",
+ "2023-01-03",
+ "2023-01-02",
+ "2023-01-01",
+ ]
+ )
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_large_dataset_performance():
+ """
+ Test the expectation with a larger dataset to ensure reasonable performance.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesBetween",
+ column_name="col1",
+ min_value=900,
+ max_value=1100,
+ )
+ # Create a DataFrame with exactly 1000 distinct values
+ data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py
new file mode 100644
index 0000000..3bb97c7
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py
@@ -0,0 +1,640 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesEquals", (
+ f"Expected 'ExpectationDistinctColumnValuesEquals' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, 3]
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_with_nulls():
+ """
+ Test the expectation for pandas DataFrame with NaN values included in distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, NaN]
+ data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations_too_few():
+ """
+ Test the expectation for pandas DataFrame with too few distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=5,
+ )
+ # DataFrame with 2 distinct values [1, 2] when expecting 5
+ data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 2 distinct values, expected exactly 5.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_violations_too_many():
+ """
+ Test the expectation for pandas DataFrame with too many distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=2,
+ )
+ # DataFrame with 5 distinct values [1, 2, 3, 4, 5] when expecting 2
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 5 distinct values, expected exactly 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_zero_expected():
+ """
+ Test the expectation for pandas DataFrame expecting zero distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=0,
+ )
+ # Empty DataFrame should have 0 distinct values
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pandas_one_expected():
+ """
+ Test the expectation for pandas DataFrame expecting exactly one distinct value.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=1,
+ )
+ # DataFrame with exactly 1 distinct value
+ data_frame = pd.DataFrame({"col1": [5, 5, 5, 5, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pyspark_success(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, 3]
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_with_nulls(spark):
+ """
+ Test the expectation for PySpark DataFrame with null values included in distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, null]
+ data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations_too_few(spark):
+ """
+ Test the expectation for PySpark DataFrame with too few distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=5,
+ )
+ # DataFrame with 2 distinct values [1, 2] when expecting 5
+ data_frame = spark.createDataFrame([(1,), (2,), (1,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 2 distinct values, expected exactly 5.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_violations_too_many(spark):
+ """
+ Test the expectation for PySpark DataFrame with too many distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=2,
+ )
+ # DataFrame with 5 distinct values [1, 2, 3, 4, 5] when expecting 2
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 5 distinct values, expected exactly 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_zero_expected(spark):
+ """
+ Test the expectation for PySpark DataFrame expecting zero distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=0,
+ )
+ # Empty DataFrame should have 0 distinct values
+ data_frame = spark.createDataFrame([], "col1 INT")
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pyspark_one_expected(spark):
+ """
+ Test the expectation for PySpark DataFrame expecting exactly one distinct value.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=1,
+ )
+ # DataFrame with exactly 1 distinct value
+ data_frame = spark.createDataFrame([(5,), (5,), (5,), (5,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_column_missing_error():
+ """
+ Test that an error is raised when the specified column is missing.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_invalid_parameters():
+ """
+ Test that appropriate errors are raised for invalid parameters.
+ """
+ # Test negative expected_value
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=-1,
+ )
+ assert "expected_value must be non-negative" in str(context.value), (
+ f"Expected 'expected_value must be non-negative' in error message: {str(context.value)}"
+ )
+
+
+def test_string_column_with_mixed_values():
+ """
+ Test the expectation with a string column containing mixed values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=4,
+ )
+ # String column with exactly 4 distinct values ["A", "B", "C", None]
+ data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_string_column_case_sensitive():
+ """
+ Test that string comparisons are case-sensitive for distinct counting.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=4,
+ )
+ # String column with 4 distinct values ["a", "A", "b", "B"] (case-sensitive)
+ data_frame = pd.DataFrame({"col1": ["a", "A", "b", "B", "a", "A"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_column_with_floats():
+ """
+ Test the expectation with a numeric column containing floats.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # Float column with exactly 3 distinct values [1.1, 2.2, 3.3]
+ data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_precision_handling():
+ """
+ Test that numeric precision is handled correctly for distinct counting.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # Values that might have precision issues but should be treated as distinct
+ data_frame = pd.DataFrame({"col1": [1.0, 1.1, 1.2, 1.0, 1.1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_boolean_column():
+ """
+ Test the expectation with a boolean column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=2,
+ )
+ # Boolean column with exactly 2 distinct values [True, False]
+ data_frame = pd.DataFrame({"col1": [True, False, True, False, True]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_boolean_column_with_none():
+ """
+ Test the expectation with a boolean column that includes None values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # Boolean column with 3 distinct values [True, False, None]
+ data_frame = pd.DataFrame({"col1": [True, False, None, False, True]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_datetime_column():
+ """
+ Test the expectation with a datetime column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # Datetime column with exactly 3 distinct values
+ data_frame = pd.DataFrame(
+ {
+ "col1": pd.to_datetime(
+ [
+ "2023-01-01",
+ "2023-01-02",
+ "2023-01-03",
+ "2023-01-02",
+ "2023-01-01",
+ ]
+ )
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_datetime_column_with_timezone():
+ """
+ Test the expectation with a datetime column including timezone information.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=2,
+ )
+    # The same wall-clock time in different timezones represents different instants, so the column has 2 distinct values
+ data_frame = pd.DataFrame(
+ {
+ "col1": [
+ pd.Timestamp("2023-01-01 12:00:00", tz="UTC"),
+ pd.Timestamp("2023-01-01 12:00:00", tz="US/Eastern"),
+ pd.Timestamp("2023-01-01 12:00:00", tz="UTC"),
+ pd.Timestamp("2023-01-01 12:00:00", tz="US/Eastern"),
+ ]
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_mixed_data_types_as_object():
+ """
+ Test the expectation with a column containing mixed data types (as object dtype).
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=4,
+ )
+ # Mixed data types: string, int, float, None
+ data_frame = pd.DataFrame({"col1": ["text", 42, 3.14, None, "text", 42]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_large_dataset_performance():
+ """
+ Test the expectation with a larger dataset to ensure reasonable performance.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=1000,
+ )
+ # Create a DataFrame with exactly 1000 distinct values
+ data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_suite_pandas_success():
+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals(
+ column_name="col1", expected_value=3
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # exactly 3 distinct values
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals(
+ column_name="col1", expected_value=5
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) # 2 distinct values, expected 5
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals(
+ column_name="col1", expected_value=3
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (3,), (2,), (1,)], ["col1"]
+ ) # exactly 3 distinct values
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals(
+ column_name="col1", expected_value=5
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (1,), (2,), (1,)], ["col1"]
+ ) # 2 distinct values, expected 5
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals(
+ column_name="col1", expected_value=3
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_categorical_data():
+ """
+ Test the expectation with categorical data.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # Categorical data with 3 distinct categories
+ data_frame = pd.DataFrame({"col1": pd.Categorical(["A", "B", "C", "A", "B", "C", "A"])})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_duplicate_nan_handling():
+ """
+ Test that multiple NaN values are counted as one distinct value.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=3,
+ )
+ # Multiple NaN values should be counted as 1 distinct value
+ data_frame = pd.DataFrame({"col1": [1, 2, None, None, None, 1, 2]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_very_large_expected_distinct_values():
+ """
+ Test the expectation with a very large expected count that doesn't match actual.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=1000000,
+ )
+ # Small DataFrame with only 3 distinct values
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 3 distinct values, expected exactly 1000000.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_string_with_whitespace_handling():
+ """
+ Test that strings with different whitespace are treated as distinct.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=4,
+ )
+ # Strings with different whitespace should be distinct
+ data_frame = pd.DataFrame({"col1": ["test", " test", "test ", " test ", "test"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_string_vs_numeric():
+ """
+ Test that numeric strings and numeric values are treated as distinct when in object column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesEquals",
+ column_name="col1",
+ expected_value=2,
+ )
+ # String "1" and integer 1 should be distinct in object column
+ data_frame = pd.DataFrame({"col1": ["1", 1, "1", 1]}, dtype=object)
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py
new file mode 100644
index 0000000..5be1b82
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py
@@ -0,0 +1,691 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
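+# Note: "greater than" is exclusive; a distinct count equal to the threshold
+# fails, as the equal-to-threshold test below verifies.
+
+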
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesGreaterThan", (
+ f"Expected 'ExpectationDistinctColumnValuesGreaterThan' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # DataFrame with 3 distinct values [1, 2, 3] which is > 2
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_with_nulls():
+ """
+ Test the expectation for pandas DataFrame with NaN values included in distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # DataFrame with 3 distinct values [1, 2, NaN] which is > 2
+ data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_exact_boundary():
+ """
+ Test the expectation for pandas DataFrame with distinct count just above the exclusive boundary.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # DataFrame with 3 distinct values [1, 2, 3] which is > 2
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pandas_violations_equal_to_threshold():
+ """
+ Test the expectation for pandas DataFrame with distinct count equal to threshold (should fail).
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT > 3
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 3 distinct values, expected more than 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_violations_below_threshold():
+ """
+ Test the expectation for pandas DataFrame with distinct count below threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # DataFrame with 2 distinct values [1, 2] which is NOT > 5
+ data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 2 distinct values, expected more than 5.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_zero_threshold():
+ """
+ Test the expectation for pandas DataFrame with zero threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=0,
+ )
+ # Any non-empty DataFrame should have > 0 distinct values
+ data_frame = pd.DataFrame({"col1": [1, 1, 1]}) # 1 distinct value > 0
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pandas_empty_dataframe():
+ """
+ Test the expectation for pandas DataFrame that is empty.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=0,
+ )
+ # Empty DataFrame has 0 distinct values, which is NOT > 0
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 0 distinct values, expected more than 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_success(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # DataFrame with 3 distinct values [1, 2, 3] which is > 2
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_with_nulls(spark):
+ """
+ Test the expectation for PySpark DataFrame with null values included in the distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # DataFrame with 3 distinct values [1, 2, null] which is > 2
+ data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations_equal_to_threshold(spark):
+ """
+ Test the expectation for PySpark DataFrame with distinct count equal to threshold (should fail).
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT > 3
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 3 distinct values, expected more than 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_violations_below_threshold(spark):
+ """
+ Test the expectation for PySpark DataFrame with distinct count below threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # DataFrame with 2 distinct values [1, 2] which is NOT > 5
+ data_frame = spark.createDataFrame([(1,), (2,), (1,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 2 distinct values, expected more than 5.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_empty_dataframe(spark):
+ """
+ Test the expectation for PySpark DataFrame that is empty.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=0,
+ )
+ # Empty DataFrame has 0 distinct values, which is NOT > 0
+ data_frame = spark.createDataFrame([], "col1 INT")
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 0 distinct values, expected more than 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_column_missing_error():
+ """
+ Test that an error is raised when the specified column is missing.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
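+ # DataFrame has only 'col2'; validating 'col1' should fail with a missing-column message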
+ data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_invalid_parameters():
+ """
+ Test that appropriate errors are raised for invalid parameters.
+ """
+ # Test negative threshold
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=-1,
+ )
+ assert "threshold must be non-negative" in str(context.value), (
+ f"Expected 'threshold must be non-negative' in error message: {str(context.value)}"
+ )
+
+
+def test_string_column_with_mixed_values():
+ """
+ Test the expectation with a string column containing mixed values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # String column with 4 distinct values ["A", "B", "C", None] which is > 3
+ data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_string_column_case_sensitive():
+ """
+ Test that string comparisons are case-sensitive for distinct counting.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # String column with 4 distinct values ["a", "A", "b", "B"] which is > 3
+ data_frame = pd.DataFrame({"col1": ["a", "A", "b", "B", "a", "A"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_column_with_floats():
+ """
+ Test the expectation with a numeric column containing floats.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # Float column with 3 distinct values [1.1, 2.2, 3.3] which is > 2
+ data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_boolean_column():
+ """
+ Test the expectation with a boolean column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=1,
+ )
+ # Boolean column with 2 distinct values [True, False] which is > 1
+ data_frame = pd.DataFrame({"col1": [True, False, True, False, True]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_boolean_column_failure():
+ """
+ Test the expectation with a boolean column that fails the threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # Boolean column with only 1 distinct value [True] which is NOT > 2
+ data_frame = pd.DataFrame({"col1": [True, True, True, True, True]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 1 distinct values, expected more than 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_datetime_column():
+ """
+ Test the expectation with a datetime column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # Datetime column with 3 distinct values which is > 2
+ data_frame = pd.DataFrame(
+ {
+ "col1": pd.to_datetime(
+ [
+ "2023-01-01",
+ "2023-01-02",
+ "2023-01-03",
+ "2023-01-02",
+ "2023-01-01",
+ ]
+ )
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_mixed_data_types_as_object():
+ """
+ Test the expectation with a column containing mixed data types.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # Mixed data types: 4 distinct values ["text", 42, 3.14, None] which is > 3
+ data_frame = pd.DataFrame({"col1": ["text", 42, 3.14, None, "text", 42]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_large_dataset_performance():
+ """
+ Test the expectation with a larger dataset to ensure reasonable performance.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=999,
+ )
+ # Create a DataFrame with exactly 1000 distinct values (> 999)
+ data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_large_dataset_failure():
+ """
+ Test the expectation with a larger dataset that fails the threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=1000,
+ )
+ # Create a DataFrame with exactly 1000 distinct values (NOT > 1000)
+ data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 1000 distinct values, expected more than 1000.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than(
+ column_name="col1", threshold=2
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values > 2
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than(
+ column_name="col1", threshold=5
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) # 2 distinct values, need > 5
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than(
+ column_name="col1", threshold=2
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (3,), (2,), (1,)], ["col1"]
+ ) # 3 distinct values > 2
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than(
+ column_name="col1", threshold=5
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (1,), (2,), (1,)], ["col1"]
+ ) # 2 distinct values, need > 5
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than(
+ column_name="col1", threshold=2
+ )
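+ # DataFrame has only 'col2', so the suite should fail because 'col1' is missing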
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_categorical_data():
+ """
+ Test the expectation with categorical data.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # Categorical data with 3 distinct categories which is > 2
+ data_frame = pd.DataFrame({"col1": pd.Categorical(["A", "B", "C", "A", "B", "C", "A"])})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_duplicate_nan_handling():
+ """
+ Test that multiple NaN values are counted as one distinct value.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # Multiple NaN values should be counted as 1, total = 3 distinct values > 2
+ data_frame = pd.DataFrame({"col1": [1, 2, None, None, None, 1, 2]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_single_distinct_value_success():
+ """
+ Test the expectation with only one distinct value that passes the threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=0,
+ )
+ # One distinct value (5), so the distinct count of 1 is > 0
+ data_frame = pd.DataFrame({"col1": [5, 5, 5, 5, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_string_with_whitespace_handling():
+ """
+ Test that strings with different whitespace are treated as distinct.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # 4 distinct strings with different whitespace > 3
+ data_frame = pd.DataFrame({"col1": ["test", " test", "test ", " test ", "test"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_string_vs_numeric():
+ """
+ Test that numeric strings and numeric values are treated as distinct.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=1,
+ )
+ # String "1" and integer 1 are distinct, so 2 distinct values > 1
+ data_frame = pd.DataFrame({"col1": ["1", 1, "1", 1]}, dtype=object)
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_very_high_threshold():
+ """
+ Test the expectation with a very high threshold that cannot be met.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=1000000,
+ )
+ # Small DataFrame with only 3 distinct values
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 3 distinct values, expected more than 1000000.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_exclusive_boundary_validation():
+ """
+ Test that the boundary is truly exclusive (not inclusive).
+ """
+ # Test with threshold = 5, actual = 5 (should fail because 5 is NOT > 5)
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=5,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5, 1, 2]}) # exactly 5 distinct values
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result)}"
+ )
+
+ # Test with threshold = 4, actual = 5 (should pass because 5 > 4)
+ expectation_pass = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+ column_name="col1",
+ threshold=4,
+ )
+ result_pass = expectation_pass.validate(data_frame=data_frame)
+ assert isinstance(result_pass, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result_pass)}"
+ )
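+
+
+def test_suite_chained_bounds_sketch():
+ """
+ Hedged usage sketch (an illustrative addition, not one of the original tests):
+ combine a lower and an upper bound on the distinct count in a single suite,
+ assuming each expect_* builder call returns the suite itself, as the
+ single-call suite tests above suggest.
+ """
+ expectations_suite = (
+ DataFrameExpectationsSuite()
+ .expect_distinct_column_values_greater_than(column_name="col1", threshold=2)
+ .expect_distinct_column_values_less_than(column_name="col1", threshold=10)
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values: 2 < 3 < 10
+ assert expectations_suite.run(data_frame=data_frame) is None, (
+ "Expected no exceptions to be raised"
+ )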
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py
new file mode 100644
index 0000000..5f15e10
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py
@@ -0,0 +1,732 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesLessThan", (
+ f"Expected 'ExpectationDistinctColumnValuesLessThan' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # DataFrame with 3 distinct values [1, 2, 3] which is < 5
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesLessThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_with_nulls():
+ """
+ Test the expectation for pandas DataFrame with NaN values included in the distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # DataFrame with 3 distinct values [1, 2, NaN] which is < 5
+ data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesLessThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_zero_values():
+ """
+ Test the expectation for pandas DataFrame with zero distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=1,
+ )
+ # Empty DataFrame has 0 distinct values which is < 1
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pandas_violations_equal_to_threshold():
+ """
+ Test the expectation for pandas DataFrame with distinct count equal to threshold (should fail).
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT < 3
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 3 distinct values, expected fewer than 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_violations_above_threshold():
+ """
+ Test the expectation for pandas DataFrame with distinct count above threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is NOT < 2
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 5 distinct values, expected fewer than 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_single_value_success():
+ """
+ Test the expectation for pandas DataFrame with single distinct value below threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # DataFrame with 1 distinct value which is < 3
+ data_frame = pd.DataFrame({"col1": [5, 5, 5, 5, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pyspark_success(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # DataFrame with 3 distinct values [1, 2, 3] which is < 5
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesLessThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_with_nulls(spark):
+ """
+ Test the expectation for PySpark DataFrame with null values included in the distinct count.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # DataFrame with 3 distinct values [1, 2, null] which is < 5
+ data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(
+ expectation_name="ExpectationDistinctColumnValuesLessThan"
+ )
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_zero_values(spark):
+ """
+ Test the expectation for PySpark DataFrame with zero distinct values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=1,
+ )
+ # Empty DataFrame has 0 distinct values which is < 1
+ data_frame = spark.createDataFrame([], "col1 INT")
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_pyspark_violations_equal_to_threshold(spark):
+ """
+ Test the expectation for PySpark DataFrame with distinct count equal to threshold (should fail).
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT < 3
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 3 distinct values, expected fewer than 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_violations_above_threshold(spark):
+ """
+ Test the expectation for PySpark DataFrame with distinct count above threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is NOT < 2
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 5 distinct values, expected fewer than 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_single_value_success(spark):
+ """
+ Test the expectation for PySpark DataFrame with single distinct value below threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # DataFrame with 1 distinct value which is < 3
+ data_frame = spark.createDataFrame([(5,), (5,), (5,), (5,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_column_missing_error():
+ """
+ Test that an error is raised when the specified column is missing.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
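+ # DataFrame has only 'col2'; validating 'col1' should fail with a missing-column message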
+ data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_invalid_parameters():
+ """
+ Test that appropriate errors are raised for invalid parameters.
+ """
+ # Test negative threshold
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=-1,
+ )
+ assert "threshold must be non-negative" in str(context.value), (
+ f"Expected 'threshold must be non-negative' in error message: {str(context.value)}"
+ )
+
+
+def test_zero_threshold_edge_case():
+ """
+ Test the expectation with zero threshold - no DataFrame can pass, since a distinct count is never below 0.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=0,
+ )
+
+ # Empty DataFrame has 0 distinct values, which is NOT < 0
+ data_frame_empty = pd.DataFrame({"col1": []})
+ result_empty = expectation.validate(data_frame=data_frame_empty)
+ assert isinstance(result_empty, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result_empty)}"
+ )
+
+ # Any non-empty DataFrame has at least 1 distinct value, which is NOT < 0
+ data_frame_non_empty = pd.DataFrame({"col1": [1, 1, 1]})
+ result_non_empty = expectation.validate(data_frame=data_frame_non_empty)
+ assert isinstance(result_non_empty, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result_non_empty)}"
+ )
+
+
+def test_string_column_with_mixed_values():
+ """
+ Test the expectation with a string column containing mixed values.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # String column with 4 distinct values ["A", "B", "C", None] which is < 5
+ data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_string_column_case_sensitive():
+ """
+ Test that string comparisons are case-sensitive for distinct counting.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # String column with 4 distinct values ["a", "A", "b", "B"] which is < 5
+ data_frame = pd.DataFrame({"col1": ["a", "A", "b", "B", "a", "A"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_column_with_floats():
+ """
+ Test the expectation with a numeric column containing floats.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # Float column with 3 distinct values [1.1, 2.2, 3.3] which is < 5
+ data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_boolean_column_success():
+ """
+ Test the expectation with a boolean column that passes.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # Boolean column with 2 distinct values [True, False] which is < 3
+ data_frame = pd.DataFrame({"col1": [True, False, True, False, True]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_boolean_column_failure():
+ """
+ Test the expectation with a boolean column that fails the threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # Boolean column with 2 distinct values [True, False] which is NOT < 2
+ data_frame = pd.DataFrame({"col1": [True, False, True, False, True]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 2 distinct values, expected fewer than 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_boolean_single_value():
+ """
+ Test the expectation with a boolean column having only one distinct value.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=2,
+ )
+ # Boolean column with only 1 distinct value [True] which is < 2
+ data_frame = pd.DataFrame({"col1": [True, True, True, True, True]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_datetime_column():
+ """
+ Test the expectation with a datetime column.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # Datetime column with 3 distinct values which is < 5
+ data_frame = pd.DataFrame(
+ {
+ "col1": pd.to_datetime(
+ [
+ "2023-01-01",
+ "2023-01-02",
+ "2023-01-03",
+ "2023-01-02",
+ "2023-01-01",
+ ]
+ )
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_mixed_data_types_as_object():
+ """
+ Test the expectation with a column containing mixed data types.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # Mixed data types: 4 distinct values ["text", 42, 3.14, None] which is < 5
+ data_frame = pd.DataFrame({"col1": ["text", 42, 3.14, None, "text", 42]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_large_dataset_success():
+ """
+ Test the expectation with a larger dataset that passes.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=1001,
+ )
+ # Create a DataFrame with exactly 1000 distinct values (< 1001)
+ data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_large_dataset_failure():
+ """
+ Test the expectation with a larger dataset that fails the threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=1000,
+ )
+ # Create a DataFrame with exactly 1000 distinct values (NOT < 1000)
+ data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 1000 distinct values, expected fewer than 1000.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than(
+ column_name="col1", threshold=5
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values < 5
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than(
+ column_name="col1", threshold=2
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values, need < 2
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than(
+ column_name="col1", threshold=5
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (3,), (2,), (1,)], ["col1"]
+ ) # 3 distinct values < 5
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than(
+ column_name="col1", threshold=2
+ )
+ data_frame = spark.createDataFrame(
+ [(1,), (2,), (3,), (2,), (1,)], ["col1"]
+ ) # 3 distinct values, need < 2
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than(
+ column_name="col1", threshold=5
+ )
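+ # DataFrame has only 'col2', so the suite should fail because 'col1' is missing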
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_categorical_data():
+ """
+ Test the expectation with categorical data.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # Categorical data with 3 distinct categories which is < 5
+ data_frame = pd.DataFrame({"col1": pd.Categorical(["A", "B", "C", "A", "B", "C", "A"])})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_duplicate_nan_handling():
+ """
+ Test that multiple NaN values are counted as one distinct value.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # Multiple NaN values should be counted as 1, total = 3 distinct values < 5
+ data_frame = pd.DataFrame({"col1": [1, 2, None, None, None, 1, 2]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_string_with_whitespace_handling():
+ """
+ Test that strings with different whitespace are treated as distinct.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=5,
+ )
+ # 4 distinct strings with different whitespace < 5
+ data_frame = pd.DataFrame({"col1": ["test", " test", "test ", " test ", "test"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_numeric_string_vs_numeric():
+ """
+ Test that numeric strings and numeric values are treated as distinct.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=3,
+ )
+ # String "1" and integer 1 are distinct, so 2 distinct values < 3
+ data_frame = pd.DataFrame({"col1": ["1", 1, "1", 1]}, dtype=object)
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}"
+ )
+
+
+def test_very_low_threshold():
+ """
+ Test the expectation with a very low threshold.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=1,
+ )
+ # DataFrame with 3 distinct values, need < 1 (only empty DataFrames can pass)
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 3 distinct values, expected fewer than 1.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_exclusive_boundary_validation():
+ """
+ Test that the boundary is truly exclusive (not inclusive).
+ """
+ # Test with threshold = 3, actual = 3 (should fail because 3 is NOT < 3)
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=3,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 1, 2]}) # exactly 3 distinct values
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result)}"
+ )
+
+ # Test with threshold = 4, actual = 3 (should pass because 3 < 4)
+ expectation_pass = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=4,
+ )
+ result_pass = expectation_pass.validate(data_frame=data_frame)
+ assert isinstance(result_pass, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result_pass)}"
+ )
+
+
+def test_boundary_with_one_distinct_value():
+ """
+ Test boundary conditions with a single distinct value.
+ """
+ # Single distinct value should pass when threshold > 1
+ expectation_pass = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=2,
+ )
+ data_frame = pd.DataFrame({"col1": [5, 5, 5, 5]}) # 1 distinct value < 2
+ result_pass = expectation_pass.validate(data_frame=data_frame)
+ assert isinstance(result_pass, DataFrameExpectationSuccessMessage), (
+ f"Expected DataFrameExpectationSuccessMessage but got: {type(result_pass)}"
+ )
+
+ # Single distinct value should fail when threshold = 1
+ expectation_fail = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDistinctColumnValuesLessThan",
+ column_name="col1",
+ threshold=1,
+ )
+ result_fail = expectation_fail.validate(data_frame=data_frame)
+ assert isinstance(result_fail, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result_fail)}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py
new file mode 100644
index 0000000..8a4e6b5
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py
@@ -0,0 +1,564 @@
+import pytest
+import numpy as np
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_pandas_success_no_nulls():
+ """Test pandas success case with no null values."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=5,
+ )
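+ # No nulls in any column, so col1's null count of 0 is within max_count of 5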
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 3, 4, 5],
+ "col2": ["a", "b", "c", "d", "e"],
+ "col3": [1.1, 2.2, 3.3, 4.4, 5.5],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_within_threshold():
+ """Test pandas success case with null count within threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=3,
+ )
+ # 2 null values in col1, which is less than max_count of 3
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, None, 3, None, 5],
+ "col2": ["a", "b", "c", "d", "e"],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_exactly_at_threshold():
+ """Test pandas success case with null count exactly at threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=2,
+ )
+ # Exactly 2 null values in col1
+ data_frame = pd.DataFrame({"col1": [1, 2, None, 4, None], "col2": [None, "b", "c", "d", "e"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_with_nan():
+ """Test pandas success case with NaN values within threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col2",
+ max_count=2,
+ )
+ # 1 NaN value in col2, which is less than max_count of 2
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4.0, np.nan, 6.0]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_failure_exceeds_threshold():
+ """Test pandas failure case when null count exceeds threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=1,
+ )
+ # 3 null values in col1, which exceeds max_count of 1
+ data_frame = pd.DataFrame(
+ {"col1": [1, None, None, None, 5], "col2": [None, "b", "c", "d", "e"]}
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 3 null values, expected at most 1.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_failure_all_nulls_in_column():
+ """Test pandas failure case with all null values in the specified column."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=1,
+ )
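+ # All 3 values in col1 are null, exceeding max_count of 1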
+ data_frame = pd.DataFrame({"col1": [None, None, None], "col2": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 3 null values, expected at most 1.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_boundary_zero_threshold():
+ """Test pandas boundary case with 0 threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
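+ # 1 null value in col1 exceeds max_count of 0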
+ data_frame = pd.DataFrame({"col1": [1, None, 3]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 1 null values, expected at most 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_boundary_zero_threshold_success():
+ """Test pandas boundary case with 0 threshold and no nulls."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
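+ # col1 has no nulls; the nulls in col2 do not affect this check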
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [None, None, None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_empty_dataframe():
+ """Test pandas edge case with empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=5,
+ )
+ data_frame = pd.DataFrame(columns=["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ # Empty DataFrame should have 0 nulls and pass
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_single_value_null():
+ """Test pandas edge case with single null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
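+ # A single null value exceeds max_count of 0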
+ data_frame = pd.DataFrame({"col1": [None]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 1 null values, expected at most 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_single_value_not_null():
+ """Test pandas edge case with single non-null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
+ data_frame = pd.DataFrame({"col1": [1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_different_column_nulls_not_affecting():
+ """Test that nulls in other columns don't affect the result."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=1,
+ )
+ # col1 has 0 nulls, col2 has 3 nulls - should pass since we're only checking col1
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [None, None, None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_no_nulls(spark):
+ """Test PySpark success case with no null values."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=5,
+ )
+ data_frame = spark.createDataFrame(
+ [(1, "a", 1.1), (2, "b", 2.2), (3, "c", 3.3), (4, "d", 4.4), (5, "e", 5.5)],
+ ["col1", "col2", "col3"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_within_threshold(spark):
+ """Test PySpark success case with null count within threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=3,
+ )
+ # 2 null values in col1, which is less than max_count of 3
+ data_frame = spark.createDataFrame([(1,), (None,), (3,), (None,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_exactly_at_threshold(spark):
+ """Test PySpark success case with null count exactly at threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=2,
+ )
+ # Exactly 2 null values in col1
+ data_frame = spark.createDataFrame(
+ [(1, "a"), (2, None), (None, "c"), (4, "d"), (None, "e")], ["col1", "col2"]
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_failure_exceeds_threshold(spark):
+ """Test PySpark failure case when null count exceeds threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=1,
+ )
+ # 2 null values in col1, which exceeds max_count of 1
+ data_frame = spark.createDataFrame([(1, None), (None, "b"), (None, "c")], ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 2 null values, expected at most 1.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_failure_all_nulls_in_column(spark):
+ """Test PySpark failure case with all null values in the specified column."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=2,
+ )
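+ # DDL schema string is needed here: with only None values, the column type cannot be inferred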
+ data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 3 null values, expected at most 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_boundary_zero_threshold(spark):
+ """Test PySpark boundary case with 0 threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
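+ # 1 null out of 3 values exceeds max_count of 0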
+ data_frame = spark.createDataFrame([(1,), (None,), (3,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 1 null values, expected at most 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_boundary_zero_threshold_success(spark):
+ """Test PySpark boundary case with 0 threshold and no nulls."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
+ data_frame = spark.createDataFrame(
+ [
+ {"col1": 1, "col2": None},
+ {"col1": 2, "col2": None},
+ {"col1": 3, "col2": None},
+ ],
+ schema="col1 int, col2 string",
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_empty_dataframe(spark):
+ """Test PySpark edge case with empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=5,
+ )
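+ # Create empty DataFrame with an explicit schema (required when there are no rows to infer from)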
+ data_frame = spark.createDataFrame([], "col1 INT")
+ result = expectation.validate(data_frame=data_frame)
+ # Empty DataFrame should have 0 nulls and pass
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_single_value_null(spark):
+ """Test PySpark edge case with single null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
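+ # Explicit schema is required since the single value is None and cannot be type-inferred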
+ data_frame = spark.createDataFrame([{"col1": None}], schema="col1 int")
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 1 null values, expected at most 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_single_value_not_null(spark):
+ """Test PySpark edge case with single non-null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=0,
+ )
+ data_frame = spark.createDataFrame([(1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_different_column_nulls_not_affecting(spark):
+ """Test that nulls in other columns don't affect the result."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=1,
+ )
+ # col1 has 0 nulls, col2 has nulls - should pass since we're only checking col1
+ data_frame = spark.createDataFrame(
+ [
+ {"col1": 1, "col2": None},
+ {"col1": 2, "col2": None},
+ {"col1": 3, "col2": None},
+ ],
+ schema="col1 int, col2 string",
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_suite_pandas_success():
+ """Test the expectation suite for pandas DataFrame with no violations."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_count(
+ column_name="col1", max_count=2
+ )
+ data_frame = pd.DataFrame(
+ {"col1": [1, None, 3], "col2": ["a", "b", "c"]}
+ ) # 1 null value, which is less than max_count of 2
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """Test the expectation suite for pandas DataFrame with violations."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_count(
+ column_name="col1", max_count=1
+ )
+ data_frame = pd.DataFrame(
+ {"col1": [1, None, None], "col2": ["a", "b", "c"]}
+ ) # 2 null values, which exceeds max_count of 1
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """Test the expectation suite for PySpark DataFrame with no violations."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_count(
+ column_name="col1", max_count=2
+ )
+ data_frame = spark.createDataFrame(
+ [(1, "a"), (None, "b"), (3, "c")], ["col1", "col2"]
+ ) # 1 null value, which is less than max_count of 2
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """Test the expectation suite for PySpark DataFrame with violations."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_count(
+ column_name="col1", max_count=1
+ )
+ data_frame = spark.createDataFrame(
+ [(1, "a"), (None, "b"), (None, "c")], ["col1", "col2"]
+ ) # 2 null values, which exceeds max_count of 1
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ """Test that an error is raised when the specified column is missing in PySpark DataFrame."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_count(
+ column_name="col1", max_count=5
+ )
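+ # The DataFrame has no col1, so the missing column should surface as a suite failure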
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col2", "col3"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_expectation_parameter_validation():
+ """Test that appropriate errors are raised for invalid parameters."""
+ # Test negative max_count
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=-1,
+ )
+ assert "max_count must be non-negative" in str(context.value), (
+ f"Expected 'max_count must be non-negative' in error message: {str(context.value)}"
+ )
+
+
+def test_expectation_mixed_data_types():
+ """Test the expectation with mixed data types including nulls."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=2,
+ )
+ # Mixed data types with nulls
+ data_frame = pd.DataFrame({"col1": [1, "text", None, 3.14, None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_large_dataset():
+ """Test the expectation with a larger dataset."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=100,
+ )
+ # Create a DataFrame with 1000 rows and 50 nulls
+ data = [None if i % 20 == 0 else i for i in range(1000)] # Every 20th value is None
+ data_frame = pd.DataFrame({"col1": data})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_large_threshold():
+ """Test the expectation with a very large threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=1000000,
+ )
+ # Small DataFrame with few nulls should pass with large threshold
+ data_frame = pd.DataFrame({"col1": [1, None, 3, None, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_column_not_exists_error():
+ """Test that an error is raised when the specified column does not exist."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullCount",
+ column_name="col1",
+ max_count=5,
+ )
+ data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ # The error message might vary slightly depending on pandas version
+ assert isinstance(result, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result)}"
+ )
+ result_str = str(result)
+ assert "col1" in result_str, f"Expected 'col1' in result message: {result_str}"
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py
new file mode 100644
index 0000000..cbe34aa
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py
@@ -0,0 +1,553 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_pandas_success_no_nulls():
+ """Test pandas success case with no null values."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 3, 4, 5],
+ "col2": ["a", "b", "c", "d", "e"],
+ "col3": [1.1, 2.2, 3.3, 4.4, 5.5],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_within_threshold():
+ """Test pandas success case with null percentage within threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=25.0,
+ )
+ # 4 values in col1, 1 null = 25% null
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, None, 3, 4],
+ "col2": ["a", "b", "c", "d"], # Other columns don't affect the test
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_exactly_at_threshold():
+ """Test pandas success case with null percentage exactly at threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=20.0,
+ )
+ # 5 values in col1, 1 null = 20% null
+ data_frame = pd.DataFrame({"col1": [1, 2, None, 4, 5], "col2": [None, "b", "c", "d", "e"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_with_nan():
+ """Test pandas success case with NaN values within threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col2",
+ max_percentage=50.0,
+ )
+ # 3 values in col2, 1 NaN = 33.33% null (less than 50%)
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4.0, np.nan, 6.0]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_failure_exceeds_threshold():
+ """Test pandas failure case when null percentage exceeds threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=20.0,
+ )
+ # 4 values in col1, 2 nulls = 50% null (exceeds 20%)
+ data_frame = pd.DataFrame({"col1": [1, None, 3, None], "col2": [None, "b", "c", "d"]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 50.00% null values, expected at most 20.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_failure_all_nulls_in_column():
+ """Test pandas failure case with 100% null values in the specified column."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=50.0,
+ )
+ data_frame = pd.DataFrame({"col1": [None, None], "col2": [1, 2]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 100.00% null values, expected at most 50.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_boundary_zero_threshold():
+ """Test pandas boundary case with 0.0% threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=0.0,
+ )
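+ # 1 of 3 values in col1 is null = 33.33%, above the 0% threshold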
+ data_frame = pd.DataFrame({"col1": [1, None, 3]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 33.33% null values, expected at most 0.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_boundary_hundred_threshold():
+ """Test pandas boundary case with 100.0% threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=100.0,
+ )
+ data_frame = pd.DataFrame({"col1": [None, None, None], "col2": [None, None, None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_empty_dataframe():
+ """Test pandas edge case with empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ data_frame = pd.DataFrame(columns=["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ # Empty DataFrame should have 0% nulls and pass
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_single_value_null():
+ """Test pandas edge case with single null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=50.0,
+ )
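+ # A single null value means a 100% null rate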
+ data_frame = pd.DataFrame({"col1": [None]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' has 100.00% null values, expected at most 50.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_single_value_not_null():
+ """Test pandas edge case with single non-null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ data_frame = pd.DataFrame({"col1": [1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_different_column_nulls_not_affecting():
+ """Test that nulls in other columns don't affect the result."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ # col1 has 0% nulls, col2 has 100% nulls - should pass since we're only checking col1
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [None, None, None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_no_nulls(spark):
+ """Test PySpark success case with no null values."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ data_frame = spark.createDataFrame(
+ [(1, "a", 1.1), (2, "b", 2.2), (3, "c", 3.3), (4, "d", 4.4), (5, "e", 5.5)],
+ ["col1", "col2", "col3"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_within_threshold(spark):
+ """Test PySpark success case with null percentage within threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=30.0,
+ )
+ # 4 values in col1, 1 null = 25% null
+ data_frame = spark.createDataFrame([(1,), (None,), (3,), (4,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_exactly_at_threshold(spark):
+ """Test PySpark success case with null percentage exactly at threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=40.0,
+ )
+ # 5 values in col1, 2 nulls = 40% null
+ data_frame = spark.createDataFrame(
+ [(1, "a"), (2, None), (None, "c"), (4, "d"), (None, None)], ["col1", "col2"]
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_failure_exceeds_threshold(spark):
+ """Test PySpark failure case when null percentage exceeds threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=25.0,
+ )
+ # 3 values in col1, 2 nulls = 66.67% null (exceeds 25%)
+ data_frame = spark.createDataFrame([(1, None), (None, "b"), (None, "c")], ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 66.67% null values, expected at most 25.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_failure_all_nulls_in_column(spark):
+ """Test PySpark failure case with 100% null values in the specified column."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=75.0,
+ )
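+ # All values are null = 100%; the DDL schema string is needed since types cannot be inferred from None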
+ data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 100.00% null values, expected at most 75.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_boundary_zero_threshold(spark):
+ """Test PySpark boundary case with 0.0% threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=0.0,
+ )
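+ # One null out of three values = 33.33%, which violates a 0% threshold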
+ data_frame = spark.createDataFrame([(1,), (None,), (3,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 33.33% null values, expected at most 0.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_boundary_hundred_threshold(spark):
+ """Test PySpark boundary case with 100.0% threshold."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=100.0,
+ )
+ data_frame = spark.createDataFrame(
+ [
+ {"col1": None, "col2": None},
+ {"col1": None, "col2": None},
+ ],
+ schema="col1: int, col2: string",
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_empty_dataframe(spark):
+ """Test PySpark edge case with empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ # Create empty DataFrame with schema
+ data_frame = spark.createDataFrame([], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+ # Empty DataFrame should have 0% nulls and pass
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_single_value_null(spark):
+ """Test PySpark edge case with single null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=50.0,
+ )
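+ # Single None row = 100% null; an explicit schema avoids a type-inference failure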
+ data_frame = spark.createDataFrame([(None,)], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'col1' has 100.00% null values, expected at most 50.00%.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_single_value_not_null(spark):
+ """Test PySpark edge case with single non-null value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ data_frame = spark.createDataFrame([(1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_different_column_nulls_not_affecting(spark):
+ """Test that nulls in other columns don't affect the result."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=10.0,
+ )
+ # col1 has 0% nulls, col2 has 100% nulls - should pass since we're only checking col1
+ data_frame = spark.createDataFrame(
+ [
+ {"col1": 1, "col2": None},
+ {"col1": 2, "col2": None},
+ {"col1": 3, "col2": None},
+ ],
+ schema="col1: int, col2: int",
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_suite_pandas_success():
+ """Test integration with expectations suite for pandas success case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+ column_name="col1", max_percentage=30.0
+ )
+ # 4 values in col1, 1 null = 25% null (should pass)
+ data_frame = pd.DataFrame({"col1": [1, 2, None, 4], "col2": ["a", "b", "c", "d"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """Test integration with expectations suite for pandas failure case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+ column_name="col1", max_percentage=10.0
+ )
+ # 2 values in col1, 1 null = 50% null (exceeds 10%)
+ data_frame = pd.DataFrame({"col1": [1, None], "col2": ["a", "b"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """Test integration with expectations suite for PySpark success case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+ column_name="col1", max_percentage=50.0
+ )
+ # 2 values in col1, 1 null = 50% null (equals 50%)
+ data_frame = spark.createDataFrame([(1, "a"), (None, "b")], ["col1", "col2"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """Test integration with expectations suite for PySpark failure case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+ column_name="col1", max_percentage=20.0
+ )
+ # 2 values in col1, 1 null = 50% null (exceeds 20%)
+ data_frame = spark.createDataFrame([(None, "a"), (2, None)], ["col1", "col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_expectation_parameter_validation():
+ """Test parameter validation for column_name and max_percentage."""
+ # Test with valid parameters
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="test_col",
+ max_percentage=50.0,
+ )
+ assert expectation is not None, "Expected expectation to be created successfully"
+
+ # Test string representation
+ expectation_str = str(expectation)
+ assert "50.0" in expectation_str, f"Expected '50.0' in expectation string: {expectation_str}"
+ assert "test_col" in expectation_str, (
+ f"Expected 'test_col' in expectation string: {expectation_str}"
+ )
+ assert "ExpectationMaxNullPercentage" in expectation_str, (
+ f"Expected 'ExpectationMaxNullPercentage' in expectation string: {expectation_str}"
+ )
+
+
+def test_expectation_mixed_data_types():
+ """Test expectation with mixed data types including various null representations."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="float_col",
+ max_percentage=50.0,
+ )
+ # 4 values in float_col, 1 NaN = 25% null (less than 50%)
+ data_frame = pd.DataFrame(
+ {
+ "int_col": [1, None, 3, 4],
+ "str_col": ["a", "b", None, "d"],
+ "float_col": [1.1, 2.2, 3.3, np.nan],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_precision_boundary():
+ """Test expectation with very precise percentage boundaries."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="col1",
+ max_percentage=25.0,
+ )
+ # 4 values in col1, 1 null = 25.00% null (exactly at boundary)
+ data_frame = pd.DataFrame({"col1": [1, None, 3, 4]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_column_not_exists_error():
+ """Test expectation with non-existent column should fail gracefully."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxNullPercentage",
+ column_name="nonexistent_col",
+ max_percentage=50.0,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
+ result = expectation.validate(data_frame=data_frame)
+
+ # Should get a failure message with error info
+ assert isinstance(result, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result)}"
+ )
+ result_str = str(result)
+ assert "nonexistent_col" in result_str, (
+ f"Expected 'nonexistent_col' in result message: {result_str}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py
new file mode 100644
index 0000000..0578cf0
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py
@@ -0,0 +1,486 @@
+import pandas as pd
+import pytest
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_pandas_success_exact_count():
+ """Test pandas success case with exact maximum row count."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=3,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_below_max():
+ """Test pandas success case with row count below maximum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=10,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5], "col2": ["a", "b", "c", "d", "e"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_single_row():
+ """Test pandas success case with single row and max count of 1."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=1,
+ )
+ data_frame = pd.DataFrame({"col1": [42]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_empty_dataframe():
+ """Test pandas success case with empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=5,
+ )
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_failure_exceeds_max():
+ """Test pandas failure case when row count exceeds maximum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=3,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5], "col2": ["a", "b", "c", "d", "e"]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 5 rows, expected at most 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_failure_zero_max_with_data():
+ """Test pandas failure case with zero max count but data present."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=0,
+ )
+ data_frame = pd.DataFrame({"col1": [1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 1 rows, expected at most 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_boundary_zero_max_empty_df():
+ """Test pandas boundary case with zero max count and empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=0,
+ )
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_large_dataset():
+ """Test pandas with larger dataset exceeding maximum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=100,
+ )
+ # Create DataFrame with 150 rows
+ data_frame = pd.DataFrame({"col1": range(150), "col2": [f"value_{i}" for i in range(150)]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 150 rows, expected at most 100.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_with_nulls():
+ """Test pandas expectation with null values (should still count rows)."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=4,
+ )
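+ # 5 rows in total; null values do not reduce the row count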
+ data_frame = pd.DataFrame({"col1": [1, None, 3, None, 5], "col2": [None, "b", None, "d", None]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 5 rows, expected at most 4.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_success_exact_count(spark):
+ """Test PySpark success case with exact maximum row count."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=3,
+ )
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_below_max(spark):
+ """Test PySpark success case with row count below maximum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=10,
+ )
+ data_frame = spark.createDataFrame(
+ [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")], ["col1", "col2"]
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_single_row(spark):
+ """Test PySpark success case with single row."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=1,
+ )
+ data_frame = spark.createDataFrame([(42,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_empty_dataframe(spark):
+ """Test PySpark success case with empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=5,
+ )
+ # Create empty DataFrame with schema
+ data_frame = spark.createDataFrame([], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_failure_exceeds_max(spark):
+ """Test PySpark failure case when row count exceeds maximum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=3,
+ )
+ data_frame = spark.createDataFrame(
+ [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")], ["col1", "col2"]
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 5 rows, expected at most 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_failure_zero_max_with_data(spark):
+ """Test PySpark failure case with zero max count but data present."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=0,
+ )
+ data_frame = spark.createDataFrame([(1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 1 rows, expected at most 0.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_boundary_zero_max_empty_df(spark):
+ """Test PySpark boundary case with zero max count and empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=0,
+ )
+ data_frame = spark.createDataFrame([], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_large_dataset(spark):
+ """Test PySpark with larger dataset exceeding maximum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=50,
+ )
+ # Create DataFrame with 75 rows
+ data = [(i, f"value_{i}") for i in range(75)]
+ data_frame = spark.createDataFrame(data, ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 75 rows, expected at most 50.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_with_nulls(spark):
+ """Test PySpark expectation with null values (should still count rows)."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=4,
+ )
+ data_frame = spark.createDataFrame(
+ [(1, None), (None, "b"), (3, None), (None, "d"), (5, None)],
+ ["col1", "col2"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 5 rows, expected at most 4.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ """Test integration with expectations suite for pandas success case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=5)
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """Test integration with expectations suite for pandas failure case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=2)
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """Test integration with expectations suite for PySpark success case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=5)
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """Test integration with expectations suite for PySpark failure case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=2)
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c"), (4, "d")], ["col1", "col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_expectation_parameter_validation():
+ """Test parameter validation for max_rows."""
+ # Test with valid parameters
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=100,
+ )
+ assert expectation is not None, "Expected expectation to be created successfully"
+
+ # Test string representation
+ expectation_str = str(expectation)
+ assert "100" in expectation_str, f"Expected '100' in expectation string: {expectation_str}"
+ assert "ExpectationMaxRows" in expectation_str, (
+ f"Expected 'ExpectationMaxRows' in expectation string: {expectation_str}"
+ )
+
+
+def test_expectation_boundary_conditions():
+ """Test various boundary conditions for max_rows."""
+ # Test with max_rows = 1
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=1,
+ )
+
+ # Single row - should pass
+ data_frame = pd.DataFrame({"col1": [1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+ # Two rows - should fail
+ data_frame = pd.DataFrame({"col1": [1, 2]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 2 rows, expected at most 1.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_multiple_columns():
+ """Test expectation with multiple columns (should still count total rows)."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=3,
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 3, 4],
+ "col2": ["a", "b", "c", "d"],
+ "col3": [1.1, 2.2, 3.3, 4.4],
+ "col4": [True, False, True, False],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 4 rows, expected at most 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_mixed_data_types():
+ """Test expectation with mixed data types in columns."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=10,
+ )
+ data_frame = pd.DataFrame(
+ {
+ "int_col": [1, 2, 3, 4, 5],
+ "str_col": ["a", "b", "c", "d", "e"],
+ "float_col": [1.1, 2.2, 3.3, 4.4, 5.5],
+ "bool_col": [True, False, True, False, True],
+ "null_col": [None, None, None, None, None],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_high_max_rows():
+ """Test expectation with very high max_rows value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=1000000, # 1 million
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_identical_values():
+ """Test expectation with DataFrame containing identical values."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=3,
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [42, 42, 42, 42], # All same values
+ "col2": ["same", "same", "same", "same"],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 4 rows, expected at most 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_edge_case_max_rows_equals_actual():
+ """Test edge case where max_rows exactly equals actual row count."""
+ for count in [1, 5, 10, 100]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMaxRows",
+ max_rows=count,
+ )
+ # Create DataFrame with exactly 'count' rows
+ data_frame = pd.DataFrame({"col1": list(range(count))})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows")
+ ), f"Expected success message for count {count} but got: {result}"
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py
new file mode 100644
index 0000000..e112ccc
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py
@@ -0,0 +1,617 @@
+import pandas as pd
+import pytest
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_pandas_success_exact_count():
+ """Test pandas success case with exact minimum row count."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_above_min():
+ """Test pandas success case with row count above minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5], "col2": ["a", "b", "c", "d", "e"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_single_row():
+ """Test pandas success case with single row and min count of 1."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=1,
+ )
+ data_frame = pd.DataFrame({"col1": [42]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_zero_min_empty_df():
+ """Test pandas success case with zero minimum and empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=0,
+ )
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_success_zero_min_with_data():
+ """Test pandas success case with zero minimum and data present."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=0,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_failure_below_min():
+ """Test pandas failure case when row count is below minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=5,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 3 rows, expected at least 5.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_failure_empty_with_min():
+ """Test pandas failure case with empty DataFrame but minimum required."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=2,
+ )
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 0 rows, expected at least 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_failure_single_row_needs_more():
+ """Test pandas failure case with single row but higher minimum required."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = pd.DataFrame({"col1": [1]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 1 rows, expected at least 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_large_dataset():
+ """Test pandas with larger dataset meeting minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=100,
+ )
+ # Create DataFrame with 150 rows
+ data_frame = pd.DataFrame({"col1": range(150), "col2": [f"value_{i}" for i in range(150)]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_large_dataset_failure():
+ """Test pandas with dataset not meeting large minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=200,
+ )
+ # Create DataFrame with 150 rows
+ data_frame = pd.DataFrame({"col1": range(150), "col2": [f"value_{i}" for i in range(150)]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 150 rows, expected at least 200.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pandas_with_nulls():
+ """Test pandas expectation with null values (should still count rows)."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = pd.DataFrame({"col1": [1, None, 3, None, 5], "col2": [None, "b", None, "d", None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
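+# The `spark` fixture used by the PySpark tests below is not defined in this
+# file; it is assumed to be provided by a shared conftest.py. A minimal sketch
+# of such a fixture (hypothetical, names assumed) would be:
+#
+#     from pyspark.sql import SparkSession
+#
+#     @pytest.fixture(scope="session")
+#     def spark():
+#         return SparkSession.builder.master("local[1]").getOrCreate()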
+def test_expectation_pyspark_success_exact_count(spark):
+ """Test PySpark success case with exact minimum row count."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_above_min(spark):
+ """Test PySpark success case with row count above minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = spark.createDataFrame(
+ [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")], ["col1", "col2"]
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_single_row(spark):
+ """Test PySpark success case with single row."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=1,
+ )
+ data_frame = spark.createDataFrame([(42,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_zero_min_empty_df(spark):
+ """Test PySpark success case with zero minimum and empty DataFrame."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=0,
+ )
+ # Create empty DataFrame with schema
+ data_frame = spark.createDataFrame([], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_success_zero_min_with_data(spark):
+ """Test PySpark success case with zero minimum and data present."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=0,
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_failure_below_min(spark):
+ """Test PySpark failure case when row count is below minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=5,
+ )
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 3 rows, expected at least 5.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_failure_empty_with_min(spark):
+ """Test PySpark failure case with empty DataFrame but minimum required."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=2,
+ )
+ data_frame = spark.createDataFrame([], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 0 rows, expected at least 2.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_failure_single_row_needs_more(spark):
+ """Test PySpark failure case with single row but higher minimum required."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = spark.createDataFrame([(1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 1 rows, expected at least 3.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_large_dataset(spark):
+ """Test PySpark with larger dataset meeting minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=50,
+ )
+ # Create DataFrame with 75 rows
+ data = [(i, f"value_{i}") for i in range(75)]
+ data_frame = spark.createDataFrame(data, ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_large_dataset_failure(spark):
+ """Test PySpark with dataset not meeting large minimum."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=100,
+ )
+ # Create DataFrame with 75 rows
+ data = [(i, f"value_{i}") for i in range(75)]
+ data_frame = spark.createDataFrame(data, ["col1", "col2"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="DataFrame has 75 rows, expected at least 100.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_with_nulls(spark):
+ """Test PySpark expectation with null values (should still count rows)."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = spark.createDataFrame(
+ [(1, None), (None, "b"), (3, None), (None, "d"), (5, None)],
+ ["col1", "col2"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_suite_pandas_success():
+ """Test integration with expectations suite for pandas success case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=2)
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """Test integration with expectations suite for pandas failure case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=5)
+ data_frame = pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """Test integration with expectations suite for PySpark success case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=2)
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """Test integration with expectations suite for PySpark failure case."""
+ expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=5)
+ data_frame = spark.createDataFrame([(1, "a"), (2, "b")], ["col1", "col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_expectation_parameter_validation():
+ """Test parameter validation for min_rows."""
+ # Test with valid parameters
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=10,
+ )
+ assert expectation is not None, "Expected expectation to be created successfully"
+
+ # Test string representation
+ expectation_str = str(expectation)
+ assert "10" in expectation_str, f"Expected '10' in expectation string: {expectation_str}"
+ assert "ExpectationMinRows" in expectation_str, (
+ f"Expected 'ExpectationMinRows' in expectation string: {expectation_str}"
+ )
+
+
+def test_expectation_boundary_conditions():
+ """Test various boundary conditions for min_rows."""
+ # Test with min_rows = 1
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=1,
+ )
+
+ # Single row - should pass
+ data_frame = pd.DataFrame({"col1": [1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+ # Empty DataFrame - should fail
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationFailureMessage), (
+ f"Expected DataFrameExpectationFailureMessage but got: {type(result)}"
+ )
+
+
+def test_expectation_multiple_columns():
+ """Test expectation with multiple columns (should still count total rows)."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 3, 4],
+ "col2": ["a", "b", "c", "d"],
+ "col3": [1.1, 2.2, 3.3, 4.4],
+ "col4": [True, False, True, False],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_mixed_data_types():
+ """Test expectation with mixed data types in columns."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = pd.DataFrame(
+ {
+ "int_col": [1, 2, 3, 4, 5],
+ "str_col": ["a", "b", "c", "d", "e"],
+ "float_col": [1.1, 2.2, 3.3, 4.4, 5.5],
+ "bool_col": [True, False, True, False, True],
+ "null_col": [None, None, None, None, None],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_low_min_count():
+ """Test expectation with very low min_rows value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=1,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_high_min_count():
+ """Test expectation with very high min_rows value."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=1000000, # 1 million
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="DataFrame has 3 rows, expected at least 1000000.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_identical_values():
+ """Test expectation with DataFrame containing identical values."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [42, 42, 42, 42], # All same values
+ "col2": ["same", "same", "same", "same"],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_edge_case_min_count_equals_actual():
+ """Test edge case where min_rows exactly equals actual row count."""
+ for count in [1, 5, 10, 100]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=count,
+ )
+ # Create DataFrame with exactly 'count' rows
+ data_frame = pd.DataFrame({"col1": list(range(count))})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message for count {count} but got: {result}"
+
+
+def test_expectation_zero_min_count_edge_cases():
+ """Test edge cases with zero minimum count."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=0,
+ )
+
+ # Empty DataFrame should pass
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+ # DataFrame with data should also pass
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_progressive_min_counts():
+ """Test expectation with progressively increasing minimum counts."""
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]}) # 5 rows
+
+ # Should pass for min_rows <= 5
+ for min_rows in [0, 1, 2, 3, 4, 5]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=min_rows,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message for min_rows {min_rows} but got: {result}"
+
+ # Should fail for min_rows > 5
+ for min_rows in [6, 7, 10, 100]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=min_rows,
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=f"DataFrame has 5 rows, expected at least {min_rows}.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message for min_rows {min_rows} but got: {result}"
+ )
+
+
+def test_expectation_dataframe_structure_irrelevant():
+ """Test that DataFrame structure doesn't affect row counting."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationMinRows",
+ min_rows=3,
+ )
+
+ # Single column DataFrame
+ df1 = pd.DataFrame({"col1": [1, 2, 3]})
+ result1 = expectation.validate(data_frame=df1)
+
+ # Multi-column DataFrame
+ df2 = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"], "col3": [1.1, 2.2, 3.3]})
+ result2 = expectation.validate(data_frame=df2)
+
+ # Both should have same result (success)
+ assert str(result1) == str(result2), f"Expected same results but got: {result1} vs {result2}"
+ assert str(result1) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows")
+ ), f"Expected success message but got: {result1}"
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py
new file mode 100644
index 0000000..d86c475
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py
@@ -0,0 +1,577 @@
+import pytest
+import pandas as pd
+from pyspark.sql.types import IntegerType, StructField, StructType
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1"],
+ )
+ assert expectation.get_expectation_name() == "ExpectationUniqueRows", (
+ f"Expected 'ExpectationUniqueRows' but got: {expectation.get_expectation_name()}"
+ )
+
+
+# Tests for specific columns - Pandas
+def test_expectation_pandas_success_specific_columns():
+ """
+ Test the expectation for pandas DataFrame with no violations on specific columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1", "col2"],
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 3, 1],
+ "col2": [10, 20, 30, 20], # Different combination
+ "col3": [100, 100, 100, 100], # Same values but not checked
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations_specific_columns():
+ """
+ Test the expectation for pandas DataFrame with violations on specific columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1", "col2"],
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 1, 3],
+ "col2": [10, 20, 10, 30], # Duplicate combination (1, 10)
+ "col3": [100, 200, 300, 400],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+    # The expected violations frame shows only one row per duplicate group, with its count
+ expected_violations = pd.DataFrame({"col1": [1], "col2": [10], "#duplicates": [2]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 duplicate row(s). duplicate rows found for columns ['col1', 'col2']",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+# Tests for all columns (empty list) - Pandas
+def test_expectation_pandas_success_all_columns():
+ """
+ Test the expectation for pandas DataFrame with no violations on all columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=[],
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [10, 20, 30], "col3": [100, 200, 300]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations_all_columns():
+ """
+ Test the expectation for pandas DataFrame with violations on all columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=[],
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 1],
+ "col2": [10, 20, 10], # Duplicate combination (1, 10)
+ "col3": [100, 200, 100],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+    # The expected violations frame shows only one row per duplicate group, with its count
+ expected_violations = pd.DataFrame(
+ {"col1": [1], "col2": [10], "col3": [100], "#duplicates": [2]}
+ )
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 duplicate row(s). duplicate rows found",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+# Tests for specific columns - PySpark
+def test_expectation_pyspark_success_specific_columns(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations on specific columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1", "col2"],
+ )
+ data_frame = spark.createDataFrame(
+ [
+ (1, 10, 100),
+ (2, 20, 100),
+ (3, 30, 100),
+            (1, 20, 100),  # (1, 20) is a new (col1, col2) pair, so no duplicate
+ ],
+ ["col1", "col2", "col3"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations_specific_columns(spark):
+ """
+ Test the expectation for PySpark DataFrame with violations on specific columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1", "col2"],
+ )
+ data_frame = spark.createDataFrame(
+ [
+ (1, 10, 100),
+ (2, 20, 200),
+ (1, 10, 300), # Duplicate combination (1, 10)
+ (3, 30, 400),
+ ],
+ ["col1", "col2", "col3"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+    # The expected violations frame shows only one row per duplicate group, with its count
+ expected_violations = spark.createDataFrame([(1, 10, 2)], ["col1", "col2", "#duplicates"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 duplicate row(s). duplicate rows found for columns ['col1', 'col2']",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+# Tests for all columns (empty list) - PySpark
+def test_expectation_pyspark_success_all_columns(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations on all columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=[],
+ )
+ data_frame = spark.createDataFrame(
+ [(1, 10, 100), (2, 20, 200), (3, 30, 300)], ["col1", "col2", "col3"]
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations_all_columns(spark):
+ """
+ Test the expectation for PySpark DataFrame with violations on all columns.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=[],
+ )
+ data_frame = spark.createDataFrame(
+ [
+ (1, 10, 100),
+ (2, 20, 200),
+            (1, 10, 100),  # Duplicate combination (1, 10, 100)
+        ],
+ ["col1", "col2", "col3"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+    # The expected violations frame shows only one row per duplicate group, with its count
+ expected_violations = spark.createDataFrame(
+ [(1, 10, 100, 2)], ["col1", "col2", "col3", "#duplicates"]
+ )
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 duplicate row(s). duplicate rows found",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+# Edge case tests
+def test_column_missing_error_pandas():
+ """
+ Test that an error is raised when specified columns are missing in pandas DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["nonexistent_col"],
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'nonexistent_col' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_column_missing_error_pyspark(spark):
+ """
+ Test that an error is raised when specified columns are missing in PySpark DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["nonexistent_col"],
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message="Column 'nonexistent_col' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_empty_dataframe_pandas():
+ """
+ Test the expectation on an empty pandas DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1"],
+ )
+ data_frame = pd.DataFrame({"col1": []})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_empty_dataframe_pyspark(spark):
+ """
+ Test the expectation on an empty PySpark DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1"],
+ )
+
+ schema = StructType([StructField("col1", IntegerType(), True)])
+ data_frame = spark.createDataFrame([], schema)
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_single_row_dataframe_pandas():
+ """
+ Test the expectation on a single-row pandas DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1"],
+ )
+ data_frame = pd.DataFrame({"col1": [1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_single_row_dataframe_pyspark(spark):
+ """
+ Test the expectation on a single-row PySpark DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1"],
+ )
+ data_frame = spark.createDataFrame([(1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows")
+ ), f"Expected success message but got: {result}"
+
+
+def test_with_nulls_pandas():
+ """
+ Test the expectation with null values in pandas DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1", "col2"],
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, None, 1, None],
+ "col2": [10, None, 20, None], # (None, None) appears twice
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+    # The expected violations frame shows only one row per duplicate group, with its count
+ expected_violations = pd.DataFrame({"col1": [None], "col2": [None], "#duplicates": [2]})
+
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 duplicate row(s). duplicate rows found for columns ['col1', 'col2']",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_with_nulls_pyspark(spark):
+ """
+ Test the expectation with null values in PySpark DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1", "col2"],
+ )
+ data_frame = spark.createDataFrame(
+ [
+ (1, 10),
+ (None, None),
+ (1, 20),
+ (None, None), # (None, None) appears twice
+ ],
+ ["col1", "col2"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+ schema = StructType(
+ [
+ StructField("col1", IntegerType(), True),
+ StructField("col2", IntegerType(), True),
+ StructField("#duplicates", IntegerType(), True),
+ ]
+ )
+    # The expected violations frame shows only one row per duplicate group, with its count
+ expected_violations = spark.createDataFrame([(None, None, 2)], schema)
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 duplicate row(s). duplicate rows found for columns ['col1', 'col2']",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+# Test with multiple duplicate groups
+def test_expectation_pandas_multiple_duplicate_groups():
+ """
+ Test the expectation with multiple groups of duplicates in pandas DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1"],
+ )
+ data_frame = pd.DataFrame(
+ {
+ "col1": [1, 2, 1, 3, 2, 3], # Three groups: (1,1), (2,2), (3,3)
+ "col2": [10, 20, 30, 40, 50, 60],
+ }
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+    # The expected violations frame shows one row per duplicate group with its count, ordered by count and then by key values
+ expected_violations = pd.DataFrame({"col1": [1, 2, 3], "#duplicates": [2, 2, 2]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 6 duplicate row(s). duplicate rows found for columns ['col1']",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_multiple_duplicate_groups(spark):
+ """
+ Test the expectation with multiple groups of duplicates in PySpark DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationUniqueRows",
+ column_names=["col1"],
+ )
+ data_frame = spark.createDataFrame(
+ [
+ (1, 10),
+ (2, 20),
+ (1, 30), # Duplicate group 1
+ (3, 40),
+ (2, 50), # Duplicate group 2
+ (3, 60), # Duplicate group 3
+ ],
+ ["col1", "col2"],
+ )
+ result = expectation.validate(data_frame=data_frame)
+
+    # The expected violations frame shows one row per duplicate group with its count, ordered by count and then by key values
+ expected_violations = spark.createDataFrame([(1, 2), (2, 2), (3, 2)], ["col1", "#duplicates"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 6 duplicate row(s). duplicate rows found for columns ['col1']",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+# Suite-level tests
+def test_suite_pandas_success_specific_columns():
+ """
+ Test the expectation suite for pandas DataFrame with no violations on specific columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"])
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [10, 10, 10]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations_specific_columns():
+ """
+ Test the expectation suite for pandas DataFrame with violations on specific columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"])
+ data_frame = pd.DataFrame({"col1": [1, 1, 3], "col2": [10, 20, 30]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pandas_success_all_columns():
+ """
+ Test the expectation suite for pandas DataFrame with no violations on all columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[])
+ data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [10, 20, 30]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations_all_columns():
+ """
+ Test the expectation suite for pandas DataFrame with violations on all columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[])
+ data_frame = pd.DataFrame({"col1": [1, 1, 3], "col2": [10, 10, 30]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success_specific_columns(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations on specific columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"])
+ data_frame = spark.createDataFrame([(1, 10), (2, 10), (3, 10)], ["col1", "col2"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations_specific_columns(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations on specific columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"])
+ data_frame = spark.createDataFrame([(1, 10), (1, 20), (3, 30)], ["col1", "col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success_all_columns(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations on all columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[])
+ data_frame = spark.createDataFrame([(1, 10), (2, 20), (3, 30)], ["col1", "col2"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations_all_columns(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations on all columns.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[])
+ data_frame = spark.createDataFrame([(1, 10), (1, 10), (3, 30)], ["col1", "col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pandas_column_missing_error():
+ """
+ Test that an error is raised when specified columns are missing in pandas DataFrame suite.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(
+ column_names=["nonexistent_col"]
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ """
+ Test that an error is raised when specified columns are missing in PySpark DataFrame suite.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(
+ column_names=["nonexistent_col"]
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py
new file mode 100644
index 0000000..fb338a8
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py
@@ -0,0 +1,364 @@
+import pytest
+import numpy as np
+import pandas as pd
+from pyspark.sql.types import IntegerType, StructField, StructType
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name_and_description():
+ """Test that the expectation name and description are correctly returned."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="test_col",
+ min_value=10,
+ max_value=20,
+ )
+
+    # Test expectation name. Note that the max expectation reports the quantile
+    # expectation's name, which suggests it is implemented as a quantile
+    # (q = 1.0) under the hood.
+ assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", (
+ f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+ # Test description
+ description = expectation.get_description()
+ assert "maximum" in description, f"Expected 'maximum' in description: {description}"
+ assert "test_col" in description, f"Expected 'test_col' in description: {description}"
+ assert "10" in description, f"Expected '10' in description: {description}"
+ assert "20" in description, f"Expected '20' in description: {description}"
+
+
+def test_pandas_success_registry_and_suite():
+ """Test successful validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 30, 40, "basic success case"),
+ ([35], 30, 40, "single row"),
+ ([-20, -15, -10, -3], -5, 0, "negative values"),
+ ([1.1, 2.5, 3.7, 3.8], 3.5, 4.0, "float values"),
+ ([25, 25, 25, 25], 24, 26, "identical values"),
+        ([20, 25.5, 30, 37], 35, 40, "mixed int/float values"),
+ ([-5, 0, 0, -2], -1, 1, "zero values"),
+ ([20, None, 35, None, 25], 30, 40, "with nulls"),
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
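+        # The success message carries the quantile expectation's name rather
+        # than "ExpectationColumnMaxBetween" (see the naming test above).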
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_max_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pandas_failure_registry_and_suite():
+ """Test failure validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, expected_message)
+ (
+ [20, 25, 30, 35],
+ 40,
+ 50,
+ "Column 'col1' maximum value 35 is not between 40 and 50.",
+ ),
+ ([None, None, None], 30, 40, "Column 'col1' contains only null values."),
+ ([], 30, 40, "Column 'col1' contains only null values."),
+ ]
+
+ for data, min_val, max_val, expected_message in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for data {data}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_max_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pandas_missing_column_registry_and_suite():
+ """Test missing column error for pandas DataFrames through both registry and suite."""
+ data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]})
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="nonexistent_col",
+ min_value=30,
+ max_value=40,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_max_between(
+ column_name="nonexistent_col", min_value=30, max_value=40
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_success_registry_and_suite(spark):
+ """Test successful validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 30, 40, "basic success case"),
+ ([35], 30, 40, "single row"),
+ ([-20, -15, -10, -3], -5, 0, "negative values"),
+ ([20, None, 35, None, 25], 30, 40, "with nulls"),
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_max_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pyspark_failure_registry_and_suite(spark):
+ """Test failure validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, expected_message)
+ (
+ [20, 25, 30, 35],
+ 40,
+ 50,
+ "Column 'col1' maximum value 35 is not between 40 and 50.",
+ ),
+ ]
+
+ for data, min_val, max_val, expected_message in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_max_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+ """Test null scenarios for PySpark DataFrames through both registry and suite."""
+ # Test scenarios
+ test_scenarios = [
+ # (data_frame_creation, expected_message, description)
+ (
+ lambda: spark.createDataFrame(
+ [{"col1": None}, {"col1": None}, {"col1": None}],
+ schema="struct",
+ ),
+ "Column 'col1' contains only null values.",
+ "all nulls",
+ ),
+ (
+ lambda: spark.createDataFrame(
+ [], StructType([StructField("col1", IntegerType(), True)])
+ ),
+ "Column 'col1' contains only null values.",
+ "empty dataframe",
+ ),
+ ]
+
+ for df_creator, expected_message, description in test_scenarios:
+ data_frame = df_creator()
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=30,
+ max_value=40,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_max_between(
+ column_name="col1", min_value=30, max_value=40
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+ """Test missing column error for PySpark DataFrames through both registry and suite."""
+ data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="nonexistent_col",
+ min_value=30,
+ max_value=40,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_max_between(
+ column_name="nonexistent_col", min_value=30, max_value=40
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_boundary_values_both_dataframes(spark):
+ """Test boundary values for both pandas and PySpark DataFrames."""
+ test_data = [20, 25, 30, 35] # max = 35
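+    # The two registry checks below put the max exactly on a bound and still
+    # expect success, i.e. the between check is inclusive at both ends.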
+
+ # Test exact minimum boundary
+ for df_type, data_frame in [
+ ("pandas", pd.DataFrame({"col1": test_data})),
+ ("pyspark", spark.createDataFrame([(val,) for val in test_data], ["col1"])),
+ ]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=35, # exact minimum boundary
+ max_value=40,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Minimum boundary test failed for {df_type}: expected success but got {type(result)}"
+ )
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=30,
+ max_value=35, # exact maximum boundary
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Maximum boundary test failed for {df_type}: expected success but got {type(result)}"
+ )
+
+
+def test_suite_chaining():
+ """Test that the suite method returns self for method chaining."""
+ suite = DataFrameExpectationsSuite()
+ result = suite.expect_column_max_between(column_name="col1", min_value=30, max_value=40)
+ assert result is suite, f"Expected suite chaining to return same instance but got: {result}"
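+    # Chaining allows several expectations to be composed fluently, e.g.:
+    #     DataFrameExpectationsSuite().expect_min_rows(min_rows=1).expect_column_max_between(
+    #         column_name="col1", min_value=30, max_value=40
+    #     )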
+
+
+def test_large_dataset_performance():
+ """Test the expectation with a larger dataset to ensure performance."""
+    # Seed the RNG so the test is deterministic across runs.
+    rng = np.random.default_rng(42)
+
+    # Create a larger dataset whose maximum lands close to 60
+    large_data = rng.uniform(10, 60, 1000).tolist()
+ data_frame = pd.DataFrame({"col1": large_data})
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMaxBetween",
+ column_name="col1",
+ min_value=55,
+ max_value=65,
+ )
+
+ result = expectation.validate(data_frame=data_frame)
+    # Should succeed: the sample maximum of 1000 draws from uniform(10, 60)
+    # is essentially always within (55, 60)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Large dataset test failed: expected success but got {type(result)}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py
new file mode 100644
index 0000000..7e25ffa
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py
@@ -0,0 +1,433 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name_and_description():
+ """Test that the expectation name and description are correctly returned."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="test_col",
+ min_value=10,
+ max_value=20,
+ )
+
+ # Test expectation name
+ assert expectation.get_expectation_name() == "ExpectationColumnMeanBetween", (
+ f"Expected 'ExpectationColumnMeanBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+ # Test description
+ description = expectation.get_description()
+ assert "mean" in description, f"Expected 'mean' in description: {description}"
+ assert "test_col" in description, f"Expected 'test_col' in description: {description}"
+ assert "10" in description, f"Expected '10' in description: {description}"
+ assert "20" in description, f"Expected '20' in description: {description}"
+
+
+def test_pandas_success_registry_and_suite():
+ """Test successful validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 25, 30, "basic success case"), # mean = 27.5
+ ([25], 20, 30, "single row"), # mean = 25
+ ([-20, -15, -10, -5], -15, -10, "negative values"), # mean = -12.5
+ ([1.1, 2.5, 3.7, 3.8], 2.5, 3.0, "float values"), # mean = 2.775
+ ([25, 25, 25, 25], 24, 26, "identical values"), # mean = 25
+        ([20, 25.5, 30, 37], 27, 29, "mixed int/float values"),  # mean = 28.125
+ ([-5, 0, 0, 5], -2, 2, "with zeros"), # mean = 0
+ (
+ [20, None, 30, None, 40],
+ 25,
+ 35,
+ "with nulls",
+ ), # mean = 30 (nulls ignored)
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnMeanBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_mean_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pandas_failure_registry_and_suite():
+ """Test failure validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, expected_mean, description)
+ ([20, 25, 30, 35], 30, 35, 27.5, "mean too low"),
+ ([20, 25, 30, 35], 20, 25, 27.5, "mean too high"),
+ ([None, None, None], 25, 30, None, "all nulls"),
+ ([], 25, 30, None, "empty dataframe"),
+ ]
+
+ for data, min_val, max_val, expected_mean, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Determine expected message
+ if expected_mean is None:
+ expected_message = "Column 'col1' contains only null values."
+ else:
+ expected_message = (
+ f"Column 'col1' mean value {expected_mean} is not between {min_val} and {max_val}."
+ )
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_mean_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pandas_missing_column_registry_and_suite():
+ """Test missing column error for pandas DataFrames through both registry and suite."""
+ data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]})
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="nonexistent_col",
+ min_value=25,
+ max_value=30,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_mean_between(
+ column_name="nonexistent_col", min_value=25, max_value=30
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_success_registry_and_suite(spark):
+ """Test successful validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 25, 30, "basic success case"), # mean = 27.5
+ ([25], 20, 30, "single row"), # mean = 25
+ ([-20, -15, -10, -5], -15, -10, "negative values"), # mean = -12.5
+ ([20, None, 30, None, 40], 25, 35, "with nulls"), # mean = 30
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnMeanBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_mean_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pyspark_failure_registry_and_suite(spark):
+ """Test failure validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, expected_mean, description)
+ ([20, 25, 30, 35], 30, 35, 27.5, "mean too low"),
+ ([20, 25, 30, 35], 20, 25, 27.5, "mean too high"),
+ ]
+
+ for data, min_val, max_val, expected_mean, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+ expected_message = (
+ f"Column 'col1' mean value {expected_mean} is not between {min_val} and {max_val}."
+ )
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_mean_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+ """Test null scenarios for PySpark DataFrames through both registry and suite."""
+ from pyspark.sql.types import IntegerType, StructField, StructType
+
+ # Test scenarios
+ test_scenarios = [
+ # (data_frame_creation, expected_message, description)
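+        # Lambdas defer creation so each scenario builds a fresh DataFrame at loop time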
+ (
+ lambda: spark.createDataFrame(
+ [{"col1": None}, {"col1": None}, {"col1": None}],
+ schema="struct",
+ ),
+ "Column 'col1' contains only null values.",
+ "all nulls",
+ ),
+ (
+ lambda: spark.createDataFrame(
+ [], StructType([StructField("col1", IntegerType(), True)])
+ ),
+ "Column 'col1' contains only null values.",
+ "empty dataframe",
+ ),
+ ]
+
+ for df_creator, expected_message, description in test_scenarios:
+ data_frame = df_creator()
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=25,
+ max_value=30,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_mean_between(
+ column_name="col1", min_value=25, max_value=30
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+ """Test missing column error for PySpark DataFrames through both registry and suite."""
+ data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="nonexistent_col",
+ min_value=25,
+ max_value=30,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_mean_between(
+ column_name="nonexistent_col", min_value=25, max_value=30
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_boundary_values_both_dataframes(spark):
+ """Test boundary values for both pandas and PySpark DataFrames."""
+ test_data = [20, 25, 30, 35] # mean = 27.5
+
+ # Test boundary scenarios
+ boundary_tests = [
+ (27.5, 30, "exact minimum boundary"), # mean exactly at min
+ (25, 27.5, "exact maximum boundary"), # mean exactly at max
+ ]
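+    # Both bounds are inclusive: a mean exactly equal to min_value or max_value passes.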
+
+ for min_val, max_val, boundary_desc in boundary_tests:
+ for df_type, data_frame in [
+ ("pandas", pd.DataFrame({"col1": test_data})),
+ (
+ "pyspark",
+ spark.createDataFrame([(val,) for val in test_data], ["col1"]),
+ ),
+ ]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Boundary test failed for {df_type} with {boundary_desc}: expected success but got {type(result)}"
+ )
+
+
+def test_precision_handling():
+ """Test mean calculation precision with various numeric types."""
+ # Test scenarios with different levels of precision
+ precision_tests = [
+ # (data, description)
+ ([1.1111, 2.2222, 3.3333], "high precision decimals"),
+ ([1, 2, 3, 4, 5, 6, 7, 8, 9], "integer sequence"),
+ ([0.1, 0.2, 0.3, 0.4, 0.5], "decimal sequence"),
+ ([1e-6, 2e-6, 3e-6], "scientific notation"),
+ ]
+
+ for data, description in precision_tests:
+ data_frame = pd.DataFrame({"col1": data})
+ calculated_mean = sum(data) / len(data)
+
+ # Use a range around the calculated mean
+ min_val = calculated_mean - 0.1
+ max_val = calculated_mean + 0.1
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Precision test failed for {description}: expected success but got {type(result)}"
+ )
+
+
+def test_suite_chaining():
+ """Test that the suite method returns self for method chaining."""
+ suite = DataFrameExpectationsSuite()
+ result = suite.expect_column_mean_between(column_name="col1", min_value=25, max_value=30)
+ assert result is suite, f"Expected suite chaining to return same instance but got: {result}"
+
+
+def test_large_dataset_performance():
+ """Test the expectation with a larger dataset to ensure performance."""
+ import numpy as np
+
+ # Create a larger dataset with mean around 50
+ large_data = np.random.normal(50, 10, 1000).tolist()
+ data_frame = pd.DataFrame({"col1": large_data})
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=45,
+ max_value=55,
+ )
+
+ result = expectation.validate(data_frame=data_frame)
+    # Should succeed: the mean of 1000 normal(50, 10) draws is close to 50
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Large dataset test failed: expected success but got {type(result)}"
+ )
+
+
+def test_outlier_handling(spark):
+ """Test mean calculation with outliers."""
+ # Test data with outliers
+ outlier_scenarios = [
+ # (data, min_val, max_val, description)
+ ([1, 2, 3, 100], 20, 30, "single high outlier"), # mean = 26.5
+ ([-100, 10, 20, 30], -15, -5, "single low outlier"), # mean = -10
+ ([1, 2, 3, 4, 5, 1000], 150, 200, "extreme outlier"), # mean ≈ 169.17
+ ]
+
+ for data, min_val, max_val, description in outlier_scenarios:
+ # Test with pandas
+ data_frame = pd.DataFrame({"col1": data})
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMeanBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Pandas outlier test failed for {description}: expected success but got {type(result)}"
+ )
+
+ # Test with PySpark
+ pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"])
+ result_pyspark = expectation.validate(data_frame=pyspark_df)
+ assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), (
+ f"PySpark outlier test failed for {description}: expected success but got {type(result_pyspark)}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py
new file mode 100644
index 0000000..786cfb5
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py
@@ -0,0 +1,511 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name_and_description():
+ """Test that the expectation name and description are correctly returned."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="test_col",
+ min_value=10,
+ max_value=20,
+ )
+
+ # Test expectation name (should delegate to quantile expectation)
+ assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", (
+ f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+ # Test description
+ description = expectation.get_description()
+ assert "median" in description, f"Expected 'median' in description: {description}"
+ assert "test_col" in description, f"Expected 'test_col' in description: {description}"
+ assert "10" in description, f"Expected '10' in description: {description}"
+ assert "20" in description, f"Expected '20' in description: {description}"
+
+ # Test that quantile is correctly set to 0.5
+ assert expectation.quantile == 0.5, (
+ f"Expected quantile to be 0.5 but got: {expectation.quantile}"
+ )
+ assert expectation.quantile_desc == "median", (
+ f"Expected quantile_desc to be 'median' but got: {expectation.quantile_desc}"
+ )
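+    # quantile_desc drives the wording of descriptions and failure messages
+    # (e.g. "median value ... is not between ..."); see the failure tests below.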
+
+
+def test_pandas_success_registry_and_suite():
+ """Test successful validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 25, 30, "basic success case"), # median = 27.5
+ ([25], 20, 30, "single row"), # median = 25
+ ([-20, -15, -10, -5], -15, -10, "negative values"), # median = -12.5
+ ([1.1, 2.5, 3.7, 3.8], 2.5, 3.5, "float values"), # median = 3.1
+ ([25, 25, 25, 25], 24, 26, "identical values"), # median = 25
+ ([20, 25.5, 30, 37], 27, 29, "mixed data types"), # median = 27.75
+ ([-5, 0, 0, 5], -1, 1, "with zeros"), # median = 0
+ (
+ [20, None, 30, None, 40],
+ 25,
+ 35,
+ "with nulls",
+ ), # median = 30 (nulls ignored)
+ ([10, 20, 30], 19, 21, "odd number of values"), # median = 20
+ ([10, 20, 30, 40], 24, 26, "even number of values"), # median = 25
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_median_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pandas_failure_registry_and_suite():
+ """Test failure validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, expected_median, description)
+ ([20, 25, 30, 35], 30, 35, 27.5, "median too low"),
+ ([20, 25, 30, 35], 20, 25, 27.5, "median too high"),
+ ([10, 20, 30], 25, 30, 20.0, "odd count median out of range"),
+        ([None, None, None], 25, 30, None, "all nulls"),
+        ([], 25, 30, None, "empty dataframe"),
+ ]
+
+ for data, min_val, max_val, expected_median, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Determine expected message
+ if expected_median is None:
+ expected_message = "Column 'col1' contains only null values."
+ else:
+ expected_message = f"Column 'col1' median value {expected_median} is not between {min_val} and {max_val}."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_median_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pandas_missing_column_registry_and_suite():
+ """Test missing column error for pandas DataFrames through both registry and suite."""
+ data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]})
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="nonexistent_col",
+ min_value=25,
+ max_value=30,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+    assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_median_between(
+ column_name="nonexistent_col", min_value=25, max_value=30
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_success_registry_and_suite(spark):
+ """Test successful validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 25, 30, "basic success case"), # median ≈ 27.5
+ ([25], 20, 30, "single row"), # median = 25
+ ([-20, -15, -10, -5], -15, -10, "negative values"), # median ≈ -12.5
+ ([20, None, 30, None, 40], 25, 35, "with nulls"), # median ≈ 30
+ ([10, 20, 30], 19, 21, "odd number of values"), # median ≈ 20
+ ([10, 20, 30, 40], 24, 26, "even number of values"), # median ≈ 25
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_median_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pyspark_failure_registry_and_suite(spark):
+ """Test failure validation for PySpark DataFrames through both registry and suite."""
+ import numpy as np
+
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 30, 35, "median too low"),
+ ([20, 25, 30, 35], 20, 25, "median too high"),
+ ([10, 20, 30], 25, 30, "odd count median out of range"),
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Calculate expected median for error message
+ expected_median = np.median(data)
+ expected_message = (
+ f"Column 'col1' median value {expected_median} is not between {min_val} and {max_val}."
+ )
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_median_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+ """Test null scenarios for PySpark DataFrames through both registry and suite."""
+ from pyspark.sql.types import IntegerType, StructField, StructType
+
+ # Test scenarios
+ test_scenarios = [
+ # (data_frame_creation, expected_message, description)
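+        # Lambdas defer creation so each scenario builds a fresh DataFrame at loop time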
+ (
+ lambda: spark.createDataFrame(
+ [{"col1": None}, {"col1": None}, {"col1": None}],
+ schema="struct",
+ ),
+ "Column 'col1' contains only null values.",
+ "all nulls",
+ ),
+ (
+ lambda: spark.createDataFrame(
+ [], StructType([StructField("col1", IntegerType(), True)])
+ ),
+ "Column 'col1' contains only null values.",
+ "empty dataframe",
+ ),
+ ]
+
+ for df_creator, expected_message, description in test_scenarios:
+ data_frame = df_creator()
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=25,
+ max_value=30,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_median_between(
+ column_name="col1", min_value=25, max_value=30
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+ """Test missing column error for PySpark DataFrames through both registry and suite."""
+ data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="nonexistent_col",
+ min_value=25,
+ max_value=30,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_median_between(
+ column_name="nonexistent_col", min_value=25, max_value=30
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_boundary_values_both_dataframes(spark):
+ """Test boundary values for both pandas and PySpark DataFrames."""
+ test_data = [20, 25, 30, 35] # median = 27.5
+
+ # Test boundary scenarios
+ boundary_tests = [
+ (27.5, 30, "exact minimum boundary"), # median exactly at min
+ (25, 27.5, "exact maximum boundary"), # median exactly at max
+ ]
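+    # Both bounds are inclusive: a median exactly equal to min_value or max_value passes.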
+
+ for min_val, max_val, boundary_desc in boundary_tests:
+ for df_type, data_frame in [
+ ("pandas", pd.DataFrame({"col1": test_data})),
+ (
+ "pyspark",
+ spark.createDataFrame([(val,) for val in test_data], ["col1"]),
+ ),
+ ]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Boundary test failed for {df_type} with {boundary_desc}: expected success but got {type(result)}"
+ )
+
+
+def test_median_calculation_specifics(spark):
+ """Test median calculation specifics for odd vs even number of elements."""
+ median_scenarios = [
+ # (data, expected_median, description)
+ ([1, 2, 3], 2, "odd count - middle element"),
+ ([1, 2, 3, 4], 2.5, "even count - average of middle two"),
+ ([5], 5, "single element"),
+ ([10, 10, 10], 10, "all identical values"),
+ ([1, 100], 50.5, "two elements - average"),
+ ([1, 2, 100], 2, "odd count with outlier"),
+ ([1, 2, 99, 100], 50.5, "even count with outliers"),
+ ]
+
+ for data, expected_median, description in median_scenarios:
+ # Set bounds around expected median
+ min_val = expected_median - 0.1
+ max_val = expected_median + 0.1
+
+ # Test pandas
+ data_frame = pd.DataFrame({"col1": data})
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Pandas median test failed for {description}: expected success but got {type(result)}"
+ )
+
+ # Test PySpark (for non-single element cases)
+ if len(data) > 1:
+ pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"])
+ result_pyspark = expectation.validate(data_frame=pyspark_df)
+ assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), (
+ f"PySpark median test failed for {description}: expected success but got {type(result_pyspark)}"
+ )
+
+
+def test_precision_handling():
+ """Test median calculation precision with various numeric types."""
+ # Test scenarios with different levels of precision
+ precision_tests = [
+ # (data, description)
+ ([1.1111, 2.2222, 3.3333], "high precision decimals"),
+ ([0.1, 0.2, 0.3, 0.4, 0.5], "decimal sequence"),
+ ([1e-6, 2e-6, 3e-6, 4e-6, 5e-6], "scientific notation"),
+ ([1.0, 1.5, 2.0, 2.5, 3.0], "half increments"),
+ ]
+
+    import numpy as np
+
+    for data, description in precision_tests:
+        data_frame = pd.DataFrame({"col1": data})
+        calculated_median = np.median(data)
+
+ # Use a small range around the calculated median
+ min_val = calculated_median - 0.001
+ max_val = calculated_median + 0.001
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Precision test failed for {description}: expected success but got {type(result)}"
+ )
+
+
+def test_suite_chaining():
+ """Test that the suite method returns self for method chaining."""
+ suite = DataFrameExpectationsSuite()
+ result = suite.expect_column_median_between(column_name="col1", min_value=25, max_value=30)
+ assert result is suite, f"Expected suite chaining to return same instance but got: {result}"
+
+
+def test_large_dataset_performance():
+ """Test the expectation with a larger dataset to ensure performance."""
+ import numpy as np
+
+ # Create a larger dataset with median around 50
+ large_data = np.random.normal(50, 10, 1001).tolist() # Use odd count for deterministic median
+ data_frame = pd.DataFrame({"col1": large_data})
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=45,
+ max_value=55,
+ )
+
+ result = expectation.validate(data_frame=data_frame)
+    # Should succeed: the median of 1001 normal(50, 10) draws is close to 50
+    assert isinstance(result, DataFrameExpectationSuccessMessage), (
+        f"Large dataset test failed: expected success but got {type(result)}"
+    )
+
+
+def test_outlier_resistance(spark):
+ """Test that median is resistant to outliers (unlike mean)."""
+ # Test data where median is stable despite extreme outliers
+ outlier_scenarios = [
+ # (data, min_val, max_val, description)
+ (
+ [1, 2, 3, 1000],
+ 1.5,
+ 2.5,
+ "high outlier doesn't affect median",
+ ), # median = 2.5
+ (
+ [-1000, 10, 20, 30],
+ 14,
+ 16,
+ "low outlier doesn't affect median",
+ ), # median = 15
+ (
+ [1, 2, 3, 4, 5, 1000000],
+ 2.5,
+ 3.5,
+ "extreme outlier ignored",
+ ), # median = 3.5
+ (
+ [-1000000, 1, 2, 3, 4, 5],
+ 2.5,
+ 3.5,
+ "extreme negative outlier ignored",
+ ), # median = 2.5
+ ]
+
+ for data, min_val, max_val, description in outlier_scenarios:
+ # Test with pandas
+ data_frame = pd.DataFrame({"col1": data})
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMedianBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Pandas outlier test failed for {description}: expected success but got {type(result)}"
+ )
+
+ # Test with PySpark
+ pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"])
+ result_pyspark = expectation.validate(data_frame=pyspark_df)
+ assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), (
+ f"PySpark outlier test failed for {description}: expected success but got {type(result_pyspark)}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py
new file mode 100644
index 0000000..fe438f2
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py
@@ -0,0 +1,480 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name_and_description():
+ """Test that the expectation name and description are correctly returned."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="test_col",
+ min_value=10,
+ max_value=20,
+ )
+
+ # Test expectation name (should delegate to quantile expectation)
+ assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", (
+ f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+ # Test description
+ description = expectation.get_description()
+ assert "minimum" in description, f"Expected 'minimum' in description: {description}"
+ assert "test_col" in description, f"Expected 'test_col' in description: {description}"
+ assert "10" in description, f"Expected '10' in description: {description}"
+ assert "20" in description, f"Expected '20' in description: {description}"
+
+ # Test that quantile is correctly set to 0.0
+ assert expectation.quantile == 0.0, (
+ f"Expected quantile to be 0.0 but got: {expectation.quantile}"
+ )
+ assert expectation.quantile_desc == "minimum", (
+ f"Expected quantile_desc to be 'minimum' but got: {expectation.quantile_desc}"
+ )
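+    # quantile_desc drives the wording of descriptions and failure messages
+    # (e.g. "minimum value ... is not between ..."); see the failure tests below.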
+
+
+def test_pandas_success_registry_and_suite():
+ """Test successful validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 15, 25, "basic success case"), # min = 20
+ ([25], 20, 30, "single row"), # min = 25
+ ([-20, -15, -10, -5], -25, -15, "negative values"), # min = -20
+ ([1.1, 2.5, 3.7, 3.8], 1.0, 1.5, "float values"), # min = 1.1
+ ([25, 25, 25, 25], 24, 26, "identical values"), # min = 25
+ ([20, 25.5, 30, 37], 15, 25, "mixed data types"), # min = 20
+ ([-5, 0, 0, 2], -10, -1, "with zeros"), # min = -5
+ (
+ [20, None, 35, None, 25],
+ 15,
+ 25,
+ "with nulls",
+ ), # min = 20 (nulls ignored)
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_min_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pandas_failure_registry_and_suite():
+ """Test failure validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, expected_min, description)
+ ([20, 25, 30, 35], 25, 35, 20, "minimum too low"),
+ ([20, 25, 30, 35], 10, 15, 20, "minimum too high"),
+ ([None, None, None], 15, 25, None, "all nulls"),
+ ([], 15, 25, None, "empty dataframe"),
+ ]
+
+ for data, min_val, max_val, expected_min, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Determine expected message
+ if expected_min is None:
+ expected_message = "Column 'col1' contains only null values."
+ else:
+ expected_message = f"Column 'col1' minimum value {expected_min} is not between {min_val} and {max_val}."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_min_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pandas_missing_column_registry_and_suite():
+ """Test missing column error for pandas DataFrames through both registry and suite."""
+ data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]})
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="nonexistent_col",
+ min_value=15,
+ max_value=25,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_min_between(
+ column_name="nonexistent_col", min_value=15, max_value=25
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_success_registry_and_suite(spark):
+ """Test successful validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, description)
+ ([20, 25, 30, 35], 15, 25, "basic success case"), # min = 20
+ ([25], 20, 30, "single row"), # min = 25
+ ([-20, -15, -10, -5], -25, -15, "negative values"), # min = -20
+ ([20, None, 35, None, 25], 15, 25, "with nulls"), # min = 20
+ ]
+
+ for data, min_val, max_val, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_min_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pyspark_failure_registry_and_suite(spark):
+ """Test failure validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios
+ test_scenarios = [
+ # (data, min_value, max_value, expected_min, description)
+ ([20, 25, 30, 35], 25, 35, 20, "minimum too low"),
+ ([20, 25, 30, 35], 10, 15, 20, "minimum too high"),
+ ]
+
+ for data, min_val, max_val, expected_min, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+ expected_message = (
+ f"Column 'col1' minimum value {expected_min} is not between {min_val} and {max_val}."
+ )
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_min_between(
+ column_name="col1", min_value=min_val, max_value=max_val
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+ """Test null scenarios for PySpark DataFrames through both registry and suite."""
+ from pyspark.sql.types import IntegerType, StructField, StructType
+
+ # Test scenarios
+ test_scenarios = [
+ # (data_frame_creation, expected_message, description)
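+        # Lambdas defer creation so each scenario builds a fresh DataFrame at loop time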
+ (
+ lambda: spark.createDataFrame(
+ [{"col1": None}, {"col1": None}, {"col1": None}],
+ schema="struct",
+ ),
+ "Column 'col1' contains only null values.",
+ "all nulls",
+ ),
+ (
+ lambda: spark.createDataFrame(
+ [], StructType([StructField("col1", IntegerType(), True)])
+ ),
+ "Column 'col1' contains only null values.",
+ "empty dataframe",
+ ),
+ ]
+
+ for df_creator, expected_message, description in test_scenarios:
+ data_frame = df_creator()
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=15,
+ max_value=25,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_min_between(
+ column_name="col1", min_value=15, max_value=25
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+ """Test missing column error for PySpark DataFrames through both registry and suite."""
+ data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+ expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="nonexistent_col",
+ min_value=15,
+ max_value=25,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_min_between(
+ column_name="nonexistent_col", min_value=15, max_value=25
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_boundary_values_both_dataframes(spark):
+ """Test boundary values for both pandas and PySpark DataFrames."""
+ test_data = [20, 25, 30, 35] # min = 20
+
+ # Test boundary scenarios
+ boundary_tests = [
+ (20, 25, "exact minimum boundary"), # min exactly at min
+ (15, 20, "exact maximum boundary"), # min exactly at max
+ ]
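+    # Both bounds are inclusive: a minimum exactly equal to min_value or max_value passes.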
+
+ for min_val, max_val, boundary_desc in boundary_tests:
+ for df_type, data_frame in [
+ ("pandas", pd.DataFrame({"col1": test_data})),
+ (
+ "pyspark",
+ spark.createDataFrame([(val,) for val in test_data], ["col1"]),
+ ),
+ ]:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Boundary test failed for {df_type} with {boundary_desc}: expected success but got {type(result)}"
+ )
+
+
+def test_minimum_specific_scenarios(spark):
+ """Test minimum-specific scenarios including edge cases."""
+ min_scenarios = [
+ # (data, expected_min, description)
+ ([100, 50, 75, 25], 25, "minimum with mixed order"),
+ ([0, 1, 2, 3], 0, "minimum is zero"),
+ ([-10, -5, -1, -20], -20, "minimum with negatives"),
+ ([1.001, 1.002, 1.003], 1.001, "minimum with small differences"),
+ ([1e6, 1e5, 1e4], 1e4, "minimum with large numbers"),
+ ([1e-6, 1e-5, 1e-4], 1e-6, "minimum with very small numbers"),
+ ]
+
+ for data, expected_min, description in min_scenarios:
+ # Set bounds around expected minimum
+ min_val = expected_min - 0.1
+ max_val = expected_min + 0.1
+
+ # Test pandas
+ data_frame = pd.DataFrame({"col1": data})
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Pandas minimum test failed for {description}: expected success but got {type(result)}"
+ )
+
+ # Test PySpark
+ pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"])
+ result_pyspark = expectation.validate(data_frame=pyspark_df)
+ assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), (
+ f"PySpark minimum test failed for {description}: expected success but got {type(result_pyspark)}"
+ )
+
+
+def test_suite_chaining():
+ """Test that the suite method returns self for method chaining."""
+ suite = DataFrameExpectationsSuite()
+ result = suite.expect_column_min_between(column_name="col1", min_value=15, max_value=25)
+ assert result is suite, f"Expected suite chaining to return same instance but got: {result}"
+
+
+def test_large_dataset_performance():
+ """Test the expectation with a larger dataset to ensure performance."""
+ import numpy as np
+
+ # Create a larger dataset with minimum around 10
+ large_data = np.random.uniform(10, 60, 1000).tolist()
+ data_frame = pd.DataFrame({"col1": large_data})
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=9,
+ max_value=12,
+ )
+
+ result = expectation.validate(data_frame=data_frame)
+    # Should succeed: the minimum of 1000 uniform(10, 60) draws is close to 10
+    assert isinstance(result, DataFrameExpectationSuccessMessage), (
+        f"Large dataset test failed: expected success but got {type(result)}"
+    )
+
+
+def test_outlier_impact_on_minimum(spark):
+ """Test how outliers affect minimum values (unlike median, minimum is sensitive to outliers)."""
+ # Test data where outliers affect the minimum
+ outlier_scenarios = [
+ # (data, min_val, max_val, description)
+ ([1, 2, 3, -1000], -1100, -900, "extreme low outlier becomes minimum"),
+ ([100, 200, 300, 50], 40, 60, "outlier changes minimum significantly"),
+ ([1.5, 2.0, 2.5, 0.1], 0.05, 0.15, "small outlier affects minimum"),
+ ]
+
+ for data, min_val, max_val, description in outlier_scenarios:
+ # Test with pandas
+ data_frame = pd.DataFrame({"col1": data})
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Pandas outlier test failed for {description}: expected success but got {type(result)}"
+ )
+
+ # Test with PySpark
+ pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"])
+ result_pyspark = expectation.validate(data_frame=pyspark_df)
+ assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), (
+ f"PySpark outlier test failed for {description}: expected success but got {type(result_pyspark)}"
+ )
+
+
+def test_edge_case_single_unique_value(spark):
+ """Test minimum when all values are the same."""
+ # When all values are identical, min = max = that value
+ identical_scenarios = [
+ ([42, 42, 42, 42], "integer repetition"),
+ ([3.14, 3.14, 3.14], "float repetition"),
+ ([-7, -7, -7, -7, -7], "negative repetition"),
+ ([0, 0, 0], "zero repetition"),
+ ]
+
+ for data, description in identical_scenarios:
+ expected_value = data[0] # All values are the same
+ min_val = expected_value - 0.1
+ max_val = expected_value + 0.1
+
+ # Test pandas
+ data_frame = pd.DataFrame({"col1": data})
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnMinBetween",
+ column_name="col1",
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Pandas identical values test failed for {description}: expected success but got {type(result)}"
+ )
+
+ # Test PySpark
+ pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"])
+ result_pyspark = expectation.validate(data_frame=pyspark_df)
+ assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), (
+ f"PySpark identical values test failed for {description}: expected success but got {type(result_pyspark)}"
+ )
diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py
new file mode 100644
index 0000000..d2e59d2
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py
@@ -0,0 +1,393 @@
+import pytest
+import numpy as np
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name_and_description():
+ """Test that the expectation name and description are correctly returned."""
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="test_col",
+ quantile=0.5,
+ min_value=20,
+ max_value=30,
+ )
+
+ # Test expectation name
+ assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", (
+ f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+ # Test description messages for different quantiles
+ test_cases = [
+ (0.0, "minimum"),
+ (0.25, "25th percentile"),
+ (0.5, "median"),
+ (0.75, "75th percentile"),
+ (1.0, "maximum"),
+ (0.9, "0.9 quantile"),
+ ]
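+    # Quantiles without a named alias fall back to a generic "<q> quantile" label.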
+
+ for quantile, expected_desc in test_cases:
+ exp = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="test_col",
+ quantile=quantile,
+ min_value=10,
+ max_value=20,
+ )
+ assert exp.quantile_desc == expected_desc, (
+ f"Expected quantile_desc '{expected_desc}' for quantile {quantile} but got: {exp.quantile_desc}"
+ )
+ assert expected_desc in exp.get_description(), (
+ f"Expected '{expected_desc}' in description: {exp.get_description()}"
+ )
+
+
+def test_pandas_success_registry_and_suite():
+ """Test successful validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios for different quantiles
+ test_scenarios = [
+ # (data, quantile, min_value, max_value, description)
+ ([20, 25, 30, 35], 0.0, 15, 25, "minimum success"), # min = 20
+ ([20, 25, 30, 35], 1.0, 30, 40, "maximum success"), # max = 35
+ ([20, 25, 30, 35], 0.5, 25, 30, "median success"), # median = 27.5
+        ([20, 25, 30, 35], 0.25, 20, 25, "25th percentile success"), # 25th = 23.75
+ ([10, 20, 30, 40, 50], 0.33, 20, 30, "33rd percentile success"), # ~23.2
+ ([25], 0.5, 20, 30, "single row median"), # median = 25
+ ([20, None, 25, None, 30], 0.5, 20, 30, "with nulls median"), # median = 25
+ ]
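+    # Expected values above assume numpy/pandas-style linear interpolation:
+    # h = q * (n - 1); value = x[floor(h)] + (h - floor(h)) * (x[ceil(h)] - x[floor(h)])
+    # e.g. q = 0.25 on [20, 25, 30, 35]: h = 0.75 -> 20 + 0.75 * 5 = 23.75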
+
+ for data, quantile, min_val, max_val, description in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pandas_failure_registry_and_suite():
+ """Test failure validation for pandas DataFrames through both registry and suite."""
+ # Test data scenarios for different quantiles
+ test_scenarios = [
+ # (data, quantile, min_value, max_value, expected_message)
+ (
+ [20, 25, 30, 35],
+ 0.0,
+ 25,
+ 35,
+ "Column 'col1' minimum value 20 is not between 25 and 35.",
+ ),
+ (
+ [20, 25, 30, 35],
+ 1.0,
+ 40,
+ 50,
+ "Column 'col1' maximum value 35 is not between 40 and 50.",
+ ),
+ (
+ [20, 25, 30, 35],
+ 0.5,
+ 30,
+ 35,
+ "Column 'col1' median value 27.5 is not between 30 and 35.",
+ ),
+ (
+ [20, 25, 30, 35],
+ 0.75,
+ 25,
+ 30,
+ f"Column 'col1' 75th percentile value {np.quantile([20, 25, 30, 35], 0.75)} is not between 25 and 30.",
+ ),
+ (
+ [None, None, None],
+ 0.5,
+ 20,
+ 30,
+ "Column 'col1' contains only null values.",
+ ),
+ ([], 0.5, 20, 30, "Column 'col1' contains only null values."),
+ ]
+
+ for data, quantile, min_val, max_val, expected_message in test_scenarios:
+ data_frame = pd.DataFrame({"col1": data})
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for quantile {quantile}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_success_registry_and_suite(spark):
+ """Test successful validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios for different quantiles
+ test_scenarios = [
+ # (data, quantile, min_value, max_value, description)
+ ([20, 25, 30, 35], 0.0, 15, 25, "minimum success"), # min = 20
+ ([20, 25, 30, 35], 1.0, 30, 40, "maximum success"), # max = 35
+ ([20, 25, 30, 35], 0.5, 25, 30, "median success"), # median ≈ 27.5
+        ([20, 25, 30, 35], 0.9, 30, 40, "90th percentile success"), # ≈ 33.5–35
+ ([25], 0.5, 20, 30, "single row median"), # median = 25
+ ([20, None, 25, None, 30], 0.5, 20, 30, "with nulls median"), # median ≈ 25
+ ]
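+    # Exact values may differ from numpy's interpolation if the PySpark backend
+    # computes an approximate quantile, so the bounds above are kept wide.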
+
+ for data, quantile, min_val, max_val, description in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+ ), f"Registry test failed for {description}: expected success but got {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ suite_result = suite.run(data_frame=data_frame)
+ assert suite_result is None, (
+ f"Suite test failed for {description}: expected None but got {suite_result}"
+ )
+
+
+def test_pyspark_failure_registry_and_suite(spark):
+ """Test failure validation for PySpark DataFrames through both registry and suite."""
+ # Test data scenarios for different quantiles
+ test_scenarios = [
+ # (data, quantile, min_value, max_value, expected_message)
+ (
+ [20, 25, 30, 35],
+ 0.0,
+ 25,
+ 35,
+ "Column 'col1' minimum value 20 is not between 25 and 35.",
+ ),
+ (
+ [20, 25, 30, 35],
+ 1.0,
+ 40,
+ 50,
+ "Column 'col1' maximum value 35 is not between 40 and 50.",
+ ),
+ (
+ [20, 25, 30, 35],
+ 0.5,
+ 30,
+ 35,
+ f"Column 'col1' median value {np.median([20, 25, 30, 35])} is not between 30 and 35.",
+ ),
+ ]
+
+ for data, quantile, min_val, max_val, expected_message in test_scenarios:
+ data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+ column_name="col1",
+ quantile=quantile,
+ min_value=min_val,
+ max_value=max_val,
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+ """Test null scenarios for PySpark DataFrames through both registry and suite."""
+ from pyspark.sql.types import IntegerType, StructField, StructType
+
+ # Test scenarios
+ test_scenarios = [
+ # (data_frame_creation, expected_message, description)
+ (
+ lambda: spark.createDataFrame(
+ [{"col1": None}, {"col1": None}, {"col1": None}],
+ schema="struct",
+ ),
+ "Column 'col1' contains only null values.",
+ "all nulls",
+ ),
+ (
+ lambda: spark.createDataFrame(
+ [], StructType([StructField("col1", IntegerType(), True)])
+ ),
+ "Column 'col1' contains only null values.",
+ "empty dataframe",
+ ),
+ ]
+
+ for df_creator, expected_message, description in test_scenarios:
+ data_frame = df_creator()
+
+ # Test through registry
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=0.5,
+ min_value=20,
+ max_value=30,
+ )
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ message=expected_message,
+ )
+ assert str(result) == str(expected_failure), (
+ f"Registry test failed for {description}: expected failure message but got {result}"
+ )
+
+ # Test through suite
+ suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+ column_name="col1", quantile=0.5, min_value=20, max_value=30
+ )
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_invalid_quantile_range():
+ """Test that invalid quantile values raise ValueError."""
+ invalid_quantiles = [
+ (1.5, "greater than 1.0"),
+ (-0.1, "less than 0.0"),
+ ]
+
+ for invalid_quantile, description in invalid_quantiles:
+ with pytest.raises(ValueError) as context:
+ DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=invalid_quantile,
+ min_value=20,
+ max_value=30,
+ )
+ assert "Quantile must be between 0.0 and 1.0" in str(context.value), (
+ f"Expected quantile validation error for {description} but got: {str(context.value)}"
+ )
+
+
+def test_boundary_quantile_values():
+ """Test quantile values at the boundaries (0.0 and 1.0)."""
+ boundary_cases = [
+ (0.0, "minimum"),
+ (1.0, "maximum"),
+ ]
+
+ for quantile, expected_desc in boundary_cases:
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=quantile,
+ min_value=15,
+ max_value=25,
+ )
+ assert expectation.quantile == quantile, (
+ f"Expected quantile {quantile} but got: {expectation.quantile}"
+ )
+ assert expectation.quantile_desc == expected_desc, (
+ f"Expected quantile_desc '{expected_desc}' but got: {expectation.quantile_desc}"
+ )
+
+
+def test_large_dataset_performance():
+ """Test the expectation with a larger dataset to ensure performance."""
+ # Create a larger dataset with a seeded RNG so the test is deterministic
+ rng = np.random.default_rng(42)
+ large_data = rng.normal(50, 10, 1000).tolist()
+ data_frame = pd.DataFrame({"col1": large_data})
+
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationColumnQuantileBetween",
+ column_name="col1",
+ quantile=0.5, # median
+ min_value=45,
+ max_value=55,
+ )
+
+ result = expectation.validate(data_frame=data_frame)
+ # Should succeed: with this seed the sample median of normal(50, 10) lies well within [45, 55]
+ assert isinstance(result, DataFrameExpectationSuccessMessage), (
+ f"Large dataset test failed: expected success but got {type(result)}"
+ )
diff --git a/tests/expectations_implemented/column_expectations/__init__.py b/tests/expectations_implemented/column_expectations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/__init__.py b/tests/expectations_implemented/column_expectations/any_value_expectations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py
new file mode 100644
index 0000000..6e6618e
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py
@@ -0,0 +1,193 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueEquals",
+ column_name="col1",
+ value=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueEquals", (
+ f"Expected 'ExpectationValueEquals' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col1": [5, 5, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ """
+ Test the expectation for pandas DataFrame with violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = pd.DataFrame({"col1": [3, 4]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not equal to 5.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = spark.createDataFrame([(5,), (5,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ """
+ Test the expectation for PySpark DataFrame with violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(3,), (4,)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not equal to 5.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ """
+ Test that an error is raised when the specified column is missing.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col2": [5, 5, 5]})
+
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_equals(
+ column_name="col1", value=5
+ )
+ data_frame = pd.DataFrame({"col1": [5, 5, 5]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_equals(
+ column_name="col1", value=5
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_equals(
+ column_name="col1", value=5
+ )
+ data_frame = spark.createDataFrame([(5,), (5,), (5,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_equals(
+ column_name="col1", value=5
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_equals(
+ column_name="col1", value=5
+ )
+ data_frame = spark.createDataFrame([(5,), (5,), (5,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py
new file mode 100644
index 0000000..3621f0a
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
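+ """
+ Test that the expectation name is correctly returned.
+ """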
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueIn", (
+ f"Expected 'ExpectationValueIn' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
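+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """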
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueIn")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
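+ """
+ Test the expectation for pandas DataFrame with violations.
+ """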
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = pd.DataFrame({"col1": [1, 4, 5, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = pd.DataFrame({"col1": [4, 5]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not in [1, 2, 3].",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
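+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """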
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueIn")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
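+ """
+ Test the expectation for PySpark DataFrame with violations.
+ """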
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = spark.createDataFrame([(1,), (4,), (5,), (2,), (3,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(4,), (5,)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not in [1, 2, 3].",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
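+ """
+ Test that an error is raised when the specified column is missing.
+ """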
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = pd.DataFrame({"col2": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
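+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """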
+ expectations_suite = DataFrameExpectationsSuite().expect_value_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
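+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """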
+ expectations_suite = DataFrameExpectationsSuite().expect_value_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = pd.DataFrame({"col1": [1, 4, 5, 2, 3]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
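+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """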
+ expectations_suite = DataFrameExpectationsSuite().expect_value_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
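+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """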
+ expectations_suite = DataFrameExpectationsSuite().expect_value_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = spark.createDataFrame([(1,), (4,), (5,), (2,), (3,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
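+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """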
+ expectations_suite = DataFrameExpectationsSuite().expect_value_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py
new file mode 100644
index 0000000..2fff4b9
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
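+ """
+ Test that the expectation name is correctly returned.
+ """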
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name="col1",
+ value=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueNotEquals", (
+ f"Expected 'ExpectationValueNotEquals' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
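+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """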
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 6]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
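+ """
+ Test the expectation for pandas DataFrame with violations.
+ """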
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col1": [3, 5, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = pd.DataFrame({"col1": [5, 5]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is equal to 5.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
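+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """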
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (6,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
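+ """
+ Test the expectation for PySpark DataFrame with violations.
+ """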
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = spark.createDataFrame([(3,), (5,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(5,), (5,)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is equal to 5.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
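+ """
+ Test that an error is raised when the specified column is missing.
+ """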
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotEquals",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col2": [3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
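+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """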
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals(
+ column_name="col1", value=5
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 6]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
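+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """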
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals(
+ column_name="col1", value=5
+ )
+ data_frame = pd.DataFrame({"col1": [3, 5, 5]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
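+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """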
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals(
+ column_name="col1", value=5
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (6,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
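+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """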
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals(
+ column_name="col1", value=5
+ )
+ data_frame = spark.createDataFrame([(3,), (5,), (5,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
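+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """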
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals(
+ column_name="col1", value=5
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py
new file mode 100644
index 0000000..ef40723
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
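+ """
+ Test that the expectation name is correctly returned.
+ """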
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueNotIn", (
+ f"Expected 'ExpectationValueNotIn' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
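+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """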
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = pd.DataFrame({"col1": [4, 5, 6, 7]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotIn")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
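+ """
+ Test the expectation for pandas DataFrame with violations.
+ """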
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = pd.DataFrame({"col1": [1, 2]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is in [1, 2, 3].",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
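+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """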
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = spark.createDataFrame([(4,), (5,), (6,), (7,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotIn")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
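+ """
+ Test the expectation for PySpark DataFrame with violations.
+ """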
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(1,), (2,)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is in [1, 2, 3].",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
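+ """
+ Test that an error is raised when the specified column is missing.
+ """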
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotIn",
+ column_name="col1",
+ values=[1, 2, 3],
+ )
+ data_frame = pd.DataFrame({"col2": [4, 5, 6]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
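+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """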
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = pd.DataFrame({"col1": [4, 5, 6, 7]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
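+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """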
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 4, 5]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
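+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """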
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = spark.createDataFrame([(4,), (5,), (6,), (7,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
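+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """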
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (4,), (5,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
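+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """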
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_in(
+ column_name="col1", values=[1, 2, 3]
+ )
+ data_frame = spark.createDataFrame([(4,), (5,), (6,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py
new file mode 100644
index 0000000..2f5afe3
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py
@@ -0,0 +1,142 @@
+import pytest
+import numpy as np
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
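+ """
+ Test that the expectation name is correctly returned.
+ """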
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name="col1",
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueNotNull", (
+ f"Expected 'ExpectationValueNotNull' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
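+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """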
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name="col1",
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotNull")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
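+ """
+ Test the expectation for pandas DataFrame with violations.
+ """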
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name="col1",
+ )
+ data_frame = pd.DataFrame({"col1": [1, None, np.nan]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = pd.DataFrame({"col1": [None, np.nan]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is null.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
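+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """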
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name="col1",
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotNull")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
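+ """
+ Test the expectation for PySpark DataFrame with violations.
+ """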
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name="col1",
+ )
+ data_frame = spark.createDataFrame([(1,), (None,), (None,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(None,), (None,)], "col1: int")
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is null.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
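+ """
+ Test that an error is raised when the specified column is missing.
+ """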
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNotNull",
+ column_name="col1",
+ )
+ data_frame = pd.DataFrame({"col2": [1, 2, 3]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
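+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """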
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1")
+ data_frame = pd.DataFrame({"col1": [1, 2, 3]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
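+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """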
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1")
+ data_frame = pd.DataFrame({"col1": [1, None, np.nan]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
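+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """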
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1")
+ data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
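+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """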
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1")
+ data_frame = spark.createDataFrame([(1,), (None,), (None,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
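+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """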
+ expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1")
+ data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py
new file mode 100644
index 0000000..7cf276f
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py
@@ -0,0 +1,142 @@
+import pytest
+import numpy as np
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
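+ """
+ Test that the expectation name is correctly returned.
+ """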
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNull",
+ column_name="col1",
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueNull", (
+ f"Expected 'ExpectationValueNull' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
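+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """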
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNull",
+ column_name="col1",
+ )
+ data_frame = pd.DataFrame({"col1": [None, np.nan, None]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNull")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
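+ """
+ Test the expectation for pandas DataFrame with violations.
+ """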
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNull",
+ column_name="col1",
+ )
+ data_frame = pd.DataFrame({"col1": [None, 1, 2]}, dtype="Int64")
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = pd.DataFrame({"col1": [1, 2]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not null.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
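+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """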
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNull",
+ column_name="col1",
+ )
+ data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int")
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNull")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
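+ """
+ Test the expectation for PySpark DataFrame with violations.
+ """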
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNull",
+ column_name="col1",
+ )
+ data_frame = spark.createDataFrame([(None,), (1,), (2,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(1,), (2,)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not null.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
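+ """
+ Test that an error is raised when the specified column is missing.
+ """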
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueNull",
+ column_name="col1",
+ )
+ data_frame = pd.DataFrame({"col2": [None, None, None]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
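+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """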
+ expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1")
+ data_frame = pd.DataFrame({"col1": [None, None, None]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
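+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """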
+ expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1")
+ data_frame = pd.DataFrame({"col1": [None, 1, 2]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
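+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """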
+ expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1")
+ data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int")
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
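+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """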
+ expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1")
+ data_frame = spark.createDataFrame([(None,), (1,), (2,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
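+ """
+ Test that an error is raised when the specified column is missing in PySpark DataFrame.
+ """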
+ expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1")
+ data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col2: int")
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/__init__.py b/tests/expectations_implemented/column_expectations/numerical_expectations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py
new file mode 100644
index 0000000..dbde80b
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py
@@ -0,0 +1,163 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
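+ """
+ Test that the expectation name is correctly returned.
+ """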
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueBetween", (
+ f"Expected 'ExpectationValueBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
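+ """
+ Test the expectation for pandas DataFrame with no violations.
+ """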
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ data_frame = pd.DataFrame({"col1": [2, 3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueBetween")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
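+ """
+ Test the expectation for pandas DataFrame with violations.
+ """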
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 6]})
+ expected_violations = pd.DataFrame({"col1": [1, 6]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not between 2 and 5.",
+ limit_violations=5,
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_success(spark):
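+ """
+ Test the expectation for PySpark DataFrame with no violations.
+ """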
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ data_frame = spark.createDataFrame([(2,), (3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueBetween")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
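+ """
+ Test the expectation for PySpark DataFrame with violations.
+ """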
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (6,)], ["col1"])
+ expected_violations = spark.createDataFrame([(1,), (6,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' is not between 2 and 5.",
+ limit_violations=5,
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_column_missing_error():
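+ """
+ Test that an error is raised when the specified column is missing.
+ """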
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueBetween",
+ column_name="col1",
+ min_value=2,
+ max_value=5,
+ )
+ data_frame = pd.DataFrame({"col2": [2, 3, 4]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
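+ """
+ Test the expectation suite for pandas DataFrame with no violations.
+ """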
+ expectations_suite = DataFrameExpectationsSuite().expect_value_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = pd.DataFrame({"col1": [2, 3, 4, 5]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
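+ """
+ Test the expectation suite for pandas DataFrame with violations.
+ """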
+ expectations_suite = DataFrameExpectationsSuite().expect_value_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = pd.DataFrame({"col1": [1, 2, 3, 6]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
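+ """
+ Test the expectation suite for PySpark DataFrame with no violations.
+ """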
+ expectations_suite = DataFrameExpectationsSuite().expect_value_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = spark.createDataFrame([(2,), (3,), (4,), (5,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
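+ """
+ Test the expectation suite for PySpark DataFrame with violations.
+ """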
+ expectations_suite = DataFrameExpectationsSuite().expect_value_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = spark.createDataFrame([(1,), (2,), (3,), (6,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_column_missing_error():
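+ """
+ Test that an error is raised when the specified column is missing.
+ """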
+ expectations_suite = DataFrameExpectationsSuite().expect_value_between(
+ column_name="col1", min_value=2, max_value=5
+ )
+ data_frame = pd.DataFrame({"col2": [2, 3, 4]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py
new file mode 100644
index 0000000..19c5d9e
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py
@@ -0,0 +1,196 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name="col1",
+ value=2,
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueGreaterThan", (
+ f"Expected 'ExpectationValueGreaterThan' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+ Test the greater than expectation for pandas dataframe.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name="col1",
+ value=2,
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueGreaterThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ """
+ Test the greater than expectation for pandas dataframe with violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name="col1",
+ value=3,
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = pd.DataFrame({"col1": [3]})
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 1 row(s) where 'col1' is not greater than 3.",
+ limit_violations=5,
+ )
+
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_success(spark):
+ """
+ Test the greater than expectation for pyspark dataframe.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name="col1",
+ value=2,
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueGreaterThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ """
+ Test the greater than expectation for pyspark dataframe with violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name="col1",
+ value=3,
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(3,)], ["col1"])
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 1 row(s) where 'col1' is not greater than 3.",
+ limit_violations=5,
+ )
+
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_column_missing_error():
+ """
+ Test the error when the specified column is missing in the dataframe.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueGreaterThan",
+ column_name="col1",
+ value=2,
+ )
+ data_frame = pd.DataFrame({"col2": [3, 4, 5]})
+
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ """
+ Test the greater than expectation for pandas dataframe with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than(
+ column_name="col1", value=2
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """
+ Test the greater than expectation for pandas dataframe with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than(
+ column_name="col1", value=3
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """
+ Test the greater than expectation for pyspark dataframe with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than(
+ column_name="col1", value=2
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """
+ Test the greater than expectation for pyspark dataframe with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than(
+ column_name="col1", value=3
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_column_missing_error():
+ """
+ Test the greater than expectation for dataframe with missing column.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than(
+ column_name="col1", value=2
+ )
+ data_frame = pd.DataFrame({"col2": [3, 4, 5]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py
new file mode 100644
index 0000000..e1f4292
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py
@@ -0,0 +1,198 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name="col1",
+ value=2,
+ )
+ assert expectation.get_expectation_name() == "ExpectationValueLessThan", (
+ f"Expected 'ExpectationValueLessThan' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+ Test the less than expectation for pandas dataframe.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name="col1",
+ value=6,
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueLessThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ """
+ Test the less than expectation for pandas dataframe with violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = expectation.validate(data_frame=data_frame)
+
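+    # With value=5, only the row where col1 == 5 fails the strict less-than check.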
+ expected_violations = pd.DataFrame({"col1": [5]})
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 1 row(s) where 'col1' is not less than 5.",
+ limit_violations=5,
+ )
+
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_expectation_pyspark_success(spark):
+ """
+    Test the less-than expectation for a PySpark DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name="col1",
+ value=6,
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueLessThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ """
+    Test the less-than expectation for a PySpark DataFrame with violations.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([(5,)], ["col1"])
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 1 row(s) where 'col1' is not less than 5.",
+ limit_violations=5,
+ )
+
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_column_missing_error():
+ """
+    Test the error when the specified column is missing in the DataFrame.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationValueLessThan",
+ column_name="col1",
+ value=5,
+ )
+ data_frame = pd.DataFrame({"col2": [3, 4, 5]})
+
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ """
+    Test the expectation for a pandas DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_less_than(
+ column_name="col1", value=6
+ )
+
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ """
+    Test the expectation for a pandas DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_less_than(
+ column_name="col1", value=5
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ """
+    Test the expectation for a PySpark DataFrame with no violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_less_than(
+ column_name="col1", value=6
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ """
+    Test the expectation for a PySpark DataFrame with violations.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_less_than(
+ column_name="col1", value=5
+ )
+ data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_column_missing_error():
+ """
+ Test the error when the specified column is missing in the DataFrame.
+ """
+ expectations_suite = DataFrameExpectationsSuite().expect_value_less_than(
+ column_name="col1", value=5
+ )
+ data_frame = pd.DataFrame({"col2": [3, 4, 5]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/__init__.py b/tests/expectations_implemented/column_expectations/string_expectations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py
new file mode 100644
index 0000000..808c481
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringContains",
+ column_name="col1",
+ substring="foo",
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringContains", (
+ f"Expected 'ExpectationStringContains' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "barfoo"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringContains")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]})
+ result = expectation.validate(data_frame=data_frame)
+
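+    # "bar" and "baz" do not contain the substring "foo"; "foobar" passes.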
+ expected_violations = pd.DataFrame({"col1": ["bar", "baz"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' does not contain 'foo'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("barfoo",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringContains")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("bar",), ("baz",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' does not contain 'foo'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = pd.DataFrame({"col2": ["foobar", "foo123", "barfoo"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "barfoo"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("barfoo",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("barfoo",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py
new file mode 100644
index 0000000..e2f3ca9
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name="col1",
+ suffix="bar",
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringEndsWith", (
+ f"Expected 'ExpectationStringEndsWith' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name="col1",
+ suffix="bar",
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbar"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringEndsWith")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name="col1",
+ suffix="bar",
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]})
+ result = expectation.validate(data_frame=data_frame)
+
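+    # Only "baz" lacks the "bar" suffix; "foobar" and "bar" both end with it.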
+ expected_violations = pd.DataFrame({"col1": ["baz"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 1 row(s) where 'col1' does not end with 'bar'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name="col1",
+ suffix="bar",
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbar",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringEndsWith")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name="col1",
+ suffix="bar",
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("baz",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 1 row(s) where 'col1' does not end with 'bar'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringEndsWith",
+ column_name="col1",
+ suffix="bar",
+ )
+ data_frame = pd.DataFrame({"col2": ["foobar", "bar", "bazbar"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with(
+ column_name="col1", suffix="bar"
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbar"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with(
+ column_name="col1", suffix="bar"
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with(
+ column_name="col1", suffix="bar"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbar",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with(
+ column_name="col1", suffix="bar"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with(
+ column_name="col1", suffix="bar"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbar",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py
new file mode 100644
index 0000000..7ccd6eb
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py
@@ -0,0 +1,163 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name="col1",
+ min_length=3,
+ max_length=6,
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringLengthBetween", (
+ f"Expected 'ExpectationStringLengthBetween' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name="col1",
+ min_length=3,
+ max_length=6,
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bazz", "hello", "foobar"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthBetween")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name="col1",
+ min_length=3,
+ max_length=6,
+ )
+ data_frame = pd.DataFrame({"col1": ["fo", "bazz", "hellothere", "foobar"]})
+ result = expectation.validate(data_frame=data_frame)
+
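+    # "fo" (length 2) and "hellothere" (length 10) fall outside the inclusive 3-6 range.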
+ expected_violations = pd.DataFrame({"col1": ["fo", "hellothere"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not between 3 and 6.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name="col1",
+ min_length=3,
+ max_length=6,
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bazz",), ("hello",), ("foobar",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthBetween")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name="col1",
+ min_length=3,
+ max_length=6,
+ )
+ data_frame = spark.createDataFrame([("fo",), ("bazz",), ("hellothere",), ("foobar",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("fo",), ("hellothere",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not between 3 and 6.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthBetween",
+ column_name="col1",
+ min_length=3,
+ max_length=6,
+ )
+ data_frame = pd.DataFrame({"col2": ["foo", "bazz", "hello"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_between(
+ column_name="col1", min_length=3, max_length=6
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bazz", "hello", "foobar"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_between(
+ column_name="col1", min_length=3, max_length=6
+ )
+ data_frame = pd.DataFrame({"col1": ["fo", "bazz", "hellothere", "foobar"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_between(
+ column_name="col1", min_length=3, max_length=6
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bazz",), ("hello",), ("foobar",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_between(
+ column_name="col1", min_length=3, max_length=6
+ )
+ data_frame = spark.createDataFrame([("fo",), ("bazz",), ("hellothere",), ("foobar",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_between(
+ column_name="col1", min_length=3, max_length=6
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bazz",), ("hello",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py
new file mode 100644
index 0000000..ff9c3e7
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name="col1",
+ length=3,
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringLengthEquals", (
+ f"Expected 'ExpectationStringLengthEquals' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazz", "foobar"]})
+ result = expectation.validate(data_frame=data_frame)
+
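+    # "bazz" (length 4) and "foobar" (length 6) differ from the required length of 3.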
+ expected_violations = pd.DataFrame({"col1": ["bazz", "foobar"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not equal to 3.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthEquals")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazz",), ("foobar",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("bazz",), ("foobar",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not equal to 3.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthEquals",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = pd.DataFrame({"col2": ["foo", "bar", "baz"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals(
+ column_name="col1", length=3
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals(
+ column_name="col1", length=3
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazz", "foobar"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals(
+ column_name="col1", length=3
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals(
+ column_name="col1", length=3
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazz",), ("foobar",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals(
+ column_name="col1", length=3
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py
new file mode 100644
index 0000000..41cc74a
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name="col1",
+ length=3,
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringLengthGreaterThan", (
+ f"Expected 'ExpectationStringLengthGreaterThan' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bazz", "hello"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthGreaterThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazzz"]})
+ result = expectation.validate(data_frame=data_frame)
+
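+    # "foo" and "bar" have length 3, which is not strictly greater than 3.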
+ expected_violations = pd.DataFrame({"col1": ["foo", "bar"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not greater than 3.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bazz",), ("hello",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthGreaterThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazzz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("foo",), ("bar",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not greater than 3.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthGreaterThan",
+ column_name="col1",
+ length=3,
+ )
+ data_frame = pd.DataFrame({"col2": ["foobar", "bazz", "hello"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than(
+ column_name="col1", length=3
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bazz", "hello"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than(
+ column_name="col1", length=3
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazzz"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than(
+ column_name="col1", length=3
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bazz",), ("hello",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than(
+ column_name="col1", length=3
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazzz",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than(
+ column_name="col1", length=3
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bazz",), ("hello",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py
new file mode 100644
index 0000000..5fe0712
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name="col1",
+ length=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringLengthLessThan", (
+ f"Expected 'ExpectationStringLengthLessThan' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name="col1",
+ length=5,
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthLessThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name="col1",
+ length=5,
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbaz"]})
+ result = expectation.validate(data_frame=data_frame)
+
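+    # "foobar" and "bazbaz" have length 6, which is not strictly less than 5.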
+ expected_violations = pd.DataFrame({"col1": ["foobar", "bazbaz"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not less than 5.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name="col1",
+ length=5,
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthLessThan")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name="col1",
+ length=5,
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbaz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("foobar",), ("bazbaz",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' length is not less than 5.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringLengthLessThan",
+ column_name="col1",
+ length=5,
+ )
+ data_frame = pd.DataFrame({"col2": ["foo", "bar", "baz"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than(
+ column_name="col1", length=5
+ )
+ data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than(
+ column_name="col1", length=5
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbaz"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than(
+ column_name="col1", length=5
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than(
+ column_name="col1", length=5
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbaz",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than(
+ column_name="col1", length=5
+ )
+ data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py
new file mode 100644
index 0000000..0402f04
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name="col1",
+ substring="foo",
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringNotContains", (
+ f"Expected 'ExpectationStringNotContains' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = pd.DataFrame({"col1": ["bar", "baz", "qux"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringNotContains")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "foo"]})
+ result = expectation.validate(data_frame=data_frame)
+
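+    # "foobar" and "foo" both contain the forbidden substring "foo".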
+ expected_violations = pd.DataFrame({"col1": ["foobar", "foo"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' contains 'foo'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = spark.createDataFrame([("bar",), ("baz",), ("qux",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringNotContains")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("foo",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("foobar",), ("foo",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' contains 'foo'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringNotContains",
+ column_name="col1",
+ substring="foo",
+ )
+ data_frame = pd.DataFrame({"col2": ["bar", "baz", "qux"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = pd.DataFrame({"col1": ["bar", "baz", "qux"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "bar", "foo"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = spark.createDataFrame([("bar",), ("baz",), ("qux",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("bar",), ("foo",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains(
+ column_name="col1", substring="foo"
+ )
+ data_frame = spark.createDataFrame([("bar",), ("baz",), ("qux",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py
new file mode 100644
index 0000000..766506f
--- /dev/null
+++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py
@@ -0,0 +1,157 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name="col1",
+ prefix="foo",
+ )
+ assert expectation.get_expectation_name() == "ExpectationStringStartsWith", (
+ f"Expected 'ExpectationStringStartsWith' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name="col1",
+ prefix="foo",
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "foobaz"]})
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringStartsWith")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pandas_violations():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name="col1",
+ prefix="foo",
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "barfoo", "baz"]})
+ result = expectation.validate(data_frame=data_frame)
+
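+    # "barfoo" and "baz" do not start with the "foo" prefix.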
+ expected_violations = pd.DataFrame({"col1": ["barfoo", "baz"]})
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' does not start with 'foo'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_expectation_pyspark_success(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name="col1",
+ prefix="foo",
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("foobaz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+ assert str(result) == str(
+ DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringStartsWith")
+ ), f"Expected success message but got: {result}"
+
+
+def test_expectation_pyspark_violations(spark):
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name="col1",
+ prefix="foo",
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("barfoo",), ("baz",)], ["col1"])
+ result = expectation.validate(data_frame=data_frame)
+
+ expected_violations = spark.createDataFrame([("barfoo",), ("baz",)], ["col1"])
+ assert str(result) == str(
+ DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PYSPARK,
+ violations_data_frame=expected_violations,
+ message="Found 2 row(s) where 'col1' does not start with 'foo'.",
+ limit_violations=5,
+ )
+ ), f"Expected failure message but got: {result}"
+
+
+def test_column_missing_error():
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationStringStartsWith",
+ column_name="col1",
+ prefix="foo",
+ )
+ data_frame = pd.DataFrame({"col2": ["foobar", "foo123", "foobaz"]})
+ result = expectation.validate(data_frame=data_frame)
+ expected_failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=str(expectation),
+ data_frame_type=DataFrameType.PANDAS,
+ message="Column 'col1' does not exist in the DataFrame.",
+ )
+ assert str(result) == str(expected_failure_message), (
+ f"Expected failure message but got: {result}"
+ )
+
+
+def test_suite_pandas_success():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with(
+ column_name="col1", prefix="foo"
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "foobaz"]})
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+ expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with(
+ column_name="col1", prefix="foo"
+ )
+ data_frame = pd.DataFrame({"col1": ["foobar", "barfoo", "baz"]})
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with(
+ column_name="col1", prefix="foo"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("foobaz",)], ["col1"])
+ result = expectations_suite.run(data_frame=data_frame)
+ assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with(
+ column_name="col1", prefix="foo"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("barfoo",), ("baz",)], ["col1"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_column_missing_error(spark):
+ expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with(
+ column_name="col1", prefix="foo"
+ )
+ data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("foobaz",)], ["col2"])
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ expectations_suite.run(data_frame=data_frame)
diff --git a/tests/expectations_implemented/template_test_expectation.py b/tests/expectations_implemented/template_test_expectation.py
new file mode 100644
index 0000000..0e0da24
--- /dev/null
+++ b/tests/expectations_implemented/template_test_expectation.py
@@ -0,0 +1,82 @@
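+"""
+Template for new expectation test modules: copy this file, replace the
+placeholder 'ExpectationDoesSomeCheck' with the expectation under test, and
+implement each placeholder function below.
+"""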
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
+    Adapt the expectation name and arguments in the concrete test module.
+ """
+ expectation = DataFrameExpectationRegistry.get_expectation(
+ expectation_name="ExpectationDoesSomeCheck",
+ column_name="col1",
+ value=5,
+ )
+ assert expectation.get_expectation_name() == "ExpectationDoesSomeCheck", (
+ f"Expected 'ExpectationDoesSomeCheck' but got: {expectation.get_expectation_name()}"
+ )
+
+
+def test_expectation_pandas_success():
+ """
+    Test the expectation for a pandas DataFrame with no violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
+
+
+def test_expectation_pandas_violations():
+ """
+    Test the expectation for a pandas DataFrame with violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
+
+
+def test_expectation_pyspark_success(spark):
+ """
+    Test the expectation for a PySpark DataFrame with no violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
+
+
+def test_expectation_pyspark_violations(spark):
+ """
+    Test the expectation for a PySpark DataFrame with violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
+
+
+def test_suite_pandas_success():
+ """
+    Test the expectation suite for a pandas DataFrame with no violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
+
+
+def test_suite_pandas_violations():
+ """
+    Test the expectation suite for a pandas DataFrame with violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
+
+
+def test_suite_pyspark_success(spark):
+ """
+    Test the expectation suite for a PySpark DataFrame with no violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
+
+
+def test_suite_pyspark_violations(spark):
+ """
+    Test the expectation suite for a PySpark DataFrame with violations.
+    Implement this test in the concrete test module.
+    """
+    raise NotImplementedError("Implement this test in the concrete test module.")
diff --git a/tests/test_expectations_suite.py b/tests/test_expectations_suite.py
new file mode 100644
index 0000000..c3f571f
--- /dev/null
+++ b/tests/test_expectations_suite.py
@@ -0,0 +1,184 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+)
+
+
+def test_suite_success():
+ """
+ Test the ExpectationsSuite with no expectations and with only succeeding expectations.
+ """
+
+ # No expectations
+ suite = DataFrameExpectationsSuite()
+ result = suite.run(data_frame=pd.DataFrame())
+ assert result is None, "Expected no result for empty suite"
+
+ # All succeeding expectations
+ suite = (
+ DataFrameExpectationsSuite()
+ .expect_value_greater_than(column_name="col1", value=2)
+ .expect_value_less_than(column_name="col1", value=10)
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+ result = suite.run(data_frame=data_frame)
+ assert result is None, "Expected no result for successful suite"
+
+
+def test_suite_failure():
+ """
+ Test the ExpectationsSuite with a failing expectation.
+ """
+
+ # A single failing expectation is enough to fail the whole suite
+ suite = (
+ DataFrameExpectationsSuite()
+ .expect_value_greater_than(column_name="col1", value=2)
+ .expect_value_less_than(column_name="col1", value=3)
+ )
+ data_frame = pd.DataFrame({"col1": [3, 4, 5]})
+
+ with pytest.raises(DataFrameExpectationsSuiteFailure):
+ suite.run(data_frame=data_frame)
+
+
+def test_invalid_data_frame_type():
+ """
+ Test that an invalid DataFrame type raises a ValueError.
+ """
+
+ suite = (
+ DataFrameExpectationsSuite()
+ .expect_value_greater_than(column_name="col1", value=2)
+ .expect_value_less_than(column_name="col1", value=10)
+ )
+ data_frame = None
+
+ with pytest.raises(ValueError):
+ suite.run(data_frame=data_frame)
+
+
+def test_suite_with_supported_dataframe_types(spark):
+ """
+ Test the ExpectationsSuite with all supported DataFrame types.
+ """
+
+ suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=1)
+
+ # Test with pandas DataFrame
+ pandas_df = pd.DataFrame({"col1": [1, 2, 3]})
+ result = suite.run(data_frame=pandas_df)
+ assert result is None, "Expected success for pandas DataFrame"
+
+ # Test with PySpark DataFrame
+ spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"])
+ result = suite.run(data_frame=spark_df)
+ assert result is None, "Expected success for PySpark DataFrame"
+
+
+def test_suite_with_unsupported_dataframe_types():
+ """
+ Test the ExpectationsSuite with unsupported DataFrame types.
+ """
+ suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=1)
+
+ # Test various unsupported types
+ unsupported_types = [
+ None,
+ "not_a_dataframe",
+ [1, 2, 3],
+ {"col1": [1, 2, 3]},
+ 42,
+ True,
+ ]
+
+ for unsupported_data in unsupported_types:
+ with pytest.raises(ValueError) as context:
+ suite.run(data_frame=unsupported_data)
+ assert "Unsupported DataFrame type" in str(context.value), (
+ f"Expected unsupported type error for {type(unsupported_data)}"
+ )
+
+
+def test_suite_with_pyspark_connect_dataframe():
+ """
+ Test the ExpectationsSuite with a mocked PySpark Connect DataFrame.
+ """
+ from unittest.mock import patch
+
+ # Mock a Connect DataFrame
+ class MockConnectDataFrame:
+ def __init__(self):
+ self.is_cached = False
+
+ def cache(self):
+ self.is_cached = True
+ return self
+
+ def unpersist(self):
+ self.is_cached = False
+ return self
+
+ suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=0)
+
+ with patch(
+ "dataframe_expectations.expectations.PySparkConnectDataFrame",
+ MockConnectDataFrame,
+ ):
+ # Create mock expectation that can handle Connect DataFrame
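+ # (`_DataFrameExpectationsSuite__expectations` is the name-mangled form
+ # of the suite's private `__expectations` attribute.)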
+ with patch.object(
+ suite._DataFrameExpectationsSuite__expectations[0], "validate"
+ ) as mock_validate:
+ from dataframe_expectations.result_message import (
+ DataFrameExpectationSuccessMessage,
+ )
+
+ mock_validate.return_value = DataFrameExpectationSuccessMessage(
+ expectation_name="MockExpectation"
+ )
+
+ mock_connect_df = MockConnectDataFrame()
+ result = suite.run(data_frame=mock_connect_df)
+ assert result is None, "Expected success for mock Connect DataFrame"
+
+
+def test_expectation_suite_failure_message():
+ failed_expectation_messages = [
+ DataFrameExpectationFailureMessage(
+ expectation_str="ExpectationValueGreaterThan",
+ data_frame_type=DataFrameType.PANDAS,
+ message="Failed expectation 1",
+ ),
+ DataFrameExpectationFailureMessage(
+ expectation_str="ExpectationValueGreaterThan",
+ data_frame_type=DataFrameType.PANDAS,
+ message="Failed expectation 2",
+ ),
+ ]
+
+ suite_failure = DataFrameExpectationsSuiteFailure(
+ total_expectations=4,
+ failures=failed_expectation_messages,
+ )
+
+ expected_str = (
+ "(2/4) expectations failed.\n\n"
+ f"{'=' * 80}\n"
+ "List of violations:\n"
+ f"{'-' * 80}"
+ "\n[Failed 1/2] ExpectationValueGreaterThan: Failed expectation 1\n"
+ f"{'-' * 80}\n"
+ "[Failed 2/2] ExpectationValueGreaterThan: Failed expectation 2\n"
+ f"{'=' * 80}"
+ )
+
+ assert str(suite_failure) == expected_str, (
+ f"Expected suite failure message but got: {str(suite_failure)}"
+ )
diff --git a/tests/test_result_message.py b/tests/test_result_message.py
new file mode 100644
index 0000000..def309f
--- /dev/null
+++ b/tests/test_result_message.py
@@ -0,0 +1,194 @@
+import pytest
+import pandas as pd
+from tabulate import tabulate # type: ignore
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationResultMessage,
+ DataFrameExpectationSuccessMessage,
+)
+from tests.conftest import assert_pandas_df_equal
+
+
+def test_result_message_empty():
+ """
+ By default the result message should be empty.
+ """
+ result_message = DataFrameExpectationResultMessage()
+
+ assert str(result_message) == "", (
+ f"Expected empty result message but got: {str(result_message)}"
+ )
+
+
+def test_dataframe_to_str_pandas():
+ """
+ Test the dataframe_to_str method with a pandas DataFrame.
+ """
+ pandas_dataframe = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ result_message = DataFrameExpectationResultMessage()
+
+ expected_dataframe_str = tabulate(
+ pandas_dataframe.head(2), headers="keys", tablefmt="pretty", showindex=False
+ )
+
+ actual_str = result_message.dataframe_to_str(
+ data_frame_type=DataFrameType.PANDAS,
+ data_frame=pandas_dataframe,
+ rows=2,
+ )
+ assert actual_str == expected_dataframe_str, (
+ f"Expected pandas dataframe string but got: {actual_str}"
+ )
+
+
+def test_dataframe_to_str_pyspark(spark):
+ """
+ Test the dataframe_to_str method with a PySpark DataFrame.
+ """
+ pyspark_dataframe = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"])
+
+ result_message = DataFrameExpectationResultMessage()
+
+ expected_dataframe_str = tabulate(
+ pyspark_dataframe.limit(2).toPandas(),
+ headers="keys",
+ tablefmt="pretty",
+ showindex=False,
+ )
+
+ actual_str = result_message.dataframe_to_str(
+ data_frame_type=DataFrameType.PYSPARK,
+ data_frame=pyspark_dataframe,
+ rows=2,
+ )
+ assert actual_str == expected_dataframe_str, (
+ f"Expected pyspark dataframe string but got: {actual_str}"
+ )
+
+
+def test_dataframe_to_str_invalid_type():
+ """
+ Test the dataframe_to_str method with an invalid DataFrame type.
+ """
+ result_message = DataFrameExpectationResultMessage()
+
+ with pytest.raises(ValueError) as context:
+ result_message.dataframe_to_str(
+ data_frame_type="invalid_type", data_frame=pd.DataFrame(), rows=2
+ )
+
+ assert str(context.value) == "Unsupported DataFrame type: invalid_type", (
+ f"Expected ValueError message but got: {str(context.value)}"
+ )
+
+
+def test_success_message_no_additional_message():
+ """
+ Test the success message initialization and string representation with no additional message.
+ """
+ expectation_name = "TestExpectation"
+ success_message = DataFrameExpectationSuccessMessage(expectation_name)
+ message_str = str(success_message)
+ assert expectation_name in message_str, (
+ f"Expectation name should be in the message: {message_str}"
+ )
+
+
+def test_success_message_with_additional_message():
+ """
+ Test the success message initialization and string representation with an additional message.
+ """
+ expectation_name = "TestExpectation"
+ additional_message = "This is a success message."
+ success_message_with_additional = DataFrameExpectationSuccessMessage(
+ expectation_name, additional_message
+ )
+ message_str = str(success_message_with_additional)
+ assert expectation_name in message_str, (
+ f"Expectation name should be in the message: {message_str}"
+ )
+ assert additional_message in message_str, (
+ f"Additional message should be in the success message: {message_str}"
+ )
+
+
+def test_failure_message_default_params():
+ """
+ Test the failure message initialization and string representation with default parameters.
+ """
+ expectation_name = "TestExpectation"
+ data_frame_type = None
+ failure_message = DataFrameExpectationFailureMessage(expectation_name, data_frame_type)
+
+ message_str = str(failure_message)
+ assert expectation_name in message_str, (
+ f"Expectation name should be in the message: {message_str}"
+ )
+
+ violations_df = failure_message.get_violations_data_frame()
+ assert violations_df is None, (
+ f"Violations DataFrame should be None when not provided but got: {violations_df}"
+ )
+
+
+def test_failure_message_custom_message():
+ """
+ Test the failure message initialization and string representation with a custom message.
+ """
+ expectation_name = "TestExpectation"
+ data_frame_type = None
+ custom_message = "This is a custom failure message."
+ failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=expectation_name,
+ data_frame_type=data_frame_type,
+ message=custom_message,
+ )
+
+ message_str = str(failure_message)
+ assert expectation_name in message_str, (
+ f"Expectation name should be in the message: {message_str}"
+ )
+ assert custom_message in message_str, (
+ f"Custom message should be in the failure message: {message_str}"
+ )
+
+ violations_df = failure_message.get_violations_data_frame()
+ assert violations_df is None, (
+ f"Violations DataFrame should be None when not provided but got: {violations_df}"
+ )
+
+
+def test_failure_message_with_violations_dataframe():
+ """
+ Test the failure message initialization and string representation with a violations DataFrame.
+ """
+ expectation_name = "TestExpectation"
+ data_frame_type = DataFrameType.PANDAS
+ violations_dataframe = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
+ failure_message = DataFrameExpectationFailureMessage(
+ expectation_str=expectation_name,
+ data_frame_type=data_frame_type,
+ violations_data_frame=violations_dataframe,
+ limit_violations=2,
+ )
+
+ expected_dataframe = violations_dataframe
+ expected_dataframe_str = tabulate(
+ expected_dataframe.head(2),
+ headers="keys",
+ tablefmt="pretty",
+ showindex=False,
+ )
+
+ message_str = str(failure_message)
+ assert expectation_name in message_str, (
+ f"Expectation name should be in the message: {message_str}"
+ )
+ assert expected_dataframe_str in message_str, (
+ f"Violations DataFrame should be included in the message: {message_str}"
+ )
+
+ actual_violations_df = failure_message.get_violations_data_frame()
+ assert_pandas_df_equal(actual_violations_df, expected_dataframe)
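+
+
+# Note: `assert_pandas_df_equal` is provided by tests/conftest.py. As a hedged
+# sketch (names and index handling here are assumptions, not the actual
+# helper), it is expected to wrap `pandas.testing.assert_frame_equal`:
+#
+# from pandas.testing import assert_frame_equal
+#
+#
+# def assert_pandas_df_equal(left, right):
+#     assert_frame_equal(
+#         left.reset_index(drop=True), right.reset_index(drop=True)
+#     )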
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..2a6924d
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,1331 @@
+version = 1
+requires-python = ">=3.10"
+resolution-markers = [
+ "python_full_version < '3.11'",
+ "python_full_version == '3.11.*'",
+ "python_full_version >= '3.12'",
+]
+
+[[package]]
+name = "accessible-pygments"
+version = "0.0.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bc/c1/bbac6a50d02774f91572938964c582fff4270eee73ab822a4aeea4d8b11b/accessible_pygments-0.0.5.tar.gz", hash = "sha256:40918d3e6a2b619ad424cb91e556bd3bd8865443d9f22f1dcdf79e33c8046872", size = 1377899 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903 },
+]
+
+[[package]]
+name = "alabaster"
+version = "1.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 },
+]
+
+[[package]]
+name = "anyio"
+version = "4.11.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+ { name = "idna" },
+ { name = "sniffio" },
+ { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097 },
+]
+
+[[package]]
+name = "babel"
+version = "2.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537 },
+]
+
+[[package]]
+name = "beautifulsoup4"
+version = "4.14.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "soupsieve" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392 },
+]
+
+[[package]]
+name = "certifi"
+version = "2025.10.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286 },
+]
+
+[[package]]
+name = "cfgv"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1f/b8/6d51fc1d52cbd52cd4ccedd5b5b2f0f6a11bbf6765c782298b0f3e808541/charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", size = 209709 },
+ { url = "https://files.pythonhosted.org/packages/5c/af/1f9d7f7faafe2ddfb6f72a2e07a548a629c61ad510fe60f9630309908fef/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", size = 148814 },
+ { url = "https://files.pythonhosted.org/packages/79/3d/f2e3ac2bbc056ca0c204298ea4e3d9db9b4afe437812638759db2c976b5f/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", size = 144467 },
+ { url = "https://files.pythonhosted.org/packages/ec/85/1bf997003815e60d57de7bd972c57dc6950446a3e4ccac43bc3070721856/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", size = 162280 },
+ { url = "https://files.pythonhosted.org/packages/3e/8e/6aa1952f56b192f54921c436b87f2aaf7c7a7c3d0d1a765547d64fd83c13/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", size = 159454 },
+ { url = "https://files.pythonhosted.org/packages/36/3b/60cbd1f8e93aa25d1c669c649b7a655b0b5fb4c571858910ea9332678558/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", size = 153609 },
+ { url = "https://files.pythonhosted.org/packages/64/91/6a13396948b8fd3c4b4fd5bc74d045f5637d78c9675585e8e9fbe5636554/charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", size = 151849 },
+ { url = "https://files.pythonhosted.org/packages/b7/7a/59482e28b9981d105691e968c544cc0df3b7d6133152fb3dcdc8f135da7a/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", size = 151586 },
+ { url = "https://files.pythonhosted.org/packages/92/59/f64ef6a1c4bdd2baf892b04cd78792ed8684fbc48d4c2afe467d96b4df57/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", size = 145290 },
+ { url = "https://files.pythonhosted.org/packages/6b/63/3bf9f279ddfa641ffa1962b0db6a57a9c294361cc2f5fcac997049a00e9c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", size = 163663 },
+ { url = "https://files.pythonhosted.org/packages/ed/09/c9e38fc8fa9e0849b172b581fd9803bdf6e694041127933934184e19f8c3/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", size = 151964 },
+ { url = "https://files.pythonhosted.org/packages/d2/d1/d28b747e512d0da79d8b6a1ac18b7ab2ecfd81b2944c4c710e166d8dd09c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", size = 161064 },
+ { url = "https://files.pythonhosted.org/packages/bb/9a/31d62b611d901c3b9e5500c36aab0ff5eb442043fb3a1c254200d3d397d9/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", size = 155015 },
+ { url = "https://files.pythonhosted.org/packages/1f/f3/107e008fa2bff0c8b9319584174418e5e5285fef32f79d8ee6a430d0039c/charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", size = 99792 },
+ { url = "https://files.pythonhosted.org/packages/eb/66/e396e8a408843337d7315bab30dbf106c38966f1819f123257f5520f8a96/charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", size = 107198 },
+ { url = "https://files.pythonhosted.org/packages/b5/58/01b4f815bf0312704c267f2ccb6e5d42bcc7752340cd487bc9f8c3710597/charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", size = 100262 },
+ { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988 },
+ { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324 },
+ { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742 },
+ { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863 },
+ { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837 },
+ { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550 },
+ { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162 },
+ { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019 },
+ { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310 },
+ { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022 },
+ { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383 },
+ { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098 },
+ { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991 },
+ { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456 },
+ { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978 },
+ { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969 },
+ { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 },
+ { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 },
+ { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 },
+ { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 },
+ { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 },
+ { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 },
+ { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 },
+ { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 },
+ { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 },
+ { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 },
+ { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 },
+ { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 },
+ { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 },
+ { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 },
+ { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 },
+ { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 },
+ { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 },
+ { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 },
+ { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 },
+ { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 },
+ { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 },
+ { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 },
+ { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 },
+ { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 },
+ { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 },
+ { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 },
+ { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 },
+ { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 },
+ { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 },
+ { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 },
+ { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 },
+ { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 },
+ { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 },
+ { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 },
+ { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 },
+ { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 },
+ { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 },
+ { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 },
+ { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 },
+ { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 },
+ { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 },
+ { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 },
+ { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 },
+ { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 },
+ { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 },
+ { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 },
+ { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 },
+ { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 },
+ { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 },
+]
+
+[[package]]
+name = "click"
+version = "8.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "colorama", marker = "platform_system == 'Windows'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295 },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
+]
+
+[[package]]
+name = "coverage"
+version = "7.11.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1c/38/ee22495420457259d2f3390309505ea98f98a5eed40901cf62196abad006/coverage-7.11.0.tar.gz", hash = "sha256:167bd504ac1ca2af7ff3b81d245dfea0292c5032ebef9d66cc08a7d28c1b8050", size = 811905 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/12/95/c49df0aceb5507a80b9fe5172d3d39bf23f05be40c23c8d77d556df96cec/coverage-7.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb53f1e8adeeb2e78962bade0c08bfdc461853c7969706ed901821e009b35e31", size = 215800 },
+ { url = "https://files.pythonhosted.org/packages/dc/c6/7bb46ce01ed634fff1d7bb53a54049f539971862cc388b304ff3c51b4f66/coverage-7.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9a03ec6cb9f40a5c360f138b88266fd8f58408d71e89f536b4f91d85721d075", size = 216198 },
+ { url = "https://files.pythonhosted.org/packages/94/b2/75d9d8fbf2900268aca5de29cd0a0fe671b0f69ef88be16767cc3c828b85/coverage-7.11.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d7f0616c557cbc3d1c2090334eddcbb70e1ae3a40b07222d62b3aa47f608fab", size = 242953 },
+ { url = "https://files.pythonhosted.org/packages/65/ac/acaa984c18f440170525a8743eb4b6c960ace2dbad80dc22056a437fc3c6/coverage-7.11.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e44a86a47bbdf83b0a3ea4d7df5410d6b1a0de984fbd805fa5101f3624b9abe0", size = 244766 },
+ { url = "https://files.pythonhosted.org/packages/d8/0d/938d0bff76dfa4a6b228c3fc4b3e1c0e2ad4aa6200c141fcda2bd1170227/coverage-7.11.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:596763d2f9a0ee7eec6e643e29660def2eef297e1de0d334c78c08706f1cb785", size = 246625 },
+ { url = "https://files.pythonhosted.org/packages/38/54/8f5f5e84bfa268df98f46b2cb396b1009734cfb1e5d6adb663d284893b32/coverage-7.11.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ef55537ff511b5e0a43edb4c50a7bf7ba1c3eea20b4f49b1490f1e8e0e42c591", size = 243568 },
+ { url = "https://files.pythonhosted.org/packages/68/30/8ba337c2877fe3f2e1af0ed7ff4be0c0c4aca44d6f4007040f3ca2255e99/coverage-7.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cbabd8f4d0d3dc571d77ae5bdbfa6afe5061e679a9d74b6797c48d143307088", size = 244665 },
+ { url = "https://files.pythonhosted.org/packages/cc/fb/c6f1d6d9a665536b7dde2333346f0cc41dc6a60bd1ffc10cd5c33e7eb000/coverage-7.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e24045453384e0ae2a587d562df2a04d852672eb63051d16096d3f08aa4c7c2f", size = 242681 },
+ { url = "https://files.pythonhosted.org/packages/be/38/1b532319af5f991fa153c20373291dc65c2bf532af7dbcffdeef745c8f79/coverage-7.11.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:7161edd3426c8d19bdccde7d49e6f27f748f3c31cc350c5de7c633fea445d866", size = 242912 },
+ { url = "https://files.pythonhosted.org/packages/67/3d/f39331c60ef6050d2a861dc1b514fa78f85f792820b68e8c04196ad733d6/coverage-7.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d4ed4de17e692ba6415b0587bc7f12bc80915031fc9db46a23ce70fc88c9841", size = 243559 },
+ { url = "https://files.pythonhosted.org/packages/4b/55/cb7c9df9d0495036ce582a8a2958d50c23cd73f84a23284bc23bd4711a6f/coverage-7.11.0-cp310-cp310-win32.whl", hash = "sha256:765c0bc8fe46f48e341ef737c91c715bd2a53a12792592296a095f0c237e09cf", size = 218266 },
+ { url = "https://files.pythonhosted.org/packages/68/a8/b79cb275fa7bd0208767f89d57a1b5f6ba830813875738599741b97c2e04/coverage-7.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:24d6f3128f1b2d20d84b24f4074475457faedc3d4613a7e66b5e769939c7d969", size = 219169 },
+ { url = "https://files.pythonhosted.org/packages/49/3a/ee1074c15c408ddddddb1db7dd904f6b81bc524e01f5a1c5920e13dbde23/coverage-7.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d58ecaa865c5b9fa56e35efc51d1014d4c0d22838815b9fce57a27dd9576847", size = 215912 },
+ { url = "https://files.pythonhosted.org/packages/70/c4/9f44bebe5cb15f31608597b037d78799cc5f450044465bcd1ae8cb222fe1/coverage-7.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b679e171f1c104a5668550ada700e3c4937110dbdd153b7ef9055c4f1a1ee3cc", size = 216310 },
+ { url = "https://files.pythonhosted.org/packages/42/01/5e06077cfef92d8af926bdd86b84fb28bf9bc6ad27343d68be9b501d89f2/coverage-7.11.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ca61691ba8c5b6797deb221a0d09d7470364733ea9c69425a640f1f01b7c5bf0", size = 246706 },
+ { url = "https://files.pythonhosted.org/packages/40/b8/7a3f1f33b35cc4a6c37e759137533119560d06c0cc14753d1a803be0cd4a/coverage-7.11.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:aef1747ede4bd8ca9cfc04cc3011516500c6891f1b33a94add3253f6f876b7b7", size = 248634 },
+ { url = "https://files.pythonhosted.org/packages/7a/41/7f987eb33de386bc4c665ab0bf98d15fcf203369d6aacae74f5dd8ec489a/coverage-7.11.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1839d08406e4cba2953dcc0ffb312252f14d7c4c96919f70167611f4dee2623", size = 250741 },
+ { url = "https://files.pythonhosted.org/packages/23/c1/a4e0ca6a4e83069fb8216b49b30a7352061ca0cb38654bd2dc96b7b3b7da/coverage-7.11.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e0eb0a2dcc62478eb5b4cbb80b97bdee852d7e280b90e81f11b407d0b81c4287", size = 246837 },
+ { url = "https://files.pythonhosted.org/packages/5d/03/ced062a17f7c38b4728ff76c3acb40d8465634b20b4833cdb3cc3a74e115/coverage-7.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bc1fbea96343b53f65d5351d8fd3b34fd415a2670d7c300b06d3e14a5af4f552", size = 248429 },
+ { url = "https://files.pythonhosted.org/packages/97/af/a7c6f194bb8c5a2705ae019036b8fe7f49ea818d638eedb15fdb7bed227c/coverage-7.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:214b622259dd0cf435f10241f1333d32caa64dbc27f8790ab693428a141723de", size = 246490 },
+ { url = "https://files.pythonhosted.org/packages/ab/c3/aab4df02b04a8fde79068c3c41ad7a622b0ef2b12e1ed154da986a727c3f/coverage-7.11.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:258d9967520cca899695d4eb7ea38be03f06951d6ca2f21fb48b1235f791e601", size = 246208 },
+ { url = "https://files.pythonhosted.org/packages/30/d8/e282ec19cd658238d60ed404f99ef2e45eed52e81b866ab1518c0d4163cf/coverage-7.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cf9e6ff4ca908ca15c157c409d608da77a56a09877b97c889b98fb2c32b6465e", size = 247126 },
+ { url = "https://files.pythonhosted.org/packages/d1/17/a635fa07fac23adb1a5451ec756216768c2767efaed2e4331710342a3399/coverage-7.11.0-cp311-cp311-win32.whl", hash = "sha256:fcc15fc462707b0680cff6242c48625da7f9a16a28a41bb8fd7a4280920e676c", size = 218314 },
+ { url = "https://files.pythonhosted.org/packages/2a/29/2ac1dfcdd4ab9a70026edc8d715ece9b4be9a1653075c658ee6f271f394d/coverage-7.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:865965bf955d92790f1facd64fe7ff73551bd2c1e7e6b26443934e9701ba30b9", size = 219203 },
+ { url = "https://files.pythonhosted.org/packages/03/21/5ce8b3a0133179115af4c041abf2ee652395837cb896614beb8ce8ddcfd9/coverage-7.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:5693e57a065760dcbeb292d60cc4d0231a6d4b6b6f6a3191561e1d5e8820b745", size = 217879 },
+ { url = "https://files.pythonhosted.org/packages/c4/db/86f6906a7c7edc1a52b2c6682d6dd9be775d73c0dfe2b84f8923dfea5784/coverage-7.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9c49e77811cf9d024b95faf86c3f059b11c0c9be0b0d61bc598f453703bd6fd1", size = 216098 },
+ { url = "https://files.pythonhosted.org/packages/21/54/e7b26157048c7ba555596aad8569ff903d6cd67867d41b75287323678ede/coverage-7.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a61e37a403a778e2cda2a6a39abcc895f1d984071942a41074b5c7ee31642007", size = 216331 },
+ { url = "https://files.pythonhosted.org/packages/b9/19/1ce6bf444f858b83a733171306134a0544eaddf1ca8851ede6540a55b2ad/coverage-7.11.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c79cae102bb3b1801e2ef1511fb50e91ec83a1ce466b2c7c25010d884336de46", size = 247825 },
+ { url = "https://files.pythonhosted.org/packages/71/0b/d3bcbbc259fcced5fb67c5d78f6e7ee965f49760c14afd931e9e663a83b2/coverage-7.11.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:16ce17ceb5d211f320b62df002fa7016b7442ea0fd260c11cec8ce7730954893", size = 250573 },
+ { url = "https://files.pythonhosted.org/packages/58/8d/b0ff3641a320abb047258d36ed1c21d16be33beed4152628331a1baf3365/coverage-7.11.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:80027673e9d0bd6aef86134b0771845e2da85755cf686e7c7c59566cf5a89115", size = 251706 },
+ { url = "https://files.pythonhosted.org/packages/59/c8/5a586fe8c7b0458053d9c687f5cff515a74b66c85931f7fe17a1c958b4ac/coverage-7.11.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4d3ffa07a08657306cd2215b0da53761c4d73cb54d9143b9303a6481ec0cd415", size = 248221 },
+ { url = "https://files.pythonhosted.org/packages/d0/ff/3a25e3132804ba44cfa9a778cdf2b73dbbe63ef4b0945e39602fc896ba52/coverage-7.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a3b6a5f8b2524fd6c1066bc85bfd97e78709bb5e37b5b94911a6506b65f47186", size = 249624 },
+ { url = "https://files.pythonhosted.org/packages/c5/12/ff10c8ce3895e1b17a73485ea79ebc1896a9e466a9d0f4aef63e0d17b718/coverage-7.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fcc0a4aa589de34bc56e1a80a740ee0f8c47611bdfb28cd1849de60660f3799d", size = 247744 },
+ { url = "https://files.pythonhosted.org/packages/16/02/d500b91f5471b2975947e0629b8980e5e90786fe316b6d7299852c1d793d/coverage-7.11.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dba82204769d78c3fd31b35c3d5f46e06511936c5019c39f98320e05b08f794d", size = 247325 },
+ { url = "https://files.pythonhosted.org/packages/77/11/dee0284fbbd9cd64cfce806b827452c6df3f100d9e66188e82dfe771d4af/coverage-7.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:81b335f03ba67309a95210caf3eb43bd6fe75a4e22ba653ef97b4696c56c7ec2", size = 249180 },
+ { url = "https://files.pythonhosted.org/packages/59/1b/cdf1def928f0a150a057cab03286774e73e29c2395f0d30ce3d9e9f8e697/coverage-7.11.0-cp312-cp312-win32.whl", hash = "sha256:037b2d064c2f8cc8716fe4d39cb705779af3fbf1ba318dc96a1af858888c7bb5", size = 218479 },
+ { url = "https://files.pythonhosted.org/packages/ff/55/e5884d55e031da9c15b94b90a23beccc9d6beee65e9835cd6da0a79e4f3a/coverage-7.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:d66c0104aec3b75e5fd897e7940188ea1892ca1d0235316bf89286d6a22568c0", size = 219290 },
+ { url = "https://files.pythonhosted.org/packages/23/a8/faa930cfc71c1d16bc78f9a19bb73700464f9c331d9e547bfbc1dbd3a108/coverage-7.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:d91ebeac603812a09cf6a886ba6e464f3bbb367411904ae3790dfe28311b15ad", size = 217924 },
+ { url = "https://files.pythonhosted.org/packages/60/7f/85e4dfe65e400645464b25c036a26ac226cf3a69d4a50c3934c532491cdd/coverage-7.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cc3f49e65ea6e0d5d9bd60368684fe52a704d46f9e7fc413918f18d046ec40e1", size = 216129 },
+ { url = "https://files.pythonhosted.org/packages/96/5d/dc5fa98fea3c175caf9d360649cb1aa3715e391ab00dc78c4c66fabd7356/coverage-7.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f39ae2f63f37472c17b4990f794035c9890418b1b8cca75c01193f3c8d3e01be", size = 216380 },
+ { url = "https://files.pythonhosted.org/packages/b2/f5/3da9cc9596708273385189289c0e4d8197d37a386bdf17619013554b3447/coverage-7.11.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7db53b5cdd2917b6eaadd0b1251cf4e7d96f4a8d24e174bdbdf2f65b5ea7994d", size = 247375 },
+ { url = "https://files.pythonhosted.org/packages/65/6c/f7f59c342359a235559d2bc76b0c73cfc4bac7d61bb0df210965cb1ecffd/coverage-7.11.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10ad04ac3a122048688387828b4537bc9cf60c0bf4869c1e9989c46e45690b82", size = 249978 },
+ { url = "https://files.pythonhosted.org/packages/e7/8c/042dede2e23525e863bf1ccd2b92689692a148d8b5fd37c37899ba882645/coverage-7.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4036cc9c7983a2b1f2556d574d2eb2154ac6ed55114761685657e38782b23f52", size = 251253 },
+ { url = "https://files.pythonhosted.org/packages/7b/a9/3c58df67bfa809a7bddd786356d9c5283e45d693edb5f3f55d0986dd905a/coverage-7.11.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7ab934dd13b1c5e94b692b1e01bd87e4488cb746e3a50f798cb9464fd128374b", size = 247591 },
+ { url = "https://files.pythonhosted.org/packages/26/5b/c7f32efd862ee0477a18c41e4761305de6ddd2d49cdeda0c1116227570fd/coverage-7.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59a6e5a265f7cfc05f76e3bb53eca2e0dfe90f05e07e849930fecd6abb8f40b4", size = 249411 },
+ { url = "https://files.pythonhosted.org/packages/76/b5/78cb4f1e86c1611431c990423ec0768122905b03837e1b4c6a6f388a858b/coverage-7.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:df01d6c4c81e15a7c88337b795bb7595a8596e92310266b5072c7e301168efbd", size = 247303 },
+ { url = "https://files.pythonhosted.org/packages/87/c9/23c753a8641a330f45f221286e707c427e46d0ffd1719b080cedc984ec40/coverage-7.11.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8c934bd088eed6174210942761e38ee81d28c46de0132ebb1801dbe36a390dcc", size = 247157 },
+ { url = "https://files.pythonhosted.org/packages/c5/42/6e0cc71dc8a464486e944a4fa0d85bdec031cc2969e98ed41532a98336b9/coverage-7.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a03eaf7ec24078ad64a07f02e30060aaf22b91dedf31a6b24d0d98d2bba7f48", size = 248921 },
+ { url = "https://files.pythonhosted.org/packages/e8/1c/743c2ef665e6858cccb0f84377dfe3a4c25add51e8c7ef19249be92465b6/coverage-7.11.0-cp313-cp313-win32.whl", hash = "sha256:695340f698a5f56f795b2836abe6fb576e7c53d48cd155ad2f80fd24bc63a040", size = 218526 },
+ { url = "https://files.pythonhosted.org/packages/ff/d5/226daadfd1bf8ddbccefbd3aa3547d7b960fb48e1bdac124e2dd13a2b71a/coverage-7.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2727d47fce3ee2bac648528e41455d1b0c46395a087a229deac75e9f88ba5a05", size = 219317 },
+ { url = "https://files.pythonhosted.org/packages/97/54/47db81dcbe571a48a298f206183ba8a7ba79200a37cd0d9f4788fcd2af4a/coverage-7.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:0efa742f431529699712b92ecdf22de8ff198df41e43aeaaadf69973eb93f17a", size = 217948 },
+ { url = "https://files.pythonhosted.org/packages/e5/8b/cb68425420154e7e2a82fd779a8cc01549b6fa83c2ad3679cd6c088ebd07/coverage-7.11.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:587c38849b853b157706407e9ebdca8fd12f45869edb56defbef2daa5fb0812b", size = 216837 },
+ { url = "https://files.pythonhosted.org/packages/33/55/9d61b5765a025685e14659c8d07037247de6383c0385757544ffe4606475/coverage-7.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b971bdefdd75096163dd4261c74be813c4508477e39ff7b92191dea19f24cd37", size = 217061 },
+ { url = "https://files.pythonhosted.org/packages/52/85/292459c9186d70dcec6538f06ea251bc968046922497377bf4a1dc9a71de/coverage-7.11.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:269bfe913b7d5be12ab13a95f3a76da23cf147be7fa043933320ba5625f0a8de", size = 258398 },
+ { url = "https://files.pythonhosted.org/packages/1f/e2/46edd73fb8bf51446c41148d81944c54ed224854812b6ca549be25113ee0/coverage-7.11.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dadbcce51a10c07b7c72b0ce4a25e4b6dcb0c0372846afb8e5b6307a121eb99f", size = 260574 },
+ { url = "https://files.pythonhosted.org/packages/07/5e/1df469a19007ff82e2ca8fe509822820a31e251f80ee7344c34f6cd2ec43/coverage-7.11.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ed43fa22c6436f7957df036331f8fe4efa7af132054e1844918866cd228af6c", size = 262797 },
+ { url = "https://files.pythonhosted.org/packages/f9/50/de216b31a1434b94d9b34a964c09943c6be45069ec704bfc379d8d89a649/coverage-7.11.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9516add7256b6713ec08359b7b05aeff8850c98d357784c7205b2e60aa2513fa", size = 257361 },
+ { url = "https://files.pythonhosted.org/packages/82/1e/3f9f8344a48111e152e0fd495b6fff13cc743e771a6050abf1627a7ba918/coverage-7.11.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb92e47c92fcbcdc692f428da67db33337fa213756f7adb6a011f7b5a7a20740", size = 260349 },
+ { url = "https://files.pythonhosted.org/packages/65/9b/3f52741f9e7d82124272f3070bbe316006a7de1bad1093f88d59bfc6c548/coverage-7.11.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d06f4fc7acf3cabd6d74941d53329e06bab00a8fe10e4df2714f0b134bfc64ef", size = 258114 },
+ { url = "https://files.pythonhosted.org/packages/0b/8b/918f0e15f0365d50d3986bbd3338ca01178717ac5678301f3f547b6619e6/coverage-7.11.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:6fbcee1a8f056af07ecd344482f711f563a9eb1c2cad192e87df00338ec3cdb0", size = 256723 },
+ { url = "https://files.pythonhosted.org/packages/44/9e/7776829f82d3cf630878a7965a7d70cc6ca94f22c7d20ec4944f7148cb46/coverage-7.11.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dbbf012be5f32533a490709ad597ad8a8ff80c582a95adc8d62af664e532f9ca", size = 259238 },
+ { url = "https://files.pythonhosted.org/packages/9a/b8/49cf253e1e7a3bedb85199b201862dd7ca4859f75b6cf25ffa7298aa0760/coverage-7.11.0-cp313-cp313t-win32.whl", hash = "sha256:cee6291bb4fed184f1c2b663606a115c743df98a537c969c3c64b49989da96c2", size = 219180 },
+ { url = "https://files.pythonhosted.org/packages/ac/e1/1a541703826be7ae2125a0fb7f821af5729d56bb71e946e7b933cc7a89a4/coverage-7.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a386c1061bf98e7ea4758e4313c0ab5ecf57af341ef0f43a0bf26c2477b5c268", size = 220241 },
+ { url = "https://files.pythonhosted.org/packages/d5/d1/5ee0e0a08621140fd418ec4020f595b4d52d7eb429ae6a0c6542b4ba6f14/coverage-7.11.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f9ea02ef40bb83823b2b04964459d281688fe173e20643870bb5d2edf68bc836", size = 218510 },
+ { url = "https://files.pythonhosted.org/packages/f4/06/e923830c1985ce808e40a3fa3eb46c13350b3224b7da59757d37b6ce12b8/coverage-7.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c770885b28fb399aaf2a65bbd1c12bf6f307ffd112d6a76c5231a94276f0c497", size = 216110 },
+ { url = "https://files.pythonhosted.org/packages/42/82/cdeed03bfead45203fb651ed756dfb5266028f5f939e7f06efac4041dad5/coverage-7.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a3d0e2087dba64c86a6b254f43e12d264b636a39e88c5cc0a01a7c71bcfdab7e", size = 216395 },
+ { url = "https://files.pythonhosted.org/packages/fc/ba/e1c80caffc3199aa699813f73ff097bc2df7b31642bdbc7493600a8f1de5/coverage-7.11.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73feb83bb41c32811973b8565f3705caf01d928d972b72042b44e97c71fd70d1", size = 247433 },
+ { url = "https://files.pythonhosted.org/packages/80/c0/5b259b029694ce0a5bbc1548834c7ba3db41d3efd3474489d7efce4ceb18/coverage-7.11.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c6f31f281012235ad08f9a560976cc2fc9c95c17604ff3ab20120fe480169bca", size = 249970 },
+ { url = "https://files.pythonhosted.org/packages/8c/86/171b2b5e1aac7e2fd9b43f7158b987dbeb95f06d1fbecad54ad8163ae3e8/coverage-7.11.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9570ad567f880ef675673992222746a124b9595506826b210fbe0ce3f0499cd", size = 251324 },
+ { url = "https://files.pythonhosted.org/packages/1a/7e/7e10414d343385b92024af3932a27a1caf75c6e27ee88ba211221ff1a145/coverage-7.11.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8badf70446042553a773547a61fecaa734b55dc738cacf20c56ab04b77425e43", size = 247445 },
+ { url = "https://files.pythonhosted.org/packages/c4/3b/e4f966b21f5be8c4bf86ad75ae94efa0de4c99c7bbb8114476323102e345/coverage-7.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a09c1211959903a479e389685b7feb8a17f59ec5a4ef9afde7650bd5eabc2777", size = 249324 },
+ { url = "https://files.pythonhosted.org/packages/00/a2/8479325576dfcd909244d0df215f077f47437ab852ab778cfa2f8bf4d954/coverage-7.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:5ef83b107f50db3f9ae40f69e34b3bd9337456c5a7fe3461c7abf8b75dd666a2", size = 247261 },
+ { url = "https://files.pythonhosted.org/packages/7b/d8/3a9e2db19d94d65771d0f2e21a9ea587d11b831332a73622f901157cc24b/coverage-7.11.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f91f927a3215b8907e214af77200250bb6aae36eca3f760f89780d13e495388d", size = 247092 },
+ { url = "https://files.pythonhosted.org/packages/b3/b1/bbca3c472544f9e2ad2d5116b2379732957048be4b93a9c543fcd0207e5f/coverage-7.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cdbcd376716d6b7fbfeedd687a6c4be019c5a5671b35f804ba76a4c0a778cba4", size = 248755 },
+ { url = "https://files.pythonhosted.org/packages/89/49/638d5a45a6a0f00af53d6b637c87007eb2297042186334e9923a61aa8854/coverage-7.11.0-cp314-cp314-win32.whl", hash = "sha256:bab7ec4bb501743edc63609320aaec8cd9188b396354f482f4de4d40a9d10721", size = 218793 },
+ { url = "https://files.pythonhosted.org/packages/30/cc/b675a51f2d068adb3cdf3799212c662239b0ca27f4691d1fff81b92ea850/coverage-7.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d4ba9a449e9364a936a27322b20d32d8b166553bfe63059bd21527e681e2fad", size = 219587 },
+ { url = "https://files.pythonhosted.org/packages/93/98/5ac886876026de04f00820e5094fe22166b98dcb8b426bf6827aaf67048c/coverage-7.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:ce37f215223af94ef0f75ac68ea096f9f8e8c8ec7d6e8c346ee45c0d363f0479", size = 218168 },
+ { url = "https://files.pythonhosted.org/packages/14/d1/b4145d35b3e3ecf4d917e97fc8895bcf027d854879ba401d9ff0f533f997/coverage-7.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f413ce6e07e0d0dc9c433228727b619871532674b45165abafe201f200cc215f", size = 216850 },
+ { url = "https://files.pythonhosted.org/packages/ca/d1/7f645fc2eccd318369a8a9948acc447bb7c1ade2911e31d3c5620544c22b/coverage-7.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:05791e528a18f7072bf5998ba772fe29db4da1234c45c2087866b5ba4dea710e", size = 217071 },
+ { url = "https://files.pythonhosted.org/packages/54/7d/64d124649db2737ceced1dfcbdcb79898d5868d311730f622f8ecae84250/coverage-7.11.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cacb29f420cfeb9283b803263c3b9a068924474ff19ca126ba9103e1278dfa44", size = 258570 },
+ { url = "https://files.pythonhosted.org/packages/6c/3f/6f5922f80dc6f2d8b2c6f974835c43f53eb4257a7797727e6ca5b7b2ec1f/coverage-7.11.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314c24e700d7027ae3ab0d95fbf8d53544fca1f20345fd30cd219b737c6e58d3", size = 260738 },
+ { url = "https://files.pythonhosted.org/packages/0e/5f/9e883523c4647c860b3812b417a2017e361eca5b635ee658387dc11b13c1/coverage-7.11.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:630d0bd7a293ad2fc8b4b94e5758c8b2536fdf36c05f1681270203e463cbfa9b", size = 262994 },
+ { url = "https://files.pythonhosted.org/packages/07/bb/43b5a8e94c09c8bf51743ffc65c4c841a4ca5d3ed191d0a6919c379a1b83/coverage-7.11.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e89641f5175d65e2dbb44db15fe4ea48fade5d5bbb9868fdc2b4fce22f4a469d", size = 257282 },
+ { url = "https://files.pythonhosted.org/packages/aa/e5/0ead8af411411330b928733e1d201384b39251a5f043c1612970310e8283/coverage-7.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c9f08ea03114a637dab06cedb2e914da9dc67fa52c6015c018ff43fdde25b9c2", size = 260430 },
+ { url = "https://files.pythonhosted.org/packages/ae/66/03dd8bb0ba5b971620dcaac145461950f6d8204953e535d2b20c6b65d729/coverage-7.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce9f3bde4e9b031eaf1eb61df95c1401427029ea1bfddb8621c1161dcb0fa02e", size = 258190 },
+ { url = "https://files.pythonhosted.org/packages/45/ae/28a9cce40bf3174426cb2f7e71ee172d98e7f6446dff936a7ccecee34b14/coverage-7.11.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:e4dc07e95495923d6fd4d6c27bf70769425b71c89053083843fd78f378558996", size = 256658 },
+ { url = "https://files.pythonhosted.org/packages/5c/7c/3a44234a8599513684bfc8684878fd7b126c2760f79712bb78c56f19efc4/coverage-7.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:424538266794db2861db4922b05d729ade0940ee69dcf0591ce8f69784db0e11", size = 259342 },
+ { url = "https://files.pythonhosted.org/packages/e1/e6/0108519cba871af0351725ebdb8660fd7a0fe2ba3850d56d32490c7d9b4b/coverage-7.11.0-cp314-cp314t-win32.whl", hash = "sha256:4c1eeb3fb8eb9e0190bebafd0462936f75717687117339f708f395fe455acc73", size = 219568 },
+ { url = "https://files.pythonhosted.org/packages/c9/76/44ba876e0942b4e62fdde23ccb029ddb16d19ba1bef081edd00857ba0b16/coverage-7.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b56efee146c98dbf2cf5cffc61b9829d1e94442df4d7398b26892a53992d3547", size = 220687 },
+ { url = "https://files.pythonhosted.org/packages/b9/0c/0df55ecb20d0d0ed5c322e10a441775e1a3a5d78c60f0c4e1abfe6fcf949/coverage-7.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:b5c2705afa83f49bd91962a4094b6b082f94aef7626365ab3f8f4bd159c5acf3", size = 218711 },
+ { url = "https://files.pythonhosted.org/packages/5f/04/642c1d8a448ae5ea1369eac8495740a79eb4e581a9fb0cbdce56bbf56da1/coverage-7.11.0-py3-none-any.whl", hash = "sha256:4b7589765348d78fb4e5fb6ea35d07564e387da2fc5efff62e0222971f155f68", size = 207761 },
+]
+
+[package.optional-dependencies]
+toml = [
+ { name = "tomli", marker = "python_full_version <= '3.11'" },
+]
+
+[[package]]
+name = "dataframe-expectations"
+version = "0.1.0"
+source = { virtual = "." }
+dependencies = [
+ { name = "pandas" },
+ { name = "pyspark" },
+ { name = "tabulate" },
+]
+
+[package.dev-dependencies]
+dev = [
+ { name = "numpy" },
+ { name = "pre-commit" },
+ { name = "pytest" },
+ { name = "pytest-cov" },
+ { name = "ruff" },
+]
+docs = [
+ { name = "pandas" },
+ { name = "pydata-sphinx-theme" },
+ { name = "pyspark" },
+ { name = "sphinx" },
+ { name = "sphinx-autobuild" },
+ { name = "tabulate" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "pandas", specifier = ">=1.5.0" },
+ { name = "pyspark", specifier = ">=3.3.0" },
+ { name = "tabulate", specifier = ">=0.8.9" },
+]
+
+[package.metadata.requires-dev]
+dev = [
+ { name = "numpy", specifier = ">=1.21.0" },
+ { name = "pre-commit", specifier = ">=2.20.0" },
+ { name = "pytest", specifier = ">=7.0.0" },
+ { name = "pytest-cov", specifier = ">=4.0.0" },
+ { name = "ruff", specifier = ">=0.1.0" },
+]
+docs = [
+ { name = "pandas", specifier = ">=1.5.0" },
+ { name = "pydata-sphinx-theme", specifier = ">=0.13.0" },
+ { name = "pyspark", specifier = ">=3.3.0" },
+ { name = "sphinx", specifier = ">=4.0.0" },
+ { name = "sphinx-autobuild", specifier = ">=2021.3.14" },
+ { name = "tabulate", specifier = ">=0.8.9" },
+]
+
+[[package]]
+name = "distlib"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047 },
+]
+
+[[package]]
+name = "docutils"
+version = "0.21.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 },
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674 },
+]
+
+[[package]]
+name = "filelock"
+version = "3.20.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054 },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
+]
+
+[[package]]
+name = "identify"
+version = "2.6.15"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ff/e7/685de97986c916a6d93b3876139e00eef26ad5bbbd61925d670ae8013449/identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf", size = 99311 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0f/1c/e5fd8f973d4f375adb21565739498e2e9a1e54c858a97b9a8ccfdc81da9b/identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", size = 99183 },
+]
+
+[[package]]
+name = "idna"
+version = "3.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 },
+]
+
+[[package]]
+name = "imagesize"
+version = "1.4.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769 },
+]
+
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 },
+]
+
+[[package]]
+name = "jinja2"
+version = "3.1.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
+]
+
+[[package]]
+name = "markupsafe"
+version = "3.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e8/4b/3541d44f3937ba468b75da9eebcae497dcf67adb65caa16760b0a6807ebb/markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559", size = 11631 },
+ { url = "https://files.pythonhosted.org/packages/98/1b/fbd8eed11021cabd9226c37342fa6ca4e8a98d8188a8d9b66740494960e4/markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419", size = 12057 },
+ { url = "https://files.pythonhosted.org/packages/40/01/e560d658dc0bb8ab762670ece35281dec7b6c1b33f5fbc09ebb57a185519/markupsafe-3.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695", size = 22050 },
+ { url = "https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591", size = 20681 },
+ { url = "https://files.pythonhosted.org/packages/c9/2a/b5c12c809f1c3045c4d580b035a743d12fcde53cf685dbc44660826308da/markupsafe-3.0.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c", size = 20705 },
+ { url = "https://files.pythonhosted.org/packages/cf/e3/9427a68c82728d0a88c50f890d0fc072a1484de2f3ac1ad0bfc1a7214fd5/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f", size = 21524 },
+ { url = "https://files.pythonhosted.org/packages/bc/36/23578f29e9e582a4d0278e009b38081dbe363c5e7165113fad546918a232/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6", size = 20282 },
+ { url = "https://files.pythonhosted.org/packages/56/21/dca11354e756ebd03e036bd8ad58d6d7168c80ce1fe5e75218e4945cbab7/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1", size = 20745 },
+ { url = "https://files.pythonhosted.org/packages/87/99/faba9369a7ad6e4d10b6a5fbf71fa2a188fe4a593b15f0963b73859a1bbd/markupsafe-3.0.3-cp310-cp310-win32.whl", hash = "sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa", size = 14571 },
+ { url = "https://files.pythonhosted.org/packages/d6/25/55dc3ab959917602c96985cb1253efaa4ff42f71194bddeb61eb7278b8be/markupsafe-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8", size = 15056 },
+ { url = "https://files.pythonhosted.org/packages/d0/9e/0a02226640c255d1da0b8d12e24ac2aa6734da68bff14c05dd53b94a0fc3/markupsafe-3.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1", size = 13932 },
+ { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631 },
+ { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058 },
+ { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287 },
+ { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940 },
+ { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887 },
+ { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692 },
+ { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471 },
+ { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923 },
+ { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572 },
+ { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077 },
+ { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876 },
+ { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615 },
+ { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020 },
+ { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332 },
+ { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947 },
+ { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962 },
+ { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760 },
+ { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529 },
+ { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015 },
+ { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540 },
+ { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105 },
+ { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906 },
+ { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622 },
+ { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029 },
+ { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374 },
+ { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980 },
+ { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990 },
+ { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784 },
+ { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588 },
+ { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041 },
+ { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543 },
+ { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113 },
+ { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911 },
+ { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658 },
+ { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066 },
+ { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639 },
+ { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569 },
+ { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284 },
+ { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801 },
+ { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769 },
+ { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642 },
+ { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612 },
+ { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200 },
+ { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973 },
+ { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619 },
+ { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029 },
+ { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408 },
+ { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005 },
+ { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048 },
+ { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821 },
+ { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606 },
+ { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043 },
+ { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747 },
+ { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341 },
+ { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073 },
+ { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661 },
+ { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069 },
+ { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670 },
+ { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598 },
+ { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261 },
+ { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835 },
+ { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733 },
+ { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672 },
+ { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819 },
+ { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426 },
+ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146 },
+]
+
+[[package]]
+name = "nodeenv"
+version = "1.9.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314 },
+]
+
+[[package]]
+name = "numpy"
+version = "2.2.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245 },
+ { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048 },
+ { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542 },
+ { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301 },
+ { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320 },
+ { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050 },
+ { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034 },
+ { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185 },
+ { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149 },
+ { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620 },
+ { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963 },
+ { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743 },
+ { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616 },
+ { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579 },
+ { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005 },
+ { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570 },
+ { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548 },
+ { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521 },
+ { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866 },
+ { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455 },
+ { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348 },
+ { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362 },
+ { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103 },
+ { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382 },
+ { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462 },
+ { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618 },
+ { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511 },
+ { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783 },
+ { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506 },
+ { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190 },
+ { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828 },
+ { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006 },
+ { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765 },
+ { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736 },
+ { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719 },
+ { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072 },
+ { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213 },
+ { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632 },
+ { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532 },
+ { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885 },
+ { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467 },
+ { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144 },
+ { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217 },
+ { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014 },
+ { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935 },
+ { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122 },
+ { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143 },
+ { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260 },
+ { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225 },
+ { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374 },
+ { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391 },
+ { url = "https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754 },
+ { url = "https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476 },
+ { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666 },
+]
+
+[[package]]
+name = "packaging"
+version = "25.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 },
+]
+
+[[package]]
+name = "pandas"
+version = "2.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "numpy" },
+ { name = "python-dateutil" },
+ { name = "pytz" },
+ { name = "tzdata" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3d/f7/f425a00df4fcc22b292c6895c6831c0c8ae1d9fac1e024d16f98a9ce8749/pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", size = 11555763 },
+ { url = "https://files.pythonhosted.org/packages/13/4f/66d99628ff8ce7857aca52fed8f0066ce209f96be2fede6cef9f84e8d04f/pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", size = 10801217 },
+ { url = "https://files.pythonhosted.org/packages/1d/03/3fc4a529a7710f890a239cc496fc6d50ad4a0995657dccc1d64695adb9f4/pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", size = 12148791 },
+ { url = "https://files.pythonhosted.org/packages/40/a8/4dac1f8f8235e5d25b9955d02ff6f29396191d4e665d71122c3722ca83c5/pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", size = 12769373 },
+ { url = "https://files.pythonhosted.org/packages/df/91/82cc5169b6b25440a7fc0ef3a694582418d875c8e3ebf796a6d6470aa578/pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", size = 13200444 },
+ { url = "https://files.pythonhosted.org/packages/10/ae/89b3283800ab58f7af2952704078555fa60c807fff764395bb57ea0b0dbd/pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", size = 13858459 },
+ { url = "https://files.pythonhosted.org/packages/85/72/530900610650f54a35a19476eca5104f38555afccda1aa11a92ee14cb21d/pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", size = 11346086 },
+ { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790 },
+ { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831 },
+ { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267 },
+ { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281 },
+ { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453 },
+ { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361 },
+ { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702 },
+ { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846 },
+ { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618 },
+ { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212 },
+ { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693 },
+ { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002 },
+ { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971 },
+ { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722 },
+ { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671 },
+ { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807 },
+ { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872 },
+ { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371 },
+ { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333 },
+ { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120 },
+ { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991 },
+ { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227 },
+ { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056 },
+ { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189 },
+ { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912 },
+ { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160 },
+ { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233 },
+ { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635 },
+ { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079 },
+ { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049 },
+ { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638 },
+ { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834 },
+ { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925 },
+ { url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071 },
+ { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504 },
+ { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702 },
+ { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535 },
+ { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582 },
+ { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963 },
+ { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175 },
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651 },
+]
+
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 },
+]
+
+[[package]]
+name = "pre-commit"
+version = "4.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cfgv" },
+ { name = "identify" },
+ { name = "nodeenv" },
+ { name = "pyyaml" },
+ { name = "virtualenv" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ff/29/7cf5bbc236333876e4b41f56e06857a87937ce4bf91e117a6991a2dbb02a/pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16", size = 193792 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965 },
+]
+
+[[package]]
+name = "py4j"
+version = "0.10.9.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/38/31/0b210511177070c8d5d3059556194352e5753602fa64b85b7ab81ec1a009/py4j-0.10.9.9.tar.gz", hash = "sha256:f694cad19efa5bd1dee4f3e5270eb406613c974394035e5bfc4ec1aba870b879", size = 761089 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/bd/db/ea0203e495be491c85af87b66e37acfd3bf756fd985f87e46fc5e3bf022c/py4j-0.10.9.9-py2.py3-none-any.whl", hash = "sha256:c7c26e4158defb37b0bb124933163641a2ff6e3a3913f7811b0ddbe07ed61533", size = 203008 },
+]
+
+[[package]]
+name = "pydata-sphinx-theme"
+version = "0.16.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "accessible-pygments" },
+ { name = "babel" },
+ { name = "beautifulsoup4" },
+ { name = "docutils" },
+ { name = "pygments" },
+ { name = "sphinx" },
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/00/20/bb50f9de3a6de69e6abd6b087b52fa2418a0418b19597601605f855ad044/pydata_sphinx_theme-0.16.1.tar.gz", hash = "sha256:a08b7f0b7f70387219dc659bff0893a7554d5eb39b59d3b8ef37b8401b7642d7", size = 2412693 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264 },
+]
+
+[[package]]
+name = "pygments"
+version = "2.19.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 },
+]
+
+[[package]]
+name = "pyspark"
+version = "4.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "py4j" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ae/40/1414582f16c1d7b051c668c2e19c62d21a18bd181d944cb24f5ddbb2423f/pyspark-4.0.1.tar.gz", hash = "sha256:9d1f22d994f60369228397e3479003ffe2dd736ba79165003246ff7bd48e2c73", size = 434204896 }
+
+[[package]]
+name = "pytest"
+version = "8.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+ { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+ { name = "iniconfig" },
+ { name = "packaging" },
+ { name = "pluggy" },
+ { name = "pygments" },
+ { name = "tomli", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750 },
+]
+
+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "coverage", extra = ["toml"] },
+ { name = "pluggy" },
+ { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424 },
+]
+
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "six" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
+]
+
+[[package]]
+name = "pytz"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size = 184227 },
+ { url = "https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size = 174019 },
+ { url = "https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size = 740646 },
+ { url = "https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size = 840793 },
+ { url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size = 770293 },
+ { url = "https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size = 732872 },
+ { url = "https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size = 758828 },
+ { url = "https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size = 142415 },
+ { url = "https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size = 158561 },
+ { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826 },
+ { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577 },
+ { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556 },
+ { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114 },
+ { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638 },
+ { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463 },
+ { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986 },
+ { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543 },
+ { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763 },
+ { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 },
+ { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 },
+ { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 },
+ { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 },
+ { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 },
+ { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 },
+ { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 },
+ { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 },
+ { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 },
+ { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 },
+ { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 },
+ { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 },
+ { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 },
+ { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 },
+ { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 },
+ { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 },
+ { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 },
+ { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427 },
+ { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 },
+ { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 },
+ { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 },
+ { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 },
+ { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 },
+ { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 },
+ { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 },
+ { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 },
+ { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 },
+ { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 },
+ { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912 },
+ { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 },
+ { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 },
+ { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 },
+ { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 },
+ { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 },
+ { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 },
+ { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 },
+ { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 },
+ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "certifi" },
+ { name = "charset-normalizer" },
+ { name = "idna" },
+ { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 },
+]
+
+[[package]]
+name = "ruff"
+version = "0.14.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/58/6ca66896635352812de66f71cdf9ff86b3a4f79071ca5730088c0cd0fc8d/ruff-0.14.1.tar.gz", hash = "sha256:1dd86253060c4772867c61791588627320abcb6ed1577a90ef432ee319729b69", size = 5513429 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/8d/39/9cc5ab181478d7a18adc1c1e051a84ee02bec94eb9bdfd35643d7c74ca31/ruff-0.14.1-py3-none-linux_armv6l.whl", hash = "sha256:083bfc1f30f4a391ae09c6f4f99d83074416b471775b59288956f5bc18e82f8b", size = 12445415 },
+ { url = "https://files.pythonhosted.org/packages/ef/2e/1226961855ccd697255988f5a2474890ac7c5863b080b15bd038df820818/ruff-0.14.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f6fa757cd717f791009f7669fefb09121cc5f7d9bd0ef211371fad68c2b8b224", size = 12784267 },
+ { url = "https://files.pythonhosted.org/packages/c1/ea/fd9e95863124ed159cd0667ec98449ae461de94acda7101f1acb6066da00/ruff-0.14.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6191903d39ac156921398e9c86b7354d15e3c93772e7dbf26c9fcae59ceccd5", size = 11781872 },
+ { url = "https://files.pythonhosted.org/packages/1e/5a/e890f7338ff537dba4589a5e02c51baa63020acfb7c8cbbaea4831562c96/ruff-0.14.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed04f0e04f7a4587244e5c9d7df50e6b5bf2705d75059f409a6421c593a35896", size = 12226558 },
+ { url = "https://files.pythonhosted.org/packages/a6/7a/8ab5c3377f5bf31e167b73651841217542bcc7aa1c19e83030835cc25204/ruff-0.14.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5c9e6cf6cd4acae0febbce29497accd3632fe2025c0c583c8b87e8dbdeae5f61", size = 12187898 },
+ { url = "https://files.pythonhosted.org/packages/48/8d/ba7c33aa55406955fc124e62c8259791c3d42e3075a71710fdff9375134f/ruff-0.14.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fa2458527794ecdfbe45f654e42c61f2503a230545a91af839653a0a93dbc6", size = 12939168 },
+ { url = "https://files.pythonhosted.org/packages/b4/c2/70783f612b50f66d083380e68cbd1696739d88e9b4f6164230375532c637/ruff-0.14.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:39f1c392244e338b21d42ab29b8a6392a722c5090032eb49bb4d6defcdb34345", size = 14386942 },
+ { url = "https://files.pythonhosted.org/packages/48/44/cd7abb9c776b66d332119d67f96acf15830d120f5b884598a36d9d3f4d83/ruff-0.14.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7382fa12a26cce1f95070ce450946bec357727aaa428983036362579eadcc5cf", size = 13990622 },
+ { url = "https://files.pythonhosted.org/packages/eb/56/4259b696db12ac152fe472764b4f78bbdd9b477afd9bc3a6d53c01300b37/ruff-0.14.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd0bf2be3ae8521e1093a487c4aa3b455882f139787770698530d28ed3fbb37c", size = 13431143 },
+ { url = "https://files.pythonhosted.org/packages/e0/35/266a80d0eb97bd224b3265b9437bd89dde0dcf4faf299db1212e81824e7e/ruff-0.14.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cabcaa9ccf8089fb4fdb78d17cc0e28241520f50f4c2e88cb6261ed083d85151", size = 13132844 },
+ { url = "https://files.pythonhosted.org/packages/65/6e/d31ce218acc11a8d91ef208e002a31acf315061a85132f94f3df7a252b18/ruff-0.14.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:747d583400f6125ec11a4c14d1c8474bf75d8b419ad22a111a537ec1a952d192", size = 13401241 },
+ { url = "https://files.pythonhosted.org/packages/9f/b5/dbc4221bf0b03774b3b2f0d47f39e848d30664157c15b965a14d890637d2/ruff-0.14.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5a6e74c0efd78515a1d13acbfe6c90f0f5bd822aa56b4a6d43a9ffb2ae6e56cd", size = 12132476 },
+ { url = "https://files.pythonhosted.org/packages/98/4b/ac99194e790ccd092d6a8b5f341f34b6e597d698e3077c032c502d75ea84/ruff-0.14.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0ea6a864d2fb41a4b6d5b456ed164302a0d96f4daac630aeba829abfb059d020", size = 12139749 },
+ { url = "https://files.pythonhosted.org/packages/47/26/7df917462c3bb5004e6fdfcc505a49e90bcd8a34c54a051953118c00b53a/ruff-0.14.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:0826b8764f94229604fa255918d1cc45e583e38c21c203248b0bfc9a0e930be5", size = 12544758 },
+ { url = "https://files.pythonhosted.org/packages/64/d0/81e7f0648e9764ad9b51dd4be5e5dac3fcfff9602428ccbae288a39c2c22/ruff-0.14.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cbc52160465913a1a3f424c81c62ac8096b6a491468e7d872cb9444a860bc33d", size = 13221811 },
+ { url = "https://files.pythonhosted.org/packages/c3/07/3c45562c67933cc35f6d5df4ca77dabbcd88fddaca0d6b8371693d29fd56/ruff-0.14.1-py3-none-win32.whl", hash = "sha256:e037ea374aaaff4103240ae79168c0945ae3d5ae8db190603de3b4012bd1def6", size = 12319467 },
+ { url = "https://files.pythonhosted.org/packages/02/88/0ee4ca507d4aa05f67e292d2e5eb0b3e358fbcfe527554a2eda9ac422d6b/ruff-0.14.1-py3-none-win_amd64.whl", hash = "sha256:59d599cdff9c7f925a017f6f2c256c908b094e55967f93f2821b1439928746a1", size = 13401123 },
+ { url = "https://files.pythonhosted.org/packages/b8/81/4b6387be7014858d924b843530e1b2a8e531846807516e9bea2ee0936bf7/ruff-0.14.1-py3-none-win_arm64.whl", hash = "sha256:e3b443c4c9f16ae850906b8d0a707b2a4c16f8d2f0a7fe65c475c5886665ce44", size = 12436636 },
+]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 },
+]
+
+[[package]]
+name = "sniffio"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
+]
+
+[[package]]
+name = "snowballstemmer"
+version = "3.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/75/a7/9810d872919697c9d01295633f5d574fb416d47e535f258272ca1f01f447/snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895", size = 105575 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274 },
+]
+
+[[package]]
+name = "soupsieve"
+version = "2.8"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679 },
+]
+
+[[package]]
+name = "sphinx"
+version = "8.1.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "alabaster" },
+ { name = "babel" },
+ { name = "colorama", marker = "sys_platform == 'win32'" },
+ { name = "docutils" },
+ { name = "imagesize" },
+ { name = "jinja2" },
+ { name = "packaging" },
+ { name = "pygments" },
+ { name = "requests" },
+ { name = "snowballstemmer" },
+ { name = "sphinxcontrib-applehelp" },
+ { name = "sphinxcontrib-devhelp" },
+ { name = "sphinxcontrib-htmlhelp" },
+ { name = "sphinxcontrib-jsmath" },
+ { name = "sphinxcontrib-qthelp" },
+ { name = "sphinxcontrib-serializinghtml" },
+ { name = "tomli", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2", size = 3487125 },
+]
+
+[[package]]
+name = "sphinx-autobuild"
+version = "2024.10.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "colorama" },
+ { name = "sphinx" },
+ { name = "starlette" },
+ { name = "uvicorn" },
+ { name = "watchfiles" },
+ { name = "websockets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a5/2c/155e1de2c1ba96a72e5dba152c509a8b41e047ee5c2def9e9f0d812f8be7/sphinx_autobuild-2024.10.3.tar.gz", hash = "sha256:248150f8f333e825107b6d4b86113ab28fa51750e5f9ae63b59dc339be951fb1", size = 14023 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/18/c0/eba125db38c84d3c74717008fd3cb5000b68cd7e2cbafd1349c6a38c3d3b/sphinx_autobuild-2024.10.3-py3-none-any.whl", hash = "sha256:158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa", size = 11908 },
+]
+
+[[package]]
+name = "sphinxcontrib-applehelp"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 },
+]
+
+[[package]]
+name = "sphinxcontrib-devhelp"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 },
+]
+
+[[package]]
+name = "sphinxcontrib-htmlhelp"
+version = "2.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 },
+]
+
+[[package]]
+name = "sphinxcontrib-jsmath"
+version = "1.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 },
+]
+
+[[package]]
+name = "sphinxcontrib-qthelp"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 },
+]
+
+[[package]]
+name = "sphinxcontrib-serializinghtml"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 },
+]
+
+[[package]]
+name = "starlette"
+version = "0.48.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+ { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a7/a5/d6f429d43394057b67a6b5bbe6eae2f77a6bf7459d961fdb224bf206eee6/starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46", size = 2652949 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736 },
+]
+
+[[package]]
+name = "tabulate"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 },
+]
+
+[[package]]
+name = "tomli"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236 },
+ { url = "https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084 },
+ { url = "https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832 },
+ { url = "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052 },
+ { url = "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555 },
+ { url = "https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128 },
+ { url = "https://files.pythonhosted.org/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445 },
+ { url = "https://files.pythonhosted.org/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165 },
+ { url = "https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891 },
+ { url = "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796 },
+ { url = "https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121 },
+ { url = "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070 },
+ { url = "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859 },
+ { url = "https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296 },
+ { url = "https://files.pythonhosted.org/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124 },
+ { url = "https://files.pythonhosted.org/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698 },
+ { url = "https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", size = 154819 },
+ { url = "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", size = 148766 },
+ { url = "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", size = 240771 },
+ { url = "https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", size = 248586 },
+ { url = "https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", size = 244792 },
+ { url = "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", size = 248909 },
+ { url = "https://files.pythonhosted.org/packages/f8/84/ef50c51b5a9472e7265ce1ffc7f24cd4023d289e109f669bdb1553f6a7c2/tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606", size = 96946 },
+ { url = "https://files.pythonhosted.org/packages/b2/b7/718cd1da0884f281f95ccfa3a6cc572d30053cba64603f79d431d3c9b61b/tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999", size = 107705 },
+ { url = "https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", size = 154244 },
+ { url = "https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", size = 148637 },
+ { url = "https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", size = 241925 },
+ { url = "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", size = 249045 },
+ { url = "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", size = 245835 },
+ { url = "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", size = 253109 },
+ { url = "https://files.pythonhosted.org/packages/22/0c/b4da635000a71b5f80130937eeac12e686eefb376b8dee113b4a582bba42/tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463", size = 97930 },
+ { url = "https://files.pythonhosted.org/packages/b9/74/cb1abc870a418ae99cd5c9547d6bce30701a954e0e721821df483ef7223c/tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8", size = 107964 },
+ { url = "https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", size = 163065 },
+ { url = "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", size = 159088 },
+ { url = "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", size = 268193 },
+ { url = "https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", size = 275488 },
+ { url = "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", size = 272669 },
+ { url = "https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", size = 279709 },
+ { url = "https://files.pythonhosted.org/packages/7e/46/cc36c679f09f27ded940281c38607716c86cf8ba4a518d524e349c8b4874/tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0", size = 107563 },
+ { url = "https://files.pythonhosted.org/packages/84/ff/426ca8683cf7b753614480484f6437f568fd2fda2edbdf57a2d3d8b27a0b/tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba", size = 119756 },
+ { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408 },
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.15.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 },
+]
+
+[[package]]
+name = "tzdata"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795 },
+]
+
+[[package]]
+name = "uvicorn"
+version = "0.38.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "click" },
+ { name = "h11" },
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109 },
+]
+
+[[package]]
+name = "virtualenv"
+version = "20.35.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "distlib" },
+ { name = "filelock" },
+ { name = "platformdirs" },
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a4/d5/b0ccd381d55c8f45d46f77df6ae59fbc23d19e901e2d523395598e5f4c93/virtualenv-20.35.3.tar.gz", hash = "sha256:4f1a845d131133bdff10590489610c98c168ff99dc75d6c96853801f7f67af44", size = 6002907 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/27/73/d9a94da0e9d470a543c1b9d3ccbceb0f59455983088e727b8a1824ed90fb/virtualenv-20.35.3-py3-none-any.whl", hash = "sha256:63d106565078d8c8d0b206d48080f938a8b25361e19432d2c9db40d2899c810a", size = 5981061 },
+]
+
+[[package]]
+name = "watchfiles"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "anyio" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a7/1a/206e8cf2dd86fddf939165a57b4df61607a1e0add2785f170a3f616b7d9f/watchfiles-1.1.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:eef58232d32daf2ac67f42dea51a2c80f0d03379075d44a587051e63cc2e368c", size = 407318 },
+ { url = "https://files.pythonhosted.org/packages/b3/0f/abaf5262b9c496b5dad4ed3c0e799cbecb1f8ea512ecb6ddd46646a9fca3/watchfiles-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03fa0f5237118a0c5e496185cafa92878568b652a2e9a9382a5151b1a0380a43", size = 394478 },
+ { url = "https://files.pythonhosted.org/packages/b1/04/9cc0ba88697b34b755371f5ace8d3a4d9a15719c07bdc7bd13d7d8c6a341/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca65483439f9c791897f7db49202301deb6e15fe9f8fe2fed555bf986d10c31", size = 449894 },
+ { url = "https://files.pythonhosted.org/packages/d2/9c/eda4615863cd8621e89aed4df680d8c3ec3da6a4cf1da113c17decd87c7f/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f0ab1c1af0cb38e3f598244c17919fb1a84d1629cc08355b0074b6d7f53138ac", size = 459065 },
+ { url = "https://files.pythonhosted.org/packages/84/13/f28b3f340157d03cbc8197629bc109d1098764abe1e60874622a0be5c112/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bc570d6c01c206c46deb6e935a260be44f186a2f05179f52f7fcd2be086a94d", size = 488377 },
+ { url = "https://files.pythonhosted.org/packages/86/93/cfa597fa9389e122488f7ffdbd6db505b3b915ca7435ecd7542e855898c2/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e84087b432b6ac94778de547e08611266f1f8ffad28c0ee4c82e028b0fc5966d", size = 595837 },
+ { url = "https://files.pythonhosted.org/packages/57/1e/68c1ed5652b48d89fc24d6af905d88ee4f82fa8bc491e2666004e307ded1/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:620bae625f4cb18427b1bb1a2d9426dc0dd5a5ba74c7c2cdb9de405f7b129863", size = 473456 },
+ { url = "https://files.pythonhosted.org/packages/d5/dc/1a680b7458ffa3b14bb64878112aefc8f2e4f73c5af763cbf0bd43100658/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab", size = 455614 },
+ { url = "https://files.pythonhosted.org/packages/61/a5/3d782a666512e01eaa6541a72ebac1d3aae191ff4a31274a66b8dd85760c/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bbe1ef33d45bc71cf21364df962af171f96ecaeca06bd9e3d0b583efb12aec82", size = 630690 },
+ { url = "https://files.pythonhosted.org/packages/9b/73/bb5f38590e34687b2a9c47a244aa4dd50c56a825969c92c9c5fc7387cea1/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a0bb430adb19ef49389e1ad368450193a90038b5b752f4ac089ec6942c4dff4", size = 622459 },
+ { url = "https://files.pythonhosted.org/packages/f1/ac/c9bb0ec696e07a20bd58af5399aeadaef195fb2c73d26baf55180fe4a942/watchfiles-1.1.1-cp310-cp310-win32.whl", hash = "sha256:3f6d37644155fb5beca5378feb8c1708d5783145f2a0f1c4d5a061a210254844", size = 272663 },
+ { url = "https://files.pythonhosted.org/packages/11/a0/a60c5a7c2ec59fa062d9a9c61d02e3b6abd94d32aac2d8344c4bdd033326/watchfiles-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:a36d8efe0f290835fd0f33da35042a1bb5dc0e83cbc092dcf69bce442579e88e", size = 287453 },
+ { url = "https://files.pythonhosted.org/packages/1f/f8/2c5f479fb531ce2f0564eda479faecf253d886b1ab3630a39b7bf7362d46/watchfiles-1.1.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5", size = 406529 },
+ { url = "https://files.pythonhosted.org/packages/fe/cd/f515660b1f32f65df671ddf6f85bfaca621aee177712874dc30a97397977/watchfiles-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741", size = 394384 },
+ { url = "https://files.pythonhosted.org/packages/7b/c3/28b7dc99733eab43fca2d10f55c86e03bd6ab11ca31b802abac26b23d161/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6", size = 448789 },
+ { url = "https://files.pythonhosted.org/packages/4a/24/33e71113b320030011c8e4316ccca04194bf0cbbaeee207f00cbc7d6b9f5/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b", size = 460521 },
+ { url = "https://files.pythonhosted.org/packages/f4/c3/3c9a55f255aa57b91579ae9e98c88704955fa9dac3e5614fb378291155df/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2cd9e04277e756a2e2d2543d65d1e2166d6fd4c9b183f8808634fda23f17b14", size = 488722 },
+ { url = "https://files.pythonhosted.org/packages/49/36/506447b73eb46c120169dc1717fe2eff07c234bb3232a7200b5f5bd816e9/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d", size = 596088 },
+ { url = "https://files.pythonhosted.org/packages/82/ab/5f39e752a9838ec4d52e9b87c1e80f1ee3ccdbe92e183c15b6577ab9de16/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff", size = 472923 },
+ { url = "https://files.pythonhosted.org/packages/af/b9/a419292f05e302dea372fa7e6fda5178a92998411f8581b9830d28fb9edb/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606", size = 456080 },
+ { url = "https://files.pythonhosted.org/packages/b0/c3/d5932fd62bde1a30c36e10c409dc5d54506726f08cb3e1d8d0ba5e2bc8db/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701", size = 629432 },
+ { url = "https://files.pythonhosted.org/packages/f7/77/16bddd9779fafb795f1a94319dc965209c5641db5bf1edbbccace6d1b3c0/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10", size = 623046 },
+ { url = "https://files.pythonhosted.org/packages/46/ef/f2ecb9a0f342b4bfad13a2787155c6ee7ce792140eac63a34676a2feeef2/watchfiles-1.1.1-cp311-cp311-win32.whl", hash = "sha256:de6da501c883f58ad50db3a32ad397b09ad29865b5f26f64c24d3e3281685849", size = 271473 },
+ { url = "https://files.pythonhosted.org/packages/94/bc/f42d71125f19731ea435c3948cad148d31a64fccde3867e5ba4edee901f9/watchfiles-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:35c53bd62a0b885bf653ebf6b700d1bf05debb78ad9292cf2a942b23513dc4c4", size = 287598 },
+ { url = "https://files.pythonhosted.org/packages/57/c9/a30f897351f95bbbfb6abcadafbaca711ce1162f4db95fc908c98a9165f3/watchfiles-1.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:57ca5281a8b5e27593cb7d82c2ac927ad88a96ed406aa446f6344e4328208e9e", size = 277210 },
+ { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745 },
+ { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769 },
+ { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374 },
+ { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485 },
+ { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813 },
+ { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816 },
+ { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186 },
+ { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812 },
+ { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196 },
+ { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657 },
+ { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042 },
+ { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410 },
+ { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209 },
+ { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321 },
+ { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783 },
+ { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279 },
+ { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405 },
+ { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976 },
+ { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506 },
+ { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936 },
+ { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147 },
+ { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007 },
+ { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280 },
+ { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056 },
+ { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162 },
+ { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909 },
+ { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389 },
+ { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964 },
+ { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114 },
+ { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264 },
+ { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877 },
+ { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176 },
+ { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577 },
+ { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425 },
+ { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826 },
+ { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208 },
+ { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315 },
+ { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869 },
+ { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919 },
+ { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845 },
+ { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027 },
+ { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615 },
+ { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836 },
+ { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099 },
+ { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626 },
+ { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519 },
+ { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078 },
+ { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664 },
+ { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154 },
+ { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820 },
+ { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510 },
+ { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408 },
+ { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968 },
+ { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096 },
+ { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040 },
+ { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847 },
+ { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072 },
+ { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104 },
+ { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112 },
+ { url = "https://files.pythonhosted.org/packages/ba/4c/a888c91e2e326872fa4705095d64acd8aa2fb9c1f7b9bd0588f33850516c/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:17ef139237dfced9da49fb7f2232c86ca9421f666d78c264c7ffca6601d154c3", size = 409611 },
+ { url = "https://files.pythonhosted.org/packages/1e/c7/5420d1943c8e3ce1a21c0a9330bcf7edafb6aa65d26b21dbb3267c9e8112/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:672b8adf25b1a0d35c96b5888b7b18699d27d4194bac8beeae75be4b7a3fc9b2", size = 396889 },
+ { url = "https://files.pythonhosted.org/packages/0c/e5/0072cef3804ce8d3aaddbfe7788aadff6b3d3f98a286fdbee9fd74ca59a7/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a13aea58bc2b90173bc69f2a90de8e282648939a00a602e1dc4ee23e26b66d", size = 451616 },
+ { url = "https://files.pythonhosted.org/packages/83/4e/b87b71cbdfad81ad7e83358b3e447fedd281b880a03d64a760fe0a11fc2e/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b495de0bb386df6a12b18335a0285dda90260f51bdb505503c02bcd1ce27a8b", size = 458413 },
+ { url = "https://files.pythonhosted.org/packages/d3/8e/e500f8b0b77be4ff753ac94dc06b33d8f0d839377fee1b78e8c8d8f031bf/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88", size = 408250 },
+ { url = "https://files.pythonhosted.org/packages/bd/95/615e72cd27b85b61eec764a5ca51bd94d40b5adea5ff47567d9ebc4d275a/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336", size = 396117 },
+ { url = "https://files.pythonhosted.org/packages/c9/81/e7fe958ce8a7fb5c73cc9fb07f5aeaf755e6aa72498c57d760af760c91f8/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24", size = 450493 },
+ { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546 },
+]
+
+[[package]]
+name = "websockets"
+version = "15.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423 },
+ { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080 },
+ { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329 },
+ { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312 },
+ { url = "https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319 },
+ { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631 },
+ { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016 },
+ { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426 },
+ { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360 },
+ { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388 },
+ { url = "https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830 },
+ { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423 },
+ { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082 },
+ { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330 },
+ { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878 },
+ { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883 },
+ { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252 },
+ { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521 },
+ { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958 },
+ { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918 },
+ { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388 },
+ { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828 },
+ { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437 },
+ { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096 },
+ { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332 },
+ { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152 },
+ { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096 },
+ { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523 },
+ { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790 },
+ { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165 },
+ { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160 },
+ { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395 },
+ { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841 },
+ { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440 },
+ { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098 },
+ { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329 },
+ { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111 },
+ { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054 },
+ { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496 },
+ { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829 },
+ { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217 },
+ { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195 },
+ { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393 },
+ { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837 },
+ { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109 },
+ { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343 },
+ { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599 },
+ { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207 },
+ { url = "https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155 },
+ { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884 },
+ { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
+]