diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..cc25291 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,34 @@ +version: 2 +updates: + # UV package manager support + - package-ecosystem: "uv" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" + timezone: "Europe/Berlin" + open-pull-requests-limit: 10 + # Group updates to reduce PR noise + groups: + major-updates: + patterns: + - "*" + update-types: + - "major" + minor-updates: + patterns: + - "*" + update-types: + - "minor" + patch-updates: + patterns: + - "*" + update-types: + - "patch" + # Ignore dependencies that need manual review + ignore: + - dependency-name: "pyspark" + # PySpark updates can break compatibility, needs manual testing + - dependency-name: "pandas" + update-types: ["version-update:semver-major"] + # Only allow minor/patch updates for pandas to avoid breaking changes diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..55fdb4b --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,17 @@ +## Description + + +## Checklist + + +- [ ] Tests have been added in the prescribed format +- [ ] `CHANGELOG.md` has been updated to reflect changes +- [ ] Version has been updated in `pyproject.toml` diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..a799fb6 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,71 @@ +name: CI + +on: [push] + +permissions: + contents: write + +concurrency: + group: ${{ github.workflow }}${{ github.ref_name != github.event.repository.default_branch && github.ref || github.run_id }} + cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch }} + +jobs: + test: + runs-on: [ self-hosted, python-small ] + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + fail-fast: false # Don't cancel other jobs if one fails + name: Test (Python ${{ matrix.python-version }}) + steps: + - uses: actions/checkout@v4 + - name: Install Python and UV + uses: astral-sh/setup-uv@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + uv sync --group dev + - name: Run sanity check + run: | + uv run python sanity_checks.py + working-directory: dataframe_expectations + - name: Run tests + run: | + uv run pytest tests/ --cov=dataframe_expectations + + lint: + runs-on: [ self-hosted, python-small ] + env: + PYTHON_VERSION: "3.11" # Use a single version for linting + steps: + - uses: actions/checkout@v4 + - name: Install Python and UV + uses: astral-sh/setup-uv@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Install dependencies + run: | + uv sync --group dev + - name: Pre-commit + run: | + uv run pre-commit run --all-files --show-diff-on-failure + + docs: + runs-on: [ self-hosted, python-small ] + env: + PYTHON_VERSION: "3.11" # Use a single version for docs + needs: [test, lint] + steps: + - uses: actions/checkout@v4 + - name: Install Python and UV + uses: astral-sh/setup-uv@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Install dependencies + run: | + uv sync --group docs + - name: Build docs + run: | + uv run sphinx-build source build/html + working-directory: docs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2372d6a --- /dev/null +++ b/.gitignore @@ -0,0 +1,227 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / 
packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# sphinx +# Sphinx documentation build output +build/ + +# Sphinx cache and temporary files +source/.doctrees/ +.doctrees/ + +# Auto-generated API documentation (if using sphinx-apidoc) +source/_autosummary/ +source/_generated/ +source/api/ + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml + + +# Ignore generated documentation +docs/build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..8d5dd55 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,61 @@ +fail_fast: false +default_stages: [pre-commit] + +repos: + # ============================================================================ + # General checks + # ============================================================================ + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + stages: [pre-commit, manual] + - id: end-of-file-fixer + stages: [pre-commit, manual] + - id: check-yaml + stages: [pre-commit, manual] + - id: check-added-large-files + stages: [pre-commit, manual] + - id: check-docstring-first + files: \.py$ + stages: [pre-commit, manual] + - id: check-merge-conflict + stages: [pre-commit, manual] + - id: check-case-conflict + stages: [pre-commit, manual] + + # ============================================================================ + # Ruff - Modern Python linter and formatter + # ============================================================================ + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.1 + hooks: + # Ruff linter with auto-fix + - id: ruff + args: [--fix] + files: ^(dataframe_expectations|tests)/.*\.py$ + stages: [pre-commit, manual] + + # Ruff formatter (replaces black) + - id: ruff-format + files: ^(dataframe_expectations|tests)/.*\.py$ + stages: [pre-commit, manual] + + # ============================================================================ + # Type checking - mypy + # ============================================================================ + - repo: 
https://github.com/pre-commit/mirrors-mypy + rev: v1.18.2 + hooks: + - id: mypy + files: ^(dataframe_expectations|tests)/.*\.py$ + args: [--config-file, ./pyproject.toml] + additional_dependencies: [ + types-tabulate, + pandas-stubs, + pyspark-stubs, + types-PyYAML, + pytest, + pytest-cov, + ] + stages: [pre-commit, manual] diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3ab705c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog dataframe-expectations + +## Version 0.1.0 +- Initial commit contains all the basic functionality for the library +- Added documentation diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..0582197 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2025 GetYourGuide
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
index 45f630e..f95bbfc 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,143 @@
-# dataframe-expectations
+## 🎯 DataFrameExpectations
+
+**DataFrameExpectations** is a Python library designed to validate **Pandas** and **PySpark** DataFrames using customizable, reusable expectations. It simplifies testing in data pipelines and end-to-end workflows by providing a standardized framework for DataFrame validation.
+
+Because every DataFrame is validated through the same framework, contributions made here, such as new expectations, immediately benefit all users of the library.
+You can find the complete list of expectations [here](docs/build/html/expectations.html).
+
+### Installation
+```bash
+pip install dataframe-expectations
+```
+
+### Development setup
+
+To set up the development environment:
+
+```bash
+# 1. Clone the repository
+git clone https://github.com/getyourguide/dataframe-expectations.git
+cd dataframe-expectations
+
+# 2. Install UV package manager
+pip install uv
+
+# 3. Install development dependencies (this will automatically create a virtual environment)
+uv sync --group dev
+
+# 4. (Optional) To explicitly activate the virtual environment:
+source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+
+# 5. Run tests (this will run the tests in the virtual environment)
+uv run pytest tests/ --cov=dataframe_expectations
+```
+
+### Using the library
+
+**Pandas example:**
+```python
+import pandas as pd
+
+from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+suite = (
+    DataFrameExpectationsSuite()
+    .expect_value_greater_than("age", 18)
+    .expect_value_less_than("age", 40)
+)
+
+# Create a Pandas DataFrame
+test_pandas_df = pd.DataFrame({"age": [20, 15, 30], "name": ["Alice", "Bob", "Charlie"]})
+
+suite.run(test_pandas_df)
+```
+
+**PySpark example:**
+```python
+from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+suite = (
+    DataFrameExpectationsSuite()
+    .expect_value_greater_than("age", 18)
+    .expect_value_less_than("age", 40)
+)
+
+# Create a PySpark DataFrame (assumes an active SparkSession named `spark`)
+test_spark_df = spark.createDataFrame(
+    [
+        {"name": "Alice", "age": 20},
+        {"name": "Bob", "age": 15},
+        {"name": "Charlie", "age": 30},
+    ]
+)
+
+suite.run(test_spark_df)
+```
+
+**Output:**
+```python
+========================== Running expectations suite ==========================
+ExpectationValueGreaterThan ('age' greater than 18) ... FAIL
+ExpectationValueLessThan ('age' less than 40) ... OK
+============================ 1 success, 1 failures =============================
+
+ExpectationSuiteFailure: (1/2) expectations failed.
+
+================================================================================
+List of violations:
+--------------------------------------------------------------------------------
+[Failed 1/1] ExpectationValueGreaterThan ('age' greater than 18): Found 1 row(s) where 'age' is not greater than 18.
+Some examples of violations:
++-----+------+
+| age | name |
++-----+------+
+| 15 | Bob |
++-----+------+
+================================================================================
+```
+
+### How to contribute?
+Contributions are welcome! You can enhance the library by adding new expectations, refining existing ones, or improving the testing framework. A minimal sketch of a custom expectation is included at the end of this README.
+
+### Versioning
+
+This project follows [Semantic Versioning](https://semver.org/) (SemVer):
+- **MAJOR** version for incompatible API changes
+- **MINOR** version for backward-compatible functionality additions
+- **PATCH** version for backward-compatible bug fixes
+
+To update the version, manually edit the version field in `pyproject.toml`:
+
+```toml
+[project]
+version = "0.2.0"  # Update this line
+```
+
+Alternatively, you can use uv to bump the version number:
+
+```bash
+# Bump patch version (0.1.0 -> 0.1.1)
+uv version --bump patch
+
+# Bump minor version (0.1.0 -> 0.2.0)
+uv version --bump minor
+
+# Bump major version (0.1.0 -> 1.0.0)
+uv version --bump major
+
+# Set a specific version
+uv version 0.2.0
+```
+
+Don't forget to update the [CHANGELOG.md](CHANGELOG.md) file with a detailed description of the changes you've introduced.
+
+### Security
+For security issues, please contact security@getyourguide.com.
+
+### Legal
+dataframe-expectations is licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE.txt) for the full text.
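+
+### Example: sketching a custom expectation
+
+The snippet below is a minimal, hypothetical sketch of what a contributed expectation could look like, modeled on the `DataFrameExpectation` base class and the result-message classes added in this PR. The class name `ExpectationColumnNotEmpty` and its behavior are illustrative only and are not part of the library.
+
+```python
+from typing import cast
+
+from pandas import DataFrame as PandasDataFrame
+from pyspark.sql import DataFrame as PySparkDataFrame
+
+from dataframe_expectations import DataFrameLike, DataFrameType
+from dataframe_expectations.expectations import DataFrameExpectation
+from dataframe_expectations.result_message import (
+    DataFrameExpectationFailureMessage,
+    DataFrameExpectationResultMessage,
+    DataFrameExpectationSuccessMessage,
+)
+
+
+class ExpectationColumnNotEmpty(DataFrameExpectation):
+    """Hypothetical expectation: a column contains at least one non-null value."""
+
+    def __init__(self, column_name: str):
+        self.column_name = column_name
+
+    def get_description(self) -> str:
+        return f"column '{self.column_name}' contains at least one non-null value"
+
+    def validate_pandas(
+        self, data_frame: DataFrameLike, **kwargs
+    ) -> DataFrameExpectationResultMessage:
+        # Cast to PandasDataFrame, mirroring the style used in this PR
+        pandas_df = cast(PandasDataFrame, data_frame)
+        if pandas_df[self.column_name].notnull().any():
+            return DataFrameExpectationSuccessMessage(
+                expectation_name=self.get_expectation_name()
+            )
+        return DataFrameExpectationFailureMessage(
+            expectation_str=str(self),
+            data_frame_type=DataFrameType.PANDAS,
+            message=f"Column '{self.column_name}' contains only null values.",
+        )
+
+    def validate_pyspark(
+        self, data_frame: DataFrameLike, **kwargs
+    ) -> DataFrameExpectationResultMessage:
+        pyspark_df = cast(PySparkDataFrame, data_frame)
+        # Count rows where the column is non-null
+        non_null = pyspark_df.where(pyspark_df[self.column_name].isNotNull()).count()
+        if non_null > 0:
+            return DataFrameExpectationSuccessMessage(
+                expectation_name=self.get_expectation_name()
+            )
+        return DataFrameExpectationFailureMessage(
+            expectation_str=str(self),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=f"Column '{self.column_name}' contains only null values.",
+        )
+```
+
+Built-in expectations in this PR are additionally exposed through factory functions decorated with `@register_expectation`; a real contribution would likely follow that pattern so the new expectation can be created by name.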
diff --git a/dataframe_expectations/__init__.py b/dataframe_expectations/__init__.py new file mode 100644 index 0000000..a7c7e58 --- /dev/null +++ b/dataframe_expectations/__init__.py @@ -0,0 +1,16 @@ +from enum import Enum +from typing import Union + +from pandas import DataFrame as PandasDataFrame +from pyspark.sql import DataFrame as PySparkDataFrame + +DataFrameLike = Union[PySparkDataFrame, PandasDataFrame] + + +class DataFrameType(str, Enum): + """ + Enum for DataFrame types. + """ + + PANDAS = "pandas" + PYSPARK = "pyspark" diff --git a/dataframe_expectations/expectations/__init__.py b/dataframe_expectations/expectations/__init__.py new file mode 100644 index 0000000..09233c2 --- /dev/null +++ b/dataframe_expectations/expectations/__init__.py @@ -0,0 +1,110 @@ +from abc import ABC, abstractmethod +from typing import cast + +from pandas import DataFrame as PandasDataFrame +from pyspark.sql import DataFrame as PySparkDataFrame + +# Import the connect DataFrame type for Spark Connect +try: + from pyspark.sql.connect.dataframe import DataFrame as PySparkConnectDataFrame +except ImportError: + # Fallback for older PySpark versions that don't have connect + PySparkConnectDataFrame = None # type: ignore[misc,assignment] + +from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.result_message import ( + DataFrameExpectationResultMessage, +) + + +class DataFrameExpectation(ABC): + """ + Base class for DataFrame expectations. + """ + + def get_expectation_name(self) -> str: + """ + Returns the class name as the expectation name. + """ + return type(self).__name__ + + @abstractmethod + def get_description(self) -> str: + """ + Returns a description of the expectation. + """ + raise NotImplementedError( + f"description method must be implemented for {self.__class__.__name__}" + ) + + def __str__(self): + """ + Returns a string representation of the expectation. + """ + return f"{self.get_expectation_name()} ({self.get_description()})" + + @classmethod + def infer_data_frame_type(cls, data_frame: DataFrameLike) -> DataFrameType: + """ + Infer the DataFrame type based on the provided DataFrame. + """ + if isinstance(data_frame, PandasDataFrame): + return DataFrameType.PANDAS + elif isinstance(data_frame, PySparkDataFrame): + return DataFrameType.PYSPARK + elif PySparkConnectDataFrame is not None and isinstance( + data_frame, PySparkConnectDataFrame + ): + return DataFrameType.PYSPARK + else: + raise ValueError(f"Unsupported DataFrame type: {type(data_frame)}") + + def validate(self, data_frame: DataFrameLike, **kwargs): + """ + Validate the DataFrame against the expectation. + """ + data_frame_type = self.infer_data_frame_type(data_frame) + + if data_frame_type == DataFrameType.PANDAS: + return self.validate_pandas(data_frame=data_frame, **kwargs) + elif data_frame_type == DataFrameType.PYSPARK: + return self.validate_pyspark(data_frame=data_frame, **kwargs) + else: + raise ValueError(f"Unsupported DataFrame type: {data_frame_type}") + + @abstractmethod + def validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a pandas DataFrame against the expectation. + """ + raise NotImplementedError( + f"validate_pandas method must be implemented for {self.__class__.__name__}" + ) + + @abstractmethod + def validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a PySpark DataFrame against the expectation. 
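+
+        Implementations should return a DataFrameExpectationResultMessage that
+        records success or failure, rather than raising for data violations.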
+ """ + raise NotImplementedError( + f"validate_pyspark method must be implemented for {self.__class__.__name__}" + ) + + @classmethod + def num_data_frame_rows(cls, data_frame: DataFrameLike) -> int: + """ + Count the number of rows in the DataFrame. + """ + data_frame_type = cls.infer_data_frame_type(data_frame) + if data_frame_type == DataFrameType.PANDAS: + # Cast to PandasDataFrame since we know it's a Pandas DataFrame at this point + return len(cast(PandasDataFrame, data_frame)) + elif data_frame_type == DataFrameType.PYSPARK: + # Cast to PySparkDataFrame since we know it's a PySpark DataFrame at this point + return cast(PySparkDataFrame, data_frame).count() + else: + raise ValueError(f"Unsupported DataFrame type: {data_frame_type}") diff --git a/dataframe_expectations/expectations/aggregation_expectation.py b/dataframe_expectations/expectations/aggregation_expectation.py new file mode 100644 index 0000000..b6a1b2e --- /dev/null +++ b/dataframe_expectations/expectations/aggregation_expectation.py @@ -0,0 +1,130 @@ +from abc import abstractmethod +from typing import List, Union + +from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationResultMessage, +) + + +class DataFrameAggregationExpectation(DataFrameExpectation): + """ + Base class for DataFrame aggregation expectations. + This class is designed to first aggregate data and then validate the aggregation results. + """ + + def __init__( + self, + expectation_name: str, + column_names: List[str], + description: str, + ): + """ + Template for implementing DataFrame aggregation expectations, where data is first aggregated + and then the aggregation results are validated. + + :param expectation_name: The name of the expectation. This will be used during logging. + :param column_names: The list of column names to aggregate on. + :param description: A description of the expectation used in logging. + """ + self.expectation_name = expectation_name + self.column_names = column_names + self.description = description + + def get_expectation_name(self) -> str: + """ + Returns the expectation name. + """ + return self.expectation_name + + def get_description(self) -> str: + """ + Returns a description of the expectation. + """ + return self.description + + @abstractmethod + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Aggregate and validate a pandas DataFrame against the expectation. + + Note: This method should NOT check for column existence - that's handled + automatically by the validate_pandas method. + """ + raise NotImplementedError( + f"aggregate_and_validate_pandas method must be implemented for {self.__class__.__name__}" + ) + + @abstractmethod + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Aggregate and validate a PySpark DataFrame against the expectation. + + Note: This method should NOT check for column existence - that's handled + automatically by the validate_pyspark method. + """ + raise NotImplementedError( + f"aggregate_and_validate_pyspark method must be implemented for {self.__class__.__name__}" + ) + + def validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a pandas DataFrame against the expectation. 
+ Automatically checks column existence before calling the implementation. + """ + # Check if all required columns exist + column_error = self._check_columns_exist(data_frame) + if column_error: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=column_error, + ) + + # Call the implementation-specific validation + return self.aggregate_and_validate_pandas(data_frame, **kwargs) + + def validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a PySpark DataFrame against the expectation. + Automatically checks column existence before calling the implementation. + """ + # Check if all required columns exist + column_error = self._check_columns_exist(data_frame) + if column_error: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=column_error, + ) + + # Call the implementation-specific validation + return self.aggregate_and_validate_pyspark(data_frame, **kwargs) + + def _check_columns_exist(self, data_frame: DataFrameLike) -> Union[str, None]: + """ + Check if all required columns exist in the DataFrame. + Returns error message if columns are missing, None otherwise. + """ + # Skip column check if no columns are required (e.g., for DataFrame-level expectations) + if not self.column_names: + return None + + missing_columns = [col for col in self.column_names if col not in data_frame.columns] + if missing_columns: + if len(missing_columns) == 1: + return f"Column '{missing_columns[0]}' does not exist in the DataFrame." + else: + missing_columns_str = ", ".join([f"'{col}'" for col in missing_columns]) + return f"Columns [{missing_columns_str}] do not exist in the DataFrame." + return None diff --git a/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py b/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py new file mode 100644 index 0000000..c428b4b --- /dev/null +++ b/dataframe_expectations/expectations/aggregation_expectations/any_value_expectations.py @@ -0,0 +1,490 @@ +from typing import cast + +from pandas import DataFrame as PandasDataFrame +from pyspark.sql import DataFrame as PySparkDataFrame +from pyspark.sql import functions as F + +from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.expectations.aggregation_expectation import ( + DataFrameAggregationExpectation, +) +from dataframe_expectations.expectations.expectation_registry import ( + register_expectation, +) +from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationResultMessage, + DataFrameExpectationSuccessMessage, +) + + +class ExpectationMinRows(DataFrameAggregationExpectation): + """ + Expectation that validates a DataFrame has at least a minimum number of rows. + + This expectation counts the total number of rows in the DataFrame and checks if it + meets or exceeds the specified minimum threshold. + + Examples: + DataFrame with 100 rows: + - ExpectationMinRows(min_rows=50) → PASS + - ExpectationMinRows(min_rows=150) → FAIL + """ + + def __init__(self, min_rows: int): + """ + Initialize the minimum rows expectation. + + Args: + min_rows (int): Minimum number of rows required (inclusive). 
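+
+        Raises:
+            ValueError: If min_rows is negative.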
+ """ + if min_rows < 0: + raise ValueError(f"min_rows must be non-negative, got {min_rows}") + + description = f"DataFrame contains at least {min_rows} rows" + + self.min_rows = min_rows + + super().__init__( + expectation_name="ExpectationMinRows", + column_names=[], # No specific columns required + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate minimum rows in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + row_count = len(pandas_df) + + if row_count >= self.min_rows: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"DataFrame has {row_count} rows, expected at least {self.min_rows}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error counting DataFrame rows: {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate minimum rows in a PySpark DataFrame.""" + try: + row_count = data_frame.count() + + if row_count >= self.min_rows: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"DataFrame has {row_count} rows, expected at least {self.min_rows}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error counting DataFrame rows: {str(e)}", + ) + + +class ExpectationMaxRows(DataFrameAggregationExpectation): + """ + Expectation that validates a DataFrame has at most a maximum number of rows. + + This expectation counts the total number of rows in the DataFrame and checks if it + does not exceed the specified maximum threshold. + + Examples: + DataFrame with 100 rows: + - ExpectationMaxRows(max_rows=150) → PASS + - ExpectationMaxRows(max_rows=50) → FAIL + """ + + def __init__(self, max_rows: int): + """ + Initialize the maximum rows expectation. + + Args: + max_rows (int): Maximum number of rows allowed (inclusive). 
+ """ + if max_rows < 0: + raise ValueError(f"max_rows must be non-negative, got {max_rows}") + + description = f"DataFrame contains at most {max_rows} rows" + + self.max_rows = max_rows + + super().__init__( + expectation_name="ExpectationMaxRows", + column_names=[], # No specific columns required + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate maximum rows in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + row_count = len(pandas_df) + + if row_count <= self.max_rows: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"DataFrame has {row_count} rows, expected at most {self.max_rows}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error counting DataFrame rows: {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate maximum rows in a PySpark DataFrame.""" + try: + row_count = data_frame.count() + + if row_count <= self.max_rows: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"DataFrame has {row_count} rows, expected at most {self.max_rows}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error counting DataFrame rows: {str(e)}", + ) + + +class ExpectationMaxNullPercentage(DataFrameAggregationExpectation): + """ + Expectation that validates the percentage of null/NaN values in a specific column + is below a specified threshold. + + This expectation counts null values (including NaN for pandas) in the specified column + and calculates the percentage relative to total rows, then checks if it's below the + specified maximum threshold. + + Examples: + Column with 100 rows and 5 null values (5% null): + - ExpectationMaxNullPercentage(column_name="age", max_percentage=10.0) → PASS + - ExpectationMaxNullPercentage(column_name="age", max_percentage=3.0) → FAIL + + Note: The percentage is expressed as a value between 0.0 and 100.0 (e.g., 5.5 for 5.5%). + """ + + def __init__(self, column_name: str, max_percentage: float): + """ + Initialize the maximum null percentage expectation. + + Args: + column_name (str): Name of the column to check for null percentage. + max_percentage (float): Maximum percentage of null values allowed (0.0-100.0). 
+ """ + if not 0 <= max_percentage <= 100: + raise ValueError(f"max_percentage must be between 0.0 and 100.0, got {max_percentage}") + + description = f"column '{column_name}' null percentage is at most {max_percentage}%" + + self.column_name = column_name + self.max_percentage = max_percentage + + super().__init__( + expectation_name="ExpectationMaxNullPercentage", + column_names=[column_name], # Specify the required column + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate maximum null percentage in a pandas DataFrame column.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + # Get total number of rows + total_rows = len(pandas_df) + + if total_rows == 0: + # Empty DataFrame has 0% null values + actual_percentage = 0.0 + else: + # Count null and NaN values in the specific column using isnull() which handles both + null_count = pandas_df[self.column_name].isnull().sum() + actual_percentage = (null_count / total_rows) * 100 + + if actual_percentage <= self.max_percentage: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' has {actual_percentage:.2f}% null values, expected at most {self.max_percentage:.2f}%.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error calculating null percentage for column '{self.column_name}': {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate maximum null percentage in a PySpark DataFrame column.""" + try: + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + + # Get total number of rows + total_rows = pyspark_df.count() + + if total_rows == 0: + # Empty DataFrame has 0% null values + actual_percentage = 0.0 + else: + # Count null values in the specific column + null_count_result = pyspark_df.select( + F.sum(F.when(F.col(self.column_name).isNull(), 1).otherwise(0)).alias( + "null_count" + ) + ).collect() + + null_count = null_count_result[0]["null_count"] + actual_percentage = (null_count / total_rows) * 100 + + if actual_percentage <= self.max_percentage: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' has {actual_percentage:.2f}% null values, expected at most {self.max_percentage:.2f}%.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error calculating null percentage for column '{self.column_name}': {str(e)}", + ) + + +class ExpectationMaxNullCount(DataFrameAggregationExpectation): + """ + Expectation that validates the absolute count of null/NaN values in a specific column + is below a specified threshold. + + This expectation counts null values (including NaN for pandas) in the specified column + and checks if the absolute count is below the specified maximum threshold. 
+
+    Examples:
+        Column with 100 rows and 5 null values:
+        - ExpectationMaxNullCount(column_name="age", max_count=10) → PASS
+        - ExpectationMaxNullCount(column_name="age", max_count=3) → FAIL
+
+    Note: The count is the absolute number of null values, not a percentage.
+    """
+
+    def __init__(self, column_name: str, max_count: int):
+        """
+        Initialize the maximum null count expectation.
+
+        Args:
+            column_name (str): Name of the column to check for null count.
+            max_count (int): Maximum number of null values allowed.
+
+        Raises:
+            ValueError: If max_count is negative.
+        """
+        if max_count < 0:
+            raise ValueError(f"max_count must be non-negative, got {max_count}")
+
+        description = f"column '{column_name}' has at most {max_count} null values"
+
+        self.column_name = column_name
+        self.max_count = max_count
+
+        super().__init__(
+            expectation_name="ExpectationMaxNullCount",
+            column_names=[column_name],  # Specify the required column
+            description=description,
+        )
+
+    def aggregate_and_validate_pandas(
+        self, data_frame: DataFrameLike, **kwargs
+    ) -> DataFrameExpectationResultMessage:
+        """Validate maximum null count in a pandas DataFrame column."""
+        try:
+            # Cast to PandasDataFrame for type safety
+            pandas_df = cast(PandasDataFrame, data_frame)
+            # Count null and NaN values in the specific column using isnull() which handles both
+            null_count = pandas_df[self.column_name].isnull().sum()
+
+            if null_count <= self.max_count:
+                return DataFrameExpectationSuccessMessage(
+                    expectation_name=self.get_expectation_name()
+                )
+            else:
+                return DataFrameExpectationFailureMessage(
+                    expectation_str=str(self),
+                    data_frame_type=DataFrameType.PANDAS,
+                    message=f"Column '{self.column_name}' has {null_count} null values, expected at most {self.max_count}.",
+                )
+
+        except Exception as e:
+            return DataFrameExpectationFailureMessage(
+                expectation_str=str(self),
+                data_frame_type=DataFrameType.PANDAS,
+                message=f"Error calculating null count for column '{self.column_name}': {str(e)}",
+            )
+
+    def aggregate_and_validate_pyspark(
+        self, data_frame: DataFrameLike, **kwargs
+    ) -> DataFrameExpectationResultMessage:
+        """Validate maximum null count in a PySpark DataFrame column."""
+        try:
+            # Cast to PySparkDataFrame for type safety
+            pyspark_df = cast(PySparkDataFrame, data_frame)
+
+            # Count null values in the specific column
+            null_count_result = pyspark_df.select(
+                F.sum(F.when(F.col(self.column_name).isNull(), 1).otherwise(0)).alias("null_count")
+            ).collect()
+
+            # Handle the case where null_count might be None (e.g., empty DataFrame)
+            null_count = null_count_result[0]["null_count"]
+            if null_count is None:
+                null_count = 0
+
+            if null_count <= self.max_count:
+                return DataFrameExpectationSuccessMessage(
+                    expectation_name=self.get_expectation_name()
+                )
+            else:
+                return DataFrameExpectationFailureMessage(
+                    expectation_str=str(self),
+                    data_frame_type=DataFrameType.PYSPARK,
+                    message=f"Column '{self.column_name}' has {null_count} null values, expected at most {self.max_count}.",
+                )
+
+        except Exception as e:
+            return DataFrameExpectationFailureMessage(
+                expectation_str=str(self),
+                data_frame_type=DataFrameType.PYSPARK,
+                message=f"Error calculating null count for column '{self.column_name}': {str(e)}",
+            )
+
+
+# Factory functions for the registry
+@register_expectation("ExpectationMinRows")
+@requires_params("min_rows", types={"min_rows": int})
+def create_expectation_min_rows(**kwargs) -> ExpectationMinRows:
+    """
+    Create an ExpectationMinRows instance.
+
+    Args:
+        min_rows (int): Minimum number of rows required.
+ + Returns: + ExpectationMinRows: A configured expectation instance. + """ + return ExpectationMinRows(min_rows=kwargs["min_rows"]) + + +@register_expectation("ExpectationMaxRows") +@requires_params("max_rows", types={"max_rows": int}) +def create_expectation_max_rows(**kwargs) -> ExpectationMaxRows: + """ + Create an ExpectationMaxRows instance. + + Args: + max_rows (int): Maximum number of rows allowed. + + Returns: + ExpectationMaxRows: A configured expectation instance. + """ + return ExpectationMaxRows(max_rows=kwargs["max_rows"]) + + +@register_expectation("ExpectationMaxNullPercentage") +@requires_params( + "column_name", + "max_percentage", + types={"column_name": str, "max_percentage": (int, float)}, +) +def create_expectation_max_null_percentage(**kwargs) -> ExpectationMaxNullPercentage: + """ + Create an ExpectationMaxNullPercentage instance. + + Args: + column_name (str): Name of the column to check for null percentage. + max_percentage (float): Maximum percentage of null values allowed (0.0-100.0). + + Returns: + ExpectationMaxNullPercentage: A configured expectation instance. + """ + return ExpectationMaxNullPercentage( + column_name=kwargs["column_name"], + max_percentage=kwargs["max_percentage"], + ) + + +@register_expectation("ExpectationMaxNullCount") +@requires_params( + "column_name", + "max_count", + types={"column_name": str, "max_count": int}, +) +def create_expectation_max_null_count(**kwargs) -> ExpectationMaxNullCount: + """ + Create an ExpectationMaxNullCount instance. + + Args: + column_name (str): Name of the column to check for null count. + max_count (int): Maximum number of null values allowed. + + Returns: + ExpectationMaxNullCount: A configured expectation instance. + """ + return ExpectationMaxNullCount( + column_name=kwargs["column_name"], + max_count=kwargs["max_count"], + ) diff --git a/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py b/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py new file mode 100644 index 0000000..b9e8b53 --- /dev/null +++ b/dataframe_expectations/expectations/aggregation_expectations/numerical_expectations.py @@ -0,0 +1,472 @@ +from typing import Union, cast + +import pandas as pd +from pandas import DataFrame as PandasDataFrame +from pyspark.sql import DataFrame as PySparkDataFrame +from pyspark.sql import functions as F + +from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.expectations.aggregation_expectation import ( + DataFrameAggregationExpectation, +) +from dataframe_expectations.expectations.expectation_registry import ( + register_expectation, +) +from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationResultMessage, + DataFrameExpectationSuccessMessage, +) + + +class ExpectationColumnQuantileBetween(DataFrameAggregationExpectation): + """ + Expectation that validates a quantile value of a column falls within a specified range. + + This expectation computes the specified quantile of the column and checks if it + falls between the provided minimum and maximum bounds (inclusive). 
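+
+    Note: on PySpark, quantiles other than 0.0, 0.5, and 1.0 are computed with
+    percentile_approx and may therefore be approximate.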
+ + Quantile values: + - 0.0 = minimum value + - 0.5 = median value + - 1.0 = maximum value + - Any value between 0.0 and 1.0 for custom quantiles + + Examples: + Column 'age' with values [20, 25, 30, 35]: + - quantile=0.5 (median) = 27.5 + - ExpectationColumnQuantileBetween(column_name="age", quantile=0.5, min_value=25, max_value=30) → PASS + - ExpectationColumnQuantileBetween(column_name="age", quantile=1.0, min_value=30, max_value=40) → PASS (max=35) + - ExpectationColumnQuantileBetween(column_name="age", quantile=0.0, min_value=15, max_value=25) → PASS (min=20) + """ + + def __init__( + self, + column_name: str, + quantile: float, + min_value: Union[int, float], + max_value: Union[int, float], + ): + """ + Initialize the column quantile between expectation. + + Args: + column_name (str): Name of the column to check. + quantile (float): Quantile to compute (0.0 to 1.0, where 0.0=min, 0.5=median, 1.0=max). + min_value (Union[int, float]): Minimum allowed value for the column quantile (inclusive). + max_value (Union[int, float]): Maximum allowed value for the column quantile (inclusive). + + Raises: + ValueError: If quantile is not between 0.0 and 1.0. + """ + if not (0.0 <= quantile <= 1.0): + raise ValueError(f"Quantile must be between 0.0 and 1.0, got {quantile}") + + # Create descriptive names for common quantiles + quantile_names = { + 0.0: "minimum", + 0.25: "25th percentile", + 0.5: "median", + 0.75: "75th percentile", + 1.0: "maximum", + } + self.quantile_desc = quantile_names.get(quantile, f"{quantile} quantile") + + description = ( + f"column '{column_name}' {self.quantile_desc} value between {min_value} and {max_value}" + ) + + self.column_name = column_name + self.quantile = quantile + self.min_value = min_value + self.max_value = max_value + + super().__init__( + expectation_name="ExpectationColumnQuantileBetween", + column_names=[column_name], + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate column quantile in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + # Calculate quantile + if self.quantile == 0.0: + quantile_val = pandas_df[self.column_name].min() + elif self.quantile == 1.0: + quantile_val = pandas_df[self.column_name].max() + elif self.quantile == 0.5: + quantile_val = pandas_df[self.column_name].median() + else: + quantile_val = pandas_df[self.column_name].quantile(self.quantile) + + # Handle case where all values are null + if pd.isna(quantile_val): + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' contains only null values.", + ) + + # Check if quantile is within bounds + if self.min_value <= quantile_val <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=( + f"Column '{self.column_name}' {self.quantile_desc} value {quantile_val} is not between " + f"{self.min_value} and {self.max_value}." 
+ ), + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error calculating {self.quantile} quantile for column '{self.column_name}': {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate column quantile in a PySpark DataFrame.""" + try: + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + # First check if all values are null to avoid edge cases + non_null_count = pyspark_df.select(F.count(self.column_name)).collect()[0][0] + if non_null_count == 0: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' contains only null values.", + ) + + # Calculate quantile + if self.quantile == 0.0: + result = pyspark_df.select(F.min(self.column_name).alias("quantile_val")).collect() + elif self.quantile == 1.0: + result = pyspark_df.select(F.max(self.column_name).alias("quantile_val")).collect() + elif self.quantile == 0.5: + result = pyspark_df.select( + F.median(self.column_name).alias("quantile_val") # type: ignore + ).collect() + else: + # Use percentile_approx for other quantiles + result = pyspark_df.select( + F.percentile_approx(F.col(self.column_name), F.lit(self.quantile)).alias( # type: ignore + "quantile_val" + ) + ).collect() + + quantile_val = result[0]["quantile_val"] + + # Defensive check: quantile_val should not be None after the non-null count check above, + # but we keep this for extra safety in case of unexpected Spark behavior or schema issues. + if quantile_val is None: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' contains only null values.", + ) + + # Check if quantile is within bounds + if self.min_value <= quantile_val <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' {self.quantile_desc} value {quantile_val} is not between {self.min_value} and {self.max_value}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error calculating {self.quantile} quantile for column '{self.column_name}': {str(e)}", + ) + + +class ExpectationColumnMeanBetween(DataFrameAggregationExpectation): + """ + Expectation that validates the mean value of a column falls within a specified range. + + This expectation computes the mean (average) value of the specified column and checks if it + falls between the provided minimum and maximum bounds (inclusive). + + Note: Mean is implemented separately since it's not a quantile operation. + + Examples: + Column 'age' with values [20, 25, 30, 35]: + - mean_value = 27.5 + - ExpectationColumnMeanBetween(column_name="age", min_value=25, max_value=30) → PASS + - ExpectationColumnMeanBetween(column_name="age", min_value=30, max_value=35) → FAIL + """ + + def __init__( + self, + column_name: str, + min_value: Union[int, float], + max_value: Union[int, float], + ): + """ + Initialize the column mean between expectation. + + Args: + column_name (str): Name of the column to check. 
+ min_value (Union[int, float]): Minimum allowed value for the column mean (inclusive). + max_value (Union[int, float]): Maximum allowed value for the column mean (inclusive). + """ + description = f"column '{column_name}' mean value between {min_value} and {max_value}" + + self.column_name = column_name + self.min_value = min_value + self.max_value = max_value + + super().__init__( + expectation_name="ExpectationColumnMeanBetween", + column_names=[column_name], + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate column mean in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + # Calculate mean + mean_val = pandas_df[self.column_name].mean() + + # Handle case where all values are null + if pd.isna(mean_val): + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' contains only null values.", + ) + + # Check if mean is within bounds + if self.min_value <= mean_val <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error calculating mean for column '{self.column_name}': {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate column mean in a PySpark DataFrame.""" + try: + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + # Calculate mean + mean_result = pyspark_df.select(F.avg(self.column_name).alias("mean_val")).collect() + mean_val = mean_result[0]["mean_val"] + + # Handle case where all values are null + if mean_val is None: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' contains only null values.", + ) + + # Check if mean is within bounds + if self.min_value <= mean_val <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error calculating mean for column '{self.column_name}': {str(e)}", + ) + + +# Register the main expectation +@register_expectation("ExpectationColumnQuantileBetween") +@requires_params( + "column_name", + "quantile", + "min_value", + "max_value", + types={ + "column_name": str, + "quantile": float, + "min_value": (int, float), + "max_value": (int, float), + }, +) +def create_expectation_column_quantile_to_be_between( + **kwargs, +) -> ExpectationColumnQuantileBetween: + """ + Create an ExpectationColumnQuantileBetween instance. 
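+
+    Example (illustrative; this factory is typically invoked through the
+    expectation registry rather than called directly)::
+
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnQuantileBetween",
+            column_name="age",
+            quantile=0.5,
+            min_value=25,
+            max_value=30,
+        )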
+ + Args: + column_name (str): Name of the column to check. + quantile (float): Quantile to compute (0.0 to 1.0). + min_value (Union[int, float]): Minimum allowed value for the column quantile. + max_value (Union[int, float]): Maximum allowed value for the column quantile. + + Returns: + ExpectationColumnQuantileBetween: A configured expectation instance. + """ + return ExpectationColumnQuantileBetween( + column_name=kwargs["column_name"], + quantile=kwargs["quantile"], + min_value=kwargs["min_value"], + max_value=kwargs["max_value"], + ) + + +# Convenience functions for common quantiles +@register_expectation("ExpectationColumnMaxBetween") +@requires_params( + "column_name", + "min_value", + "max_value", + types={"column_name": str, "min_value": (int, float), "max_value": (int, float)}, +) +def create_expectation_column_max_to_be_between( + **kwargs, +) -> ExpectationColumnQuantileBetween: + """ + Create an ExpectationColumnQuantileBetween instance for maximum values (quantile=1.0). + + Args: + column_name (str): Name of the column to check. + min_value (Union[int, float]): Minimum allowed value for the column maximum. + max_value (Union[int, float]): Maximum allowed value for the column maximum. + + Returns: + ExpectationColumnQuantileBetween: A configured expectation instance for maximum values. + """ + return ExpectationColumnQuantileBetween( + column_name=kwargs["column_name"], + quantile=1.0, + min_value=kwargs["min_value"], + max_value=kwargs["max_value"], + ) + + +@register_expectation("ExpectationColumnMinBetween") +@requires_params( + "column_name", + "min_value", + "max_value", + types={"column_name": str, "min_value": (int, float), "max_value": (int, float)}, +) +def create_expectation_column_min_to_be_between( + **kwargs, +) -> ExpectationColumnQuantileBetween: + """ + Create an ExpectationColumnQuantileBetween instance for minimum values (quantile=0.0). + + Args: + column_name (str): Name of the column to check. + min_value (Union[int, float]): Minimum allowed value for the column minimum. + max_value (Union[int, float]): Maximum allowed value for the column minimum. + + Returns: + ExpectationColumnQuantileBetween: A configured expectation instance for minimum values. + """ + return ExpectationColumnQuantileBetween( + column_name=kwargs["column_name"], + quantile=0.0, + min_value=kwargs["min_value"], + max_value=kwargs["max_value"], + ) + + +@register_expectation("ExpectationColumnMeanBetween") +@requires_params( + "column_name", + "min_value", + "max_value", + types={"column_name": str, "min_value": (int, float), "max_value": (int, float)}, +) +def create_expectation_column_mean_to_be_between( + **kwargs, +) -> ExpectationColumnMeanBetween: + """ + Create a custom ExpectationColumnMeanBetween instance for mean values. + Note: This uses a separate implementation since mean is not a quantile. + + Args: + column_name (str): Name of the column to check. + min_value (Union[int, float]): Minimum allowed value for the column mean. + max_value (Union[int, float]): Maximum allowed value for the column mean. + + Returns: + ExpectationColumnMeanBetween: A configured expectation instance for mean values. 
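+
+    Raises:
+        ValueError: If a required parameter is missing (raised by ``requires_params``).
+        TypeError: If a parameter has an unexpected type (raised by ``requires_params``).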
+ """ + # For mean, we need a separate class since it's not a quantile + return ExpectationColumnMeanBetween( + column_name=kwargs["column_name"], + min_value=kwargs["min_value"], + max_value=kwargs["max_value"], + ) + + +@register_expectation("ExpectationColumnMedianBetween") +@requires_params( + "column_name", + "min_value", + "max_value", + types={"column_name": str, "min_value": (int, float), "max_value": (int, float)}, +) +def create_expectation_column_median_to_be_between( + **kwargs, +) -> ExpectationColumnQuantileBetween: + """ + Create an ExpectationColumnQuantileBetween instance for median values (quantile=0.5). + + Args: + column_name (str): Name of the column to check. + min_value (Union[int, float]): Minimum allowed value for the column median. + max_value (Union[int, float]): Maximum allowed value for the column median. + + Returns: + ExpectationColumnQuantileBetween: A configured expectation instance for median values. + """ + return ExpectationColumnQuantileBetween( + column_name=kwargs["column_name"], + quantile=0.5, + min_value=kwargs["min_value"], + max_value=kwargs["max_value"], + ) diff --git a/dataframe_expectations/expectations/aggregation_expectations/unique.py b/dataframe_expectations/expectations/aggregation_expectations/unique.py new file mode 100644 index 0000000..5583494 --- /dev/null +++ b/dataframe_expectations/expectations/aggregation_expectations/unique.py @@ -0,0 +1,667 @@ +from typing import List, cast + +import pandas as pd +from pandas import DataFrame as PandasDataFrame +from pyspark.sql import DataFrame as PySparkDataFrame +from pyspark.sql import functions as F + +from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.expectations.aggregation_expectation import ( + DataFrameAggregationExpectation, +) +from dataframe_expectations.expectations.expectation_registry import ( + register_expectation, +) +from dataframe_expectations.expectations.utils import requires_params +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationResultMessage, + DataFrameExpectationSuccessMessage, +) + + +class ExpectationUniqueRows(DataFrameAggregationExpectation): + """ + Expectation that checks if there are no duplicate rows for the given column names. If columns list is empty, checks for duplicates across all columns. + + For example: + For column_names ["col1", "col2"] + + Given the following DataFrame: + + | col1 | col2 | col3 | + |------|------|------| + | 1 | 10 | 100 | + | 2 | 20 | 100 | + | 3 | 30 | 100 | + | 1 | 20 | 100 | + + All rows are unique for columns ["col1", "col2"] and there will be no violations. + + For the same columns_names and the following DataFrame: + + | col1 | col2 | col3 | + |------|------|------| + | 1 | 10 | 100 | + | 2 | 20 | 100 | + | 3 | 30 | 100 | + | 1 | 10 | 100 | + + There will be 1 violation because the first and last rows are duplicates for columns ["col1", "col2"]. + + """ + + def __init__(self, column_names: List[str]): + """ + Initialize the unique expectation. + + :param column_names: List of column names to check for uniqueness. + If empty, checks all column_names. 
+ """ + description = ( + f"all rows unique for columns {column_names}" + if column_names + else "all rows unique across all columns" + ) + + self.column_names = column_names + + super().__init__( + expectation_name="ExpectationUniqueRows", + column_names=column_names, + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate uniqueness in a pandas DataFrame. + """ + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + + # If columns list is empty, use all columns + check_columns = self.column_names if self.column_names else list(pandas_df.columns) + + # Find duplicates - dropna=False ensures null values are considered in duplicate detection + # This means rows with null values can be duplicates of each other + duplicates = pandas_df[pandas_df.duplicated(subset=check_columns, keep=False)] + + if len(duplicates) == 0: + return DataFrameExpectationSuccessMessage(expectation_name=self.get_expectation_name()) + + # Add duplicate count column and keep only one row per duplicate group + duplicate_counts = ( + pandas_df.groupby(check_columns, dropna=False).size().reset_index(name="#duplicates") + ) + # Filter to only keep groups with duplicates (count > 1) + duplicate_counts = duplicate_counts[duplicate_counts["#duplicates"] > 1] + + # Order by #duplicates, then by the specified columns + sort_columns = ["#duplicates"] + check_columns + duplicates_with_counts = duplicate_counts.sort_values(sort_columns) + + # Replace NaN with None + duplicates_with_counts = duplicates_with_counts.map(lambda x: None if pd.isna(x) else x) + + # Calculate total number of duplicate rows (not groups) + total_duplicate_rows = duplicates_with_counts["#duplicates"].sum() + + # Generate dynamic error message + error_msg = ( + f"duplicate rows found for columns {self.column_names}" + if self.column_names + else "duplicate rows found" + ) + + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=duplicates_with_counts, + message=f"Found {total_duplicate_rows} duplicate row(s). {error_msg}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate uniqueness in a PySpark DataFrame. 
+ """ + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + + # If columns list is empty, use all columns + check_columns = self.column_names if self.column_names else pyspark_df.columns + + # Group by the specified columns and count duplicates + duplicates_df = ( + pyspark_df.groupBy(*check_columns) + .count() + .filter(F.col("count") > 1) + .withColumnRenamed("count", "#duplicates") + .orderBy(F.col("#duplicates"), *check_columns) + ) + + duplicate_count = duplicates_df.count() + + if duplicate_count == 0: + return DataFrameExpectationSuccessMessage(expectation_name=self.get_expectation_name()) + + # Calculate total number of duplicate rows (not groups) + total_duplicate_rows = duplicates_df.agg(F.sum("#duplicates")).collect()[0][0] + + # Generate dynamic error message + error_msg = ( + f"duplicate rows found for columns {self.column_names}" + if self.column_names + else "duplicate rows found" + ) + + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=duplicates_df, + message=f"Found {total_duplicate_rows} duplicate row(s). {error_msg}", + ) + + +class ExpectationDistinctColumnValuesEquals(DataFrameAggregationExpectation): + """ + Expectation that validates a column has exactly a specified number of distinct values. + + This expectation counts the number of unique/distinct values in a specified column + and checks if it equals the expected count. + + Examples: + Column with values [1, 2, 3, 2, 1] has 3 distinct values: + - ExpectationDistinctColumnValuesEquals(column_name="col1", expected_value=3) → PASS + - ExpectationDistinctColumnValuesEquals(column_name="col1", expected_value=5) → FAIL + + Note: The comparison is exact equality (inclusive). + """ + + def __init__(self, column_name: str, expected_value: int): + """ + Initialize the distinct values equals expectation. + + Args: + column_name (str): Name of the column to check. + expected_value (int): Expected number of distinct values (exact match). 
+ """ + if expected_value < 0: + raise ValueError(f"expected_value must be non-negative, got {expected_value}") + + description = f"column '{column_name}' has exactly {expected_value} distinct values" + + self.column_name = column_name + self.expected_value = expected_value + + super().__init__( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_names=[column_name], + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + # Count distinct values (dropna=False includes NaN as a distinct value) + actual_count = pandas_df[self.column_name].nunique(dropna=False) + + if actual_count == self.expected_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected exactly {self.expected_value}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error counting distinct values: {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a PySpark DataFrame.""" + try: + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + # Count distinct values including nulls + actual_count = pyspark_df.select(self.column_name).distinct().count() + + if actual_count == self.expected_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected exactly {self.expected_value}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error counting distinct values: {str(e)}", + ) + + +class ExpectationDistinctColumnValuesLessThan(DataFrameAggregationExpectation): + """ + Expectation that validates a column has fewer than a specified number of distinct values. + + This expectation counts the number of unique/distinct values in a specified column + and checks if it's less than the specified threshold. + + Examples: + Column with values [1, 2, 3, 2, 1] has 3 distinct values: + - ExpectationDistinctColumnValuesLessThan(column_name="col1", threshold=5) → PASS (3 < 5) + - ExpectationDistinctColumnValuesLessThan(column_name="col1", threshold=3) → FAIL (3 is not < 3) + + Note: The threshold is exclusive (actual_count < threshold). + """ + + def __init__(self, column_name: str, threshold: int): + """ + Initialize the distinct values less than expectation. + + Args: + column_name (str): Name of the column to check. + threshold (int): Threshold for distinct values count (exclusive upper bound). 
+ """ + if threshold < 0: + raise ValueError(f"threshold must be non-negative, got {threshold}") + + description = f"column '{column_name}' has fewer than {threshold} distinct values" + + self.column_name = column_name + self.threshold = threshold + + super().__init__( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_names=[column_name], + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + # Count distinct values (dropna=False includes NaN as a distinct value) + actual_count = pandas_df[self.column_name].nunique(dropna=False) + + if actual_count < self.threshold: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected fewer than {self.threshold}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error counting distinct values: {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a PySpark DataFrame.""" + try: + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + # Count distinct values including nulls + actual_count = pyspark_df.select(self.column_name).distinct().count() + + if actual_count < self.threshold: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected fewer than {self.threshold}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error counting distinct values: {str(e)}", + ) + + +class ExpectationDistinctColumnValuesGreaterThan(DataFrameAggregationExpectation): + """ + Expectation that validates a column has more than a specified number of distinct values. + + This expectation counts the number of unique/distinct values in a specified column + and checks if it's greater than the specified threshold. + + Examples: + Column with values [1, 2, 3, 2, 1] has 3 distinct values: + - ExpectationDistinctColumnValuesGreaterThan(column_name="col1", threshold=2) → PASS (3 > 2) + - ExpectationDistinctColumnValuesGreaterThan(column_name="col1", threshold=3) → FAIL (3 is not > 3) + + Note: The threshold is exclusive (actual_count > threshold). + """ + + def __init__(self, column_name: str, threshold: int): + """ + Initialize the distinct values greater than expectation. + + Args: + column_name (str): Name of the column to check. + threshold (int): Threshold for distinct values count (exclusive lower bound). 
+ """ + if threshold < 0: + raise ValueError(f"threshold must be non-negative, got {threshold}") + + description = f"column '{column_name}' has more than {threshold} distinct values" + + self.column_name = column_name + self.threshold = threshold + + super().__init__( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_names=[column_name], + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + # Count distinct values (dropna=False includes NaN as a distinct value) + actual_count = pandas_df[self.column_name].nunique(dropna=False) + + if actual_count > self.threshold: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected more than {self.threshold}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error counting distinct values: {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a PySpark DataFrame.""" + try: + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + # Count distinct values including nulls + actual_count = pyspark_df.select(self.column_name).distinct().count() + + if actual_count > self.threshold: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected more than {self.threshold}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error counting distinct values: {str(e)}", + ) + + +class ExpectationDistinctColumnValuesBetween(DataFrameAggregationExpectation): + """ + Expectation that validates a column has a number of distinct values within a specified range. + + This expectation counts the number of unique/distinct values in a specified column + and checks if it's between the specified minimum and maximum values. + + Examples: + Column with values [1, 2, 3, 2, 1] has 3 distinct values: + - ExpectationDistinctColumnValuesBetween(column_name="col1", min_value=2, max_value=5) → PASS (2 ≤ 3 ≤ 5) + - ExpectationDistinctColumnValuesBetween(column_name="col1", min_value=4, max_value=6) → FAIL (3 is not ≥ 4) + + Note: Both bounds are inclusive (min_value ≤ actual_count ≤ max_value). + """ + + def __init__(self, column_name: str, min_value: int, max_value: int): + """ + Initialize the distinct values between expectation. + + Args: + column_name (str): Name of the column to check. + min_value (int): Minimum number of distinct values (inclusive lower bound). + max_value (int): Maximum number of distinct values (inclusive upper bound). 
+ """ + if min_value < 0: + raise ValueError(f"min_value must be non-negative, got {min_value}") + if max_value < 0: + raise ValueError(f"max_value must be non-negative, got {max_value}") + if min_value > max_value: + raise ValueError(f"min_value ({min_value}) must be <= max_value ({max_value})") + + description = ( + f"column '{column_name}' has between {min_value} and {max_value} distinct values" + ) + + self.column_name = column_name + self.min_value = min_value + self.max_value = max_value + + super().__init__( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_names=[column_name], + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a pandas DataFrame.""" + try: + # Cast to PandasDataFrame for type safety + pandas_df = cast(PandasDataFrame, data_frame) + # Count distinct values (dropna=False includes NaN as a distinct value) + actual_count = pandas_df[self.column_name].nunique(dropna=False) + + if self.min_value <= actual_count <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected between {self.min_value} and {self.max_value}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error counting distinct values: {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate distinct values count in a PySpark DataFrame.""" + try: + # Cast to PySparkDataFrame for type safety + pyspark_df = cast(PySparkDataFrame, data_frame) + # Count distinct values including nulls + actual_count = pyspark_df.select(self.column_name).distinct().count() + + if self.min_value <= actual_count <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' has {actual_count} distinct values, expected between {self.min_value} and {self.max_value}.", + ) + + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Error counting distinct values: {str(e)}", + ) + + +# Register the expectations +@register_expectation("ExpectationUniqueRows") +@requires_params("column_names", types={"column_names": list}) +def create_expectation_unique(**kwargs) -> ExpectationUniqueRows: + """ + Create an ExpectationUniqueRows instance. + + :param column_names: List of column names to check for uniqueness. If empty, checks all columns. 
+ :return: ExpectationUniqueRows instance + """ + column_names = kwargs["column_names"] + return ExpectationUniqueRows(column_names=column_names) + + +@register_expectation("ExpectationDistinctColumnValuesEquals") +@requires_params( + "column_name", + "expected_value", + types={"column_name": str, "expected_value": int}, +) +def create_expectation_distinct_column_values_equals( + **kwargs, +) -> ExpectationDistinctColumnValuesEquals: + """ + Create an ExpectationDistinctColumnValuesEquals instance. + + Args: + column_name (str): Name of the column to check. + expected_value (int): Expected number of distinct values. + + Returns: + ExpectationDistinctColumnValuesEquals: A configured expectation instance. + """ + return ExpectationDistinctColumnValuesEquals( + column_name=kwargs["column_name"], + expected_value=kwargs["expected_value"], + ) + + +@register_expectation("ExpectationDistinctColumnValuesLessThan") +@requires_params( + "column_name", + "threshold", + types={"column_name": str, "threshold": int}, +) +def create_expectation_distinct_column_values_less_than( + **kwargs, +) -> ExpectationDistinctColumnValuesLessThan: + """ + Create an ExpectationDistinctColumnValuesLessThan instance. + + Args: + column_name (str): Name of the column to check. + threshold (int): Threshold for distinct values count (exclusive upper bound). + + Returns: + ExpectationDistinctColumnValuesLessThan: A configured expectation instance. + """ + return ExpectationDistinctColumnValuesLessThan( + column_name=kwargs["column_name"], + threshold=kwargs["threshold"], + ) + + +@register_expectation("ExpectationDistinctColumnValuesGreaterThan") +@requires_params( + "column_name", + "threshold", + types={"column_name": str, "threshold": int}, +) +def create_expectation_distinct_column_values_greater_than( + **kwargs, +) -> ExpectationDistinctColumnValuesGreaterThan: + """ + Create an ExpectationDistinctColumnValuesGreaterThan instance. + + Args: + column_name (str): Name of the column to check. + threshold (int): Threshold for distinct values count (exclusive lower bound). + + Returns: + ExpectationDistinctColumnValuesGreaterThan: A configured expectation instance. + """ + return ExpectationDistinctColumnValuesGreaterThan( + column_name=kwargs["column_name"], + threshold=kwargs["threshold"], + ) + + +@register_expectation("ExpectationDistinctColumnValuesBetween") +@requires_params( + "column_name", + "min_value", + "max_value", + types={"column_name": str, "min_value": int, "max_value": int}, +) +def create_expectation_distinct_column_values_between( + **kwargs, +) -> ExpectationDistinctColumnValuesBetween: + """ + Create an ExpectationDistinctColumnValuesBetween instance. + + Args: + column_name (str): Name of the column to check. + min_value (int): Minimum number of distinct values (inclusive lower bound). + max_value (int): Maximum number of distinct values (inclusive upper bound). + + Returns: + ExpectationDistinctColumnValuesBetween: A configured expectation instance. 
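+
+    Raises:
+        ValueError: If min_value or max_value is negative, or if min_value > max_value
+            (raised by the ExpectationDistinctColumnValuesBetween constructor).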
+ """ + return ExpectationDistinctColumnValuesBetween( + column_name=kwargs["column_name"], + min_value=kwargs["min_value"], + max_value=kwargs["max_value"], + ) diff --git a/dataframe_expectations/expectations/column_expectation.py b/dataframe_expectations/expectations/column_expectation.py new file mode 100644 index 0000000..af63b08 --- /dev/null +++ b/dataframe_expectations/expectations/column_expectation.py @@ -0,0 +1,113 @@ +from typing import Callable + +from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationResultMessage, + DataFrameExpectationSuccessMessage, +) + + +class DataFrameColumnExpectation(DataFrameExpectation): + """ + Base class for DataFrame column expectations. + This class is designed to validate a specific column in a DataFrame against a condition defined by + `fn_violations_pandas` and `fn_violations_pyspark` functions.""" + + def __init__( + self, + expectation_name: str, + column_name: str, + fn_violations_pandas: Callable, + fn_violations_pyspark: Callable, + description: str, + error_message: str, + ): + """ + Template for implementing DataFrame column expectations, where a column value is tested against a + condition. The conditions are defined by the `fn_violations_pandas` and `fn_violations_pyspark` functions. + + :param expectation_name: The name of the expectation. This will be used during logging. + :param column_name: The name of the column to check. + :param fn_violations_pandas: Function to find violations in a pandas DataFrame. + :param fn_violations_pyspark: Function to find violations in a PySpark DataFrame. + :param description: A description of the expectation used in logging. + :param error_message: The error message to return if the expectation fails. + """ + self.column_name = column_name + self.expectation_name = expectation_name + self.fn_violations_pandas = fn_violations_pandas + self.fn_violations_pyspark = fn_violations_pyspark + self.description = description + self.error_message = error_message + + def get_expectation_name(self) -> str: + """ + Returns the expectation name. + """ + return self.expectation_name + + def get_description(self) -> str: + """ + Returns a description of the expectation. + """ + return self.description + + def row_validation( + self, + data_frame_type: DataFrameType, + data_frame: DataFrameLike, + fn_violations: Callable, + **kwargs, + ) -> DataFrameExpectationResultMessage: + """ + Validate the DataFrame against the expectation. + + :param data_frame_type: The type of DataFrame (Pandas or PySpark). + :param data_frame: The DataFrame to validate. + :param fn_violations: The function to find violations. + :return: ExpectationResultMessage indicating success or failure. 
+ """ + + if self.column_name not in data_frame.columns: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=data_frame_type, + message=f"Column '{self.column_name}' does not exist in the DataFrame.", + ) + + violations = fn_violations(data_frame) + + # calculate number of violations based on DataFrame type + num_violations = self.num_data_frame_rows(violations) + + if num_violations == 0: + return DataFrameExpectationSuccessMessage(expectation_name=self.get_expectation_name()) + + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=data_frame_type, + violations_data_frame=violations, + message=f"Found {num_violations} row(s) where {self.error_message}", + ) + + def validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + return self.row_validation( + data_frame_type=DataFrameType.PANDAS, + data_frame=data_frame, + fn_violations=self.fn_violations_pandas, + **kwargs, + ) + + def validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + return self.row_validation( + data_frame_type=DataFrameType.PYSPARK, + data_frame=data_frame, + fn_violations=self.fn_violations_pyspark, + **kwargs, + ) diff --git a/dataframe_expectations/expectations/column_expectations/__init__.py b/dataframe_expectations/expectations/column_expectations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataframe_expectations/expectations/column_expectations/any_value_expectations.py b/dataframe_expectations/expectations/column_expectations/any_value_expectations.py new file mode 100644 index 0000000..b3c47db --- /dev/null +++ b/dataframe_expectations/expectations/column_expectations/any_value_expectations.py @@ -0,0 +1,97 @@ +from pyspark.sql import functions as F + +from dataframe_expectations.expectations.column_expectation import ( + DataFrameColumnExpectation, +) +from dataframe_expectations.expectations.expectation_registry import ( + register_expectation, +) +from dataframe_expectations.expectations.utils import requires_params + + +@register_expectation("ExpectationValueEquals") +@requires_params("column_name", "value", types={"column_name": str, "value": object}) +def create_expectation_value_equals(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + value = kwargs["value"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueEquals", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name] != value], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name) != value), + description=f"'{column_name}' equals {value}", + error_message=f"'{column_name}' is not equal to {value}.", + ) + + +@register_expectation("ExpectationValueNotEquals") +@requires_params("column_name", "value", types={"column_name": str, "value": object}) +def create_expectation_value_not_equals(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + value = kwargs["value"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueNotEquals", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name] == value], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name) == value), + description=f"'{column_name}' is not equal to {value}", + error_message=f"'{column_name}' is equal to {value}.", + ) + + +@register_expectation("ExpectationValueNull") +@requires_params("column_name", types={"column_name": str}) +def 
create_expectation_value_null(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueNull", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name].notnull()], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name).isNotNull()), + description=f"'{column_name}' is null", + error_message=f"'{column_name}' is not null.", + ) + + +@register_expectation("ExpectationValueNotNull") +@requires_params("column_name", types={"column_name": str}) +def create_expectation_value_not_null(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueNotNull", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name].isnull()], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name).isNull()), + description=f"'{column_name}' is not null", + error_message=f"'{column_name}' is null.", + ) + + +@register_expectation("ExpectationValueIn") +@requires_params("column_name", "values", types={"column_name": str, "values": list}) +def create_expectation_value_in(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + values = kwargs["values"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueIn", + column_name=column_name, + fn_violations_pandas=lambda df: df[~df[column_name].isin(values)], + fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).isin(values)), + description=f"'{column_name}' is in {values}", + error_message=f"'{column_name}' is not in {values}.", + ) + + +@register_expectation("ExpectationValueNotIn") +@requires_params("column_name", "values", types={"column_name": str, "values": list}) +def create_expectation_value_not_in(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + values = kwargs["values"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueNotIn", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name].isin(values)], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name).isin(values)), + description=f"'{column_name}' is not in {values}", + error_message=f"'{column_name}' is in {values}.", + ) diff --git a/dataframe_expectations/expectations/column_expectations/numerical_expectations.py b/dataframe_expectations/expectations/column_expectations/numerical_expectations.py new file mode 100644 index 0000000..e635964 --- /dev/null +++ b/dataframe_expectations/expectations/column_expectations/numerical_expectations.py @@ -0,0 +1,68 @@ +from pyspark.sql import functions as F + +from dataframe_expectations.expectations.column_expectation import ( + DataFrameColumnExpectation, +) +from dataframe_expectations.expectations.expectation_registry import ( + register_expectation, +) +from dataframe_expectations.expectations.utils import requires_params + + +@register_expectation("ExpectationValueGreaterThan") +@requires_params("column_name", "value", types={"column_name": str, "value": (int, float)}) +def create_expectation_value_greater_than(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + value = kwargs["value"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueGreaterThan", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name] <= value], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name) <= value), + 
description=f"'{column_name}' is greater than {value}", + error_message=f"'{column_name}' is not greater than {value}.", + ) + + +@register_expectation("ExpectationValueLessThan") +@requires_params("column_name", "value", types={"column_name": str, "value": (int, float)}) +def create_expectation_value_less_than(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + value = kwargs["value"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueLessThan", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name] >= value], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name) >= value), + description=f"'{column_name}' is less than {value}", + error_message=f"'{column_name}' is not less than {value}.", + ) + + +@register_expectation("ExpectationValueBetween") +@requires_params( + "column_name", + "min_value", + "max_value", + types={ + "column_name": str, + "min_value": (int, float), + "max_value": (int, float), + }, +) +def create_expectation_value_between(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + min_value = kwargs["min_value"] + max_value = kwargs["max_value"] + return DataFrameColumnExpectation( + expectation_name="ExpectationValueBetween", + column_name=column_name, + fn_violations_pandas=lambda df: df[ + (df[column_name] < min_value) | (df[column_name] > max_value) + ], + fn_violations_pyspark=lambda df: df.filter( + (F.col(column_name) < min_value) | (F.col(column_name) > max_value) + ), + description=f"'{column_name}' is between {min_value} and {max_value}", + error_message=f"'{column_name}' is not between {min_value} and {max_value}.", + ) diff --git a/dataframe_expectations/expectations/column_expectations/string_expectations.py b/dataframe_expectations/expectations/column_expectations/string_expectations.py new file mode 100644 index 0000000..204729b --- /dev/null +++ b/dataframe_expectations/expectations/column_expectations/string_expectations.py @@ -0,0 +1,142 @@ +from pyspark.sql import functions as F + +from dataframe_expectations.expectations.column_expectation import ( + DataFrameColumnExpectation, +) +from dataframe_expectations.expectations.expectation_registry import ( + register_expectation, +) +from dataframe_expectations.expectations.utils import requires_params + + +@register_expectation("ExpectationStringContains") +@requires_params("column_name", "substring", types={"column_name": str, "substring": str}) +def create_expectation_string_contains(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + substring = kwargs["substring"] + return DataFrameColumnExpectation( + expectation_name="ExpectationStringContains", + column_name=column_name, + fn_violations_pandas=lambda df: df[~df[column_name].str.contains(substring, na=False)], + fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).contains(substring)), + description=f"'{column_name}' contains '{substring}'", + error_message=f"'{column_name}' does not contain '{substring}'.", + ) + + +@register_expectation("ExpectationStringNotContains") +@requires_params("column_name", "substring", types={"column_name": str, "substring": str}) +def create_expectation_string_not_contains(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + substring = kwargs["substring"] + return DataFrameColumnExpectation( + expectation_name="ExpectationStringNotContains", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name].str.contains(substring, 
na=False)], + fn_violations_pyspark=lambda df: df.filter(F.col(column_name).contains(substring)), + description=f"'{column_name}' does not contain '{substring}'", + error_message=f"'{column_name}' contains '{substring}'.", + ) + + +@register_expectation("ExpectationStringStartsWith") +@requires_params("column_name", "prefix", types={"column_name": str, "prefix": str}) +def create_expectation_string_starts_with(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + prefix = kwargs["prefix"] + return DataFrameColumnExpectation( + expectation_name="ExpectationStringStartsWith", + column_name=column_name, + fn_violations_pandas=lambda df: df[~df[column_name].str.startswith(prefix, na=False)], + fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).startswith(prefix)), + description=f"'{column_name}' starts with '{prefix}'", + error_message=f"'{column_name}' does not start with '{prefix}'.", + ) + + +@register_expectation("ExpectationStringEndsWith") +@requires_params("column_name", "suffix", types={"column_name": str, "suffix": str}) +def create_expectation_string_ends_with(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + suffix = kwargs["suffix"] + return DataFrameColumnExpectation( + expectation_name="ExpectationStringEndsWith", + column_name=column_name, + fn_violations_pandas=lambda df: df[~df[column_name].str.endswith(suffix, na=False)], + fn_violations_pyspark=lambda df: df.filter(~F.col(column_name).endswith(suffix)), + description=f"'{column_name}' ends with '{suffix}'", + error_message=f"'{column_name}' does not end with '{suffix}'.", + ) + + +@register_expectation("ExpectationStringLengthLessThan") +@requires_params("column_name", "length", types={"column_name": str, "length": int}) +def create_expectation_string_length_less_than(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + length = kwargs["length"] + return DataFrameColumnExpectation( + expectation_name="ExpectationStringLengthLessThan", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name].str.len() >= length], + fn_violations_pyspark=lambda df: df.filter(F.length(column_name) >= length), + description=f"'{column_name}' length is less than {length}", + error_message=f"'{column_name}' length is not less than {length}.", + ) + + +@register_expectation("ExpectationStringLengthGreaterThan") +@requires_params("column_name", "length", types={"column_name": str, "length": int}) +def create_expectation_string_length_greater_than( + **kwargs, +) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + length = kwargs["length"] + return DataFrameColumnExpectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name].str.len() <= length], + fn_violations_pyspark=lambda df: df.filter(F.length(F.col(column_name)) <= length), + description=f"'{column_name}' length is greater than {length}", + error_message=f"'{column_name}' length is not greater than {length}.", + ) + + +@register_expectation("ExpectationStringLengthBetween") +@requires_params( + "column_name", + "min_length", + "max_length", + types={"column_name": str, "min_length": int, "max_length": int}, +) +def create_expectation_string_length_between(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + min_length = kwargs["min_length"] + max_length = kwargs["max_length"] + return DataFrameColumnExpectation( + 
expectation_name="ExpectationStringLengthBetween", + column_name=column_name, + fn_violations_pandas=lambda df: df[ + (df[column_name].str.len() < min_length) | (df[column_name].str.len() > max_length) + ], + fn_violations_pyspark=lambda df: df.filter( + (F.length(F.col(column_name)) < min_length) + | (F.length(F.col(column_name)) > max_length) + ), + description=f"'{column_name}' length is between {min_length} and {max_length}", + error_message=f"'{column_name}' length is not between {min_length} and {max_length}.", + ) + + +@register_expectation("ExpectationStringLengthEquals") +@requires_params("column_name", "length", types={"column_name": str, "length": int}) +def create_expectation_string_length_equals(**kwargs) -> DataFrameColumnExpectation: + column_name = kwargs["column_name"] + length = kwargs["length"] + return DataFrameColumnExpectation( + expectation_name="ExpectationStringLengthEquals", + column_name=column_name, + fn_violations_pandas=lambda df: df[df[column_name].str.len() != length], + fn_violations_pyspark=lambda df: df.filter(F.length(F.col(column_name)) != length), + description=f"'{column_name}' length equals {length}", + error_message=f"'{column_name}' length is not equal to {length}.", + ) diff --git a/dataframe_expectations/expectations/expectation_registry.py b/dataframe_expectations/expectations/expectation_registry.py new file mode 100644 index 0000000..a83ceba --- /dev/null +++ b/dataframe_expectations/expectations/expectation_registry.py @@ -0,0 +1,110 @@ +from typing import Callable, Dict + +from dataframe_expectations.expectations import DataFrameExpectation +from dataframe_expectations.logging_utils import setup_logger + +logger = setup_logger(__name__) + + +class DataFrameExpectationRegistry: + """Registry for dataframe expectations.""" + + _expectations: Dict[str, Callable[..., DataFrameExpectation]] = {} + _loaded: bool = False + + @classmethod + def register(cls, name: str): + """Decorator to register an expectation factory function.""" + + def decorator(func: Callable[..., DataFrameExpectation]): + logger.debug(f"Registering expectation '{name}' with function {func.__name__}") + + # check if the name is already registered + if name in cls._expectations: + error_message = f"Expectation '{name}' is already registered." 
+                logger.error(error_message)
+                raise ValueError(error_message)
+
+            cls._expectations[name] = func
+            return func
+
+        return decorator
+
+    @classmethod
+    def _ensure_loaded(cls):
+        """Ensure all expectation modules are loaded (lazy loading)."""
+        if not cls._loaded:
+            cls._load_all_expectations()
+            cls._loaded = True
+
+    @classmethod
+    def _load_all_expectations(cls):
+        """Load all expectation modules to ensure their decorators are executed."""
+        import importlib
+
+        # Explicitly import expectation modules so their registration decorators run
+        modules_to_import = [
+            "dataframe_expectations.expectations.column_expectations.null_expectation",
+            "dataframe_expectations.expectations.column_expectations.type_expectation",
+            "dataframe_expectations.expectations.column_expectations.any_value_expectations",
+            "dataframe_expectations.expectations.column_expectations.numerical_expectations",
+            "dataframe_expectations.expectations.column_expectations.string_expectations",
+            "dataframe_expectations.expectations.aggregation_expectations.count_expectation",
+            "dataframe_expectations.expectations.aggregation_expectations.sum_expectation",
+            "dataframe_expectations.expectations.aggregation_expectations.any_value_expectations",
+            "dataframe_expectations.expectations.aggregation_expectations.numerical_expectations",
+            "dataframe_expectations.expectations.aggregation_expectations.unique",
+            # Add more modules as needed
+        ]
+
+        for module_name in modules_to_import:
+            try:
+                importlib.import_module(module_name)
+                logger.debug(f"Loaded expectation module: {module_name}")
+            except ImportError as e:
+                logger.warning(f"Failed to import expectation module {module_name}: {e}")
+
+    @classmethod
+    def get_expectation(cls, expectation_name: str, **kwargs) -> DataFrameExpectation:
+        """Get an expectation instance by name."""
+        cls._ensure_loaded()  # Lazy load expectations
+        logger.debug(f"Retrieving expectation '{expectation_name}' with arguments: {kwargs}")
+        if expectation_name not in cls._expectations:
+            available = cls.list_expectations()
+            error_message = (
+                f"Unknown expectation '{expectation_name}'. "
+                f"Available expectations: {', '.join(available)}"
+            )
+            logger.error(error_message)
+            raise ValueError(error_message)
+        return cls._expectations[expectation_name](**kwargs)
+
+    @classmethod
+    def list_expectations(cls) -> list:
+        """List all registered expectation names."""
+        cls._ensure_loaded()  # Lazy load expectations
+        return list(cls._expectations.keys())
+
+    @classmethod
+    def remove_expectation(cls, expectation_name: str):
+        """Remove an expectation from the registry."""
+        cls._ensure_loaded()  # Lazy load expectations
+        logger.debug(f"Removing expectation '{expectation_name}'")
+        if expectation_name in cls._expectations:
+            del cls._expectations[expectation_name]
+        else:
+            error_message = f"Expectation '{expectation_name}' not found."
+ logger.error(error_message) + raise ValueError(error_message) + + @classmethod + def clear_expectations(cls): + """Clear all registered expectations.""" + logger.debug(f"Clearing {len(cls._expectations)} expectations from the registry") + cls._expectations.clear() + cls._loaded = False # Allow reloading + + +# Convenience decorator +register_expectation = DataFrameExpectationRegistry.register diff --git a/dataframe_expectations/expectations/utils.py b/dataframe_expectations/expectations/utils.py new file mode 100644 index 0000000..5c54ebb --- /dev/null +++ b/dataframe_expectations/expectations/utils.py @@ -0,0 +1,81 @@ +from functools import wraps +from typing import Any, Callable, Dict, Optional, Tuple, Type, Union, get_args + +from dataframe_expectations.expectations import DataFrameExpectation + + +def requires_params( + *required_params, types: Optional[Dict[str, Union[Type, Tuple[Type, ...]]]] = None +): + """ + Decorator that validates required parameters and optionally checks their types. + + :param required_params: Required parameter names + :param types: Optional dict mapping parameter names to expected types + + Usage: + @requires_params("column_name", "value") + def func(**kwargs): ... + + @requires_params("column_name", "value", types={"column_name": str, "value": int}) + def func(**kwargs): ... + """ + + def decorator(func: Callable[..., DataFrameExpectation]): + @wraps(func) + def wrapper(**kwargs): + func_name = func.__name__ + + # Check for missing parameters + missing_params = [param for param in required_params if param not in kwargs] + if missing_params: + param_list = ", ".join(required_params) + raise ValueError( + f"{func_name} missing required parameters: {', '.join(missing_params)}. " + f"Required: [{param_list}]" + ) + + # Type checking if types dict is provided + if types: + type_errors = [] + for param_name, expected_type in types.items(): + if param_name in kwargs: + actual_value = kwargs[param_name] + if not _is_instance_of_type(actual_value, expected_type): + type_errors.append( + f"'{param_name}' expected {_get_type_name(expected_type)}, " + f"got {type(actual_value).__name__}" + ) + + if type_errors: + raise TypeError(f"{func_name} type validation errors: {'; '.join(type_errors)}") + + return func(**kwargs) + + return wrapper + + return decorator + + +def _is_instance_of_type(value: Any, expected_type: Type) -> bool: + """Helper function to check if value is instance of expected_type, handling Union types.""" + # Handle Union types (like Optional[str] which is Union[str, None]) + if hasattr(expected_type, "__origin__") and expected_type.__origin__ is Union: + # For Union types, check if value matches any of the union members + union_args = get_args(expected_type) + return any(isinstance(value, arg) for arg in union_args if arg is not type(None)) or ( + type(None) in union_args and value is None + ) + + # Handle regular types + return isinstance(value, expected_type) + + +def _get_type_name(type_hint: Type) -> str: + """Helper function to get a readable name for type hints.""" + if hasattr(type_hint, "__origin__") and type_hint.__origin__ is Union: + union_args = get_args(type_hint) + arg_names = [arg.__name__ if hasattr(arg, "__name__") else str(arg) for arg in union_args] + return f"Union[{', '.join(arg_names)}]" + + return getattr(type_hint, "__name__", str(type_hint)) diff --git a/dataframe_expectations/expectations_suite.py b/dataframe_expectations/expectations_suite.py new file mode 100644 index 0000000..e468378 --- /dev/null +++ 
b/dataframe_expectations/expectations_suite.py @@ -0,0 +1,961 @@ +from typing import List, Union, cast + +from dataframe_expectations.expectations import DataFrameLike +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.logging_utils import setup_logger +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + +logger = setup_logger(__name__) + + +class DataFrameExpectationsSuiteFailure(Exception): + """Raised when one or more expectations in the suite fail.""" + + def __init__( + self, + total_expectations: int, + failures: List[DataFrameExpectationFailureMessage], + *args, + ): + self.failures = failures + self.total_expectations = total_expectations + super().__init__(*args) + + def __str__(self): + margin_len = 80 + lines = [ + f"({len(self.failures)}/{self.total_expectations}) expectations failed.", + "\n" + "=" * margin_len, + "List of violations:", + "-" * margin_len, + ] + + for index, failure in enumerate(self.failures): + lines.append(f"[Failed {index + 1}/{len(self.failures)}] {failure}") + if index < len(self.failures) - 1: + lines.append("-" * margin_len) + + lines.append("=" * margin_len) + return "\n".join(lines) + + +class DataFrameExpectationsSuite: + """ + A suite of expectations for validating DataFrames. + """ + + def __init__(self): + """ + Initialize the expectation suite. + """ + self.__expectations = [] + + # Expectations for any data type + + def expect_value_equals( + self, + column_name: str, + value: object, + ): + """ + Add an expectation to check if the values in a column equal a specified value. + + Categories: + category: Column Expectations + subcategory: Any Value + + :param column_name: The name of the column to check. + :param value: The value to compare against. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueEquals", + column_name=column_name, + value=value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_value_not_equals( + self, + column_name: str, + value: object, + ): + """ + Add an expectation to check if the values in a column do not equal a specified value. + + Categories: + category: Column Expectations + subcategory: Any Value + + :param column_name: The name of the column to check. + :param value: The value to compare against. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotEquals", + column_name=column_name, + value=value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_value_null( + self, + column_name: str, + ): + """ + Add an expectation to check if the values in a column are null. + + Categories: + category: Column Expectations + subcategory: Any Value + + :param column_name: The name of the column to check. + :return: an instance of DataFrameExpectationsSuite. 
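+
+        Example (illustrative; ``deleted_at`` is a hypothetical column)::
+
+            suite = DataFrameExpectationsSuite().expect_value_null("deleted_at")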
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNull", + column_name=column_name, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_value_not_null( + self, + column_name: str, + ): + """ + Add an expectation to check if the values in a column are not null. + + Categories: + category: Column Expectations + subcategory: Any Value + + :param column_name: The name of the column to check. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotNull", + column_name=column_name, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_value_in( + self, + column_name: str, + values: List[object], + ): + """ + Add an expectation to check if the values in a column are in a specified list of values. + + Categories: + category: Column Expectations + subcategory: Any Value + + :param column_name: The name of the column to check. + :param values: The list of values to compare against. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueIn", + column_name=column_name, + values=values, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_value_not_in( + self, + column_name: str, + values: List[object], + ): + """ + Add an expectation to check if the values in a column are not in a specified list of values. + + Categories: + category: Column Expectations + subcategory: Any Value + + :param column_name: The name of the column to check. + :param values: The list of values to compare against. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotIn", + column_name=column_name, + values=values, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + # Expectations for numerical data types + + def expect_value_greater_than( + self, + column_name: str, + value: float, + ): + """ + Add an expectation to check if the values in a column are greater than a specified value. + + Categories: + category: Column Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param value: The value to compare against. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueGreaterThan", + column_name=column_name, + value=value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_value_less_than( + self, + column_name: str, + value: float, + ): + """ + Add an expectation to check if the values in a column are less than a specified value. + + Categories: + category: Column Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param value: The value to compare against. + :return: an instance of DataFrameExpectationsSuite. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueLessThan", + column_name=column_name, + value=value, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_value_between( + self, + column_name: str, + min_value: float, + max_value: float, + ): + """ + Add an expectation to check if the values in a column are between two specified values. + + Categories: + category: Column Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param min_value: The minimum value for the range. + :param max_value: The maximum value for the range. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueBetween", + column_name=column_name, + min_value=min_value, + max_value=max_value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + # Expectations for string data types + + def expect_string_contains( + self, + column_name: str, + substring: str, + ): + """ + Add an expectation to check if the values in a string column contain a specified substring. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param substring: The substring to search for. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringContains", + column_name=column_name, + substring=substring, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_string_not_contains( + self, + column_name: str, + substring: str, + ): + """ + Add an expectation to check if the values in a string column do not contain a specified substring. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param substring: The substring to search for. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringNotContains", + column_name=column_name, + substring=substring, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_string_starts_with( + self, + column_name: str, + prefix: str, + ): + """ + Add an expectation to check if the values in a string column start with a specified prefix. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param prefix: The prefix to search for. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringStartsWith", + column_name=column_name, + prefix=prefix, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_string_ends_with( + self, + column_name: str, + suffix: str, + ): + """ + Add an expectation to check if the values in a string column end with a specified suffix. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param suffix: The suffix to search for. 
+ :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringEndsWith", + column_name=column_name, + suffix=suffix, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_string_length_less_than( + self, + column_name: str, + length: int, + ): + """ + Add an expectation to check if the length of the values in a string column is less than a specified length. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param length: The length that the values should be less than. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthLessThan", + column_name=column_name, + length=length, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_string_length_greater_than( + self, + column_name: str, + length: int, + ): + """ + Add an expectation to check if the length of the values in a string column is greater than a specified length. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param length: The length that the values should be greater than. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name=column_name, + length=length, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_string_length_between( + self, + column_name: str, + min_length: int, + max_length: int, + ): + """ + Add an expectation to check if the length of the values in a string column is between two specified lengths. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param min_length: The minimum length that the values should be. + :param max_length: The maximum length that the values should be. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthBetween", + column_name=column_name, + min_length=min_length, + max_length=max_length, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_string_length_equals( + self, + column_name: str, + length: int, + ): + """ + Add an expectation to check if the length of the values in a string column equals a specified length. + + Categories: + category: Column Expectations + subcategory: String + + :param column_name: The name of the column to check. + :param length: The length that the values should equal. + :return: an instance of DataFrameExpectationsSuite. 
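+ + Example (illustrative sketch; "country_code" is a hypothetical column):: + + suite.expect_string_length_equals(column_name="country_code", length=2)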
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthEquals", + column_name=column_name, + length=length, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + # Expectations for aggregation data types + def expect_min_rows( + self, + min_rows: int, + ): + """ + Add an expectation to check if the DataFrame has at least a minimum number of rows. + Categories: + category: DataFrame Aggregation Expectations + subcategory: Any Value + :param min_rows: The minimum number of rows expected. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=min_rows, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_max_rows( + self, + max_rows: int, + ): + """ + Add an expectation to check if the DataFrame has at most a maximum number of rows. + Categories: + category: DataFrame Aggregation Expectations + subcategory: Any Value + :param max_rows: The maximum number of rows expected. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=max_rows, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_max_null_percentage( + self, + column_name: str, + max_percentage: float, + ): + """ + Add an expectation to check if the percentage of null/NaN values in a specific column is below a threshold. + Categories: + category: Column Aggregation Expectations + subcategory: Any Value + :param column_name: The name of the column to check for null percentage. + :param max_percentage: The maximum allowed percentage of null/NaN values (0.0 to 100.0). + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name=column_name, + max_percentage=max_percentage, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_max_null_count( + self, + column_name: str, + max_count: int, + ): + """ + Add an expectation to check if the count of null/NaN values in a specific column is below a threshold. + Categories: + category: Column Aggregation Expectations + subcategory: Any Value + :param column_name: The name of the column to check for null count. + :param max_count: The maximum allowed count of null/NaN values. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name=column_name, + max_count=max_count, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_unique_rows( + self, + column_names: List[str], + ): + """ + Add an expectation to check if the rows in the DataFrame are unique based on specified columns. + + Categories: + category: Column Aggregation Expectations + subcategory: Any Value + + :param column_names: The list of column names to check for uniqueness. + :return: an instance of DataFrameExpectationsSuite. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=column_names, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_distinct_column_values_equals( + self, + column_name: str, + expected_value: int, + ): + """ + Add an expectation to check if the number of distinct values in a column equals an expected count. + Categories: + category: Column Aggregation Expectations + subcategory: Any Value + :param column_name: The name of the column to check. + :param expected_value: The expected number of distinct values (exact match). + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name=column_name, + expected_value=expected_value, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_distinct_column_values_less_than( + self, + column_name: str, + threshold: int, + ): + """ + Add an expectation to check if the number of distinct values in a column is less than a threshold. + Categories: + category: Column Aggregation Expectations + subcategory: Any Value + :param column_name: The name of the column to check. + :param threshold: The threshold for distinct values count (exclusive upper bound). + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name=column_name, + threshold=threshold, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_distinct_column_values_greater_than( + self, + column_name: str, + threshold: int, + ): + """ + Add an expectation to check if the number of distinct values in a column is greater than a threshold. + Categories: + category: Column Aggregation Expectations + subcategory: Any Value + :param column_name: The name of the column to check. + :param threshold: The threshold for distinct values count (exclusive lower bound). + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name=column_name, + threshold=threshold, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_distinct_column_values_between( + self, + column_name: str, + min_value: int, + max_value: int, + ): + """ + Add an expectation to check if the number of distinct values in a column falls within a range. + Categories: + category: Column Aggregation Expectations + subcategory: Any Value + :param column_name: The name of the column to check. + :param min_value: The minimum number of distinct values (inclusive lower bound). + :param max_value: The maximum number of distinct values (inclusive upper bound). + :return: an instance of DataFrameExpectationsSuite. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name=column_name, + min_value=min_value, + max_value=max_value, + ) + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_column_quantile_between( + self, + column_name: str, + quantile: float, + min_value: Union[int, float], + max_value: Union[int, float], + ): + """ + Add an expectation to check if a quantile of a column falls within a specified range. + + Categories: + category: Column Aggregation Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param quantile: The quantile to compute (0.0 to 1.0, where 0.0=min, 0.5=median, 1.0=max). + :param min_value: The minimum allowed value for the quantile. + :param max_value: The maximum allowed value for the quantile. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnQuantileBetween", + column_name=column_name, + quantile=quantile, + min_value=min_value, + max_value=max_value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_column_max_between( + self, + column_name: str, + min_value: Union[int, float], + max_value: Union[int, float], + ): + """ + Add an expectation to check if the maximum value of a column falls within a specified range. + + Categories: + category: Column Aggregation Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param min_value: The minimum allowed value for the column maximum. + :param max_value: The maximum allowed value for the column maximum. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name=column_name, + min_value=min_value, + max_value=max_value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_column_min_between( + self, + column_name: str, + min_value: Union[int, float], + max_value: Union[int, float], + ): + """ + Add an expectation to check if the minimum value of a column falls within a specified range. + + Categories: + category: Column Aggregation Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param min_value: The minimum allowed value for the column minimum. + :param max_value: The maximum allowed value for the column minimum. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name=column_name, + min_value=min_value, + max_value=max_value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_column_mean_between( + self, + column_name: str, + min_value: Union[int, float], + max_value: Union[int, float], + ): + """ + Add an expectation to check if the mean value of a column falls within a specified range. + + Categories: + category: Column Aggregation Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param min_value: The minimum allowed value for the column mean. 
+ :param max_value: The maximum allowed value for the column mean. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name=column_name, + min_value=min_value, + max_value=max_value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def expect_column_median_between( + self, + column_name: str, + min_value: Union[int, float], + max_value: Union[int, float], + ): + """ + Add an expectation to check if the median value of a column falls within a specified range. + + Categories: + category: Column Aggregation Expectations + subcategory: Numerical + + :param column_name: The name of the column to check. + :param min_value: The minimum allowed value for the column median. + :param max_value: The maximum allowed value for the column median. + :return: an instance of DataFrameExpectationsSuite. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name=column_name, + min_value=min_value, + max_value=max_value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + + def run( + self, + data_frame: DataFrameLike, + ) -> None: + """ + Run all expectations on the provided DataFrame with PySpark caching optimization. + + :param data_frame: The DataFrame to validate. + """ + from dataframe_expectations import DataFrameType + from dataframe_expectations.expectations import DataFrameExpectation + + successes = [] + failures = [] + margin_len = 80 + + header_message = "Running expectations suite" + header_prefix = "=" * ((margin_len - len(header_message) - 2) // 2) + header_suffix = "=" * ( + (margin_len - len(header_message) - 2) // 2 + len(header_message) % 2 + ) + logger.info(f"{header_prefix} {header_message} {header_suffix}") + + # PySpark caching optimization + data_frame_type = DataFrameExpectation.infer_data_frame_type(data_frame) + was_already_cached = False + + if data_frame_type == DataFrameType.PYSPARK: + # Import PySpark DataFrame for type casting + from pyspark.sql import DataFrame as PySparkDataFrame + + # Cast to PySpark DataFrame since we know it's PySpark at this point + pyspark_df = cast(PySparkDataFrame, data_frame) + + # Check if DataFrame is already cached + was_already_cached = pyspark_df.is_cached + + # Cache the DataFrame if it wasn't already cached + if not was_already_cached: + logger.debug("Caching PySpark DataFrame for expectations suite execution") + pyspark_df.cache() + # Update the original reference for subsequent operations + data_frame = pyspark_df + + try: + # Run all expectations + for expectation in self.__expectations: + result = expectation.validate(data_frame=data_frame) + if isinstance(result, DataFrameExpectationSuccessMessage): + logger.info( + f"{expectation.get_expectation_name()} ({expectation.get_description()}) ... OK" + ) + successes.append(result) + elif isinstance(result, DataFrameExpectationFailureMessage): + logger.info( + f"{expectation.get_expectation_name()} ({expectation.get_description()}) ... 
FAIL" + ) + failures.append(result) + else: + raise ValueError( + f"Unexpected result type: {type(result)} for expectation: {expectation.get_expectation_name()}" + ) + finally: + # Uncache the DataFrame if we cached it (and it wasn't already cached) + if data_frame_type == DataFrameType.PYSPARK and not was_already_cached: + from pyspark.sql import DataFrame as PySparkDataFrame + + logger.debug("Uncaching PySpark DataFrame after expectations suite execution") + cast(PySparkDataFrame, data_frame).unpersist() + + footer_message = f"{len(successes)} success, {len(failures)} failures" + footer_prefix = "=" * ((margin_len - len(footer_message) - 2) // 2) + footer_suffix = "=" * ( + (margin_len - len(footer_message) - 2) // 2 + len(footer_message) % 2 + ) + logger.info(f"{footer_prefix} {footer_message} {footer_suffix}") + + if len(failures) > 0: + raise DataFrameExpectationsSuiteFailure( + total_expectations=len(self.__expectations), failures=failures + ) + + +if __name__ == "__main__": + # Example usage + suite = DataFrameExpectationsSuite() + suite.expect_value_greater_than(column_name="age", value=18) + suite.expect_value_less_than(column_name="salary", value=100000) + suite.expect_unique_rows(column_names=["id"]) + suite.expect_column_mean_between(column_name="age", min_value=20, max_value=40) + suite.expect_column_max_between(column_name="salary", min_value=80000, max_value=150000) + + import pandas as pd + + # Create a sample DataFrame + df = pd.DataFrame( + { + "id": [1, 2, 3, 4], + "age": [20, 25, 30, 35], + "salary": [50000, 120000, 80000, 90000], + } + ) + + suite.run(data_frame=df) diff --git a/dataframe_expectations/logging_utils.py b/dataframe_expectations/logging_utils.py new file mode 100644 index 0000000..f2f74e6 --- /dev/null +++ b/dataframe_expectations/logging_utils.py @@ -0,0 +1,30 @@ +import logging + + +def setup_logger(name=None): + """Sets up the logger for the entire run.""" + # Suppress verbose logs from py4j + logging.getLogger("py4j").setLevel(logging.ERROR) + logging.getLogger("py4j.java_gateway").setLevel(logging.ERROR) + + # Create or get a logger + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) # Set the default log level + logger.propagate = False # Disable logger propagation to prevent duplicate logs + DATE_FORMAT = "%Y-%m-%d %H:%M:%S" + MSG_FORMAT = "%(asctime)s %(levelname)-8s [%(filename)s:%(funcName)s():%(lineno)d] %(message)s" + + # Check if the logger already has handlers to avoid duplicate logs + if not logger.hasHandlers(): + # Create a console handler + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + + # Create a formatter and set it for the handler + formatter = logging.Formatter(MSG_FORMAT, DATE_FORMAT) + console_handler.setFormatter(formatter) + + # Add the handler to the logger + logger.addHandler(console_handler) + + return logger diff --git a/dataframe_expectations/result_message.py b/dataframe_expectations/result_message.py new file mode 100644 index 0000000..34e5f56 --- /dev/null +++ b/dataframe_expectations/result_message.py @@ -0,0 +1,76 @@ +from abc import ABC +from typing import Optional + +from tabulate import tabulate # type: ignore + +from dataframe_expectations import DataFrameLike, DataFrameType + + +class DataFrameExpectationResultMessage(ABC): + """ + Base class for expectation result message. + """ + + message: str = "" + + def __str__(self): + """ + Print the result of the expectation. 
+ """ + return self.message + + def dataframe_to_str(self, data_frame_type: DataFrameType, data_frame, rows: int) -> str: + """ + Print the DataFrame based on its type. + """ + + if data_frame_type == DataFrameType.PANDAS: + data_frame = data_frame.head(rows) + elif data_frame_type == DataFrameType.PYSPARK: + data_frame = data_frame.limit(rows).toPandas() + else: + raise ValueError(f"Unsupported DataFrame type: {data_frame_type}") + + return tabulate(data_frame, headers="keys", tablefmt="pretty", showindex=False) + + +class DataFrameExpectationSuccessMessage(DataFrameExpectationResultMessage): + def __init__(self, expectation_name: str, message: Optional[str] = None): + """ + Initialize the expectation success message. + """ + self.message = f"{expectation_name} succeeded." + if message is not None: + self.message = f"{self.message}: {message}" + + +class DataFrameExpectationFailureMessage(DataFrameExpectationResultMessage): + def __init__( + self, + expectation_str: str, + data_frame_type: DataFrameType, + violations_data_frame: Optional[DataFrameLike] = None, + message: Optional[str] = None, + limit_violations: int = 5, + ): + self.message = expectation_str + if message is not None: + self.message = f"{self.message}: {message}" + if violations_data_frame is not None: + self.data_frame_type = data_frame_type + + self.violations_data_frame = violations_data_frame + violations_dataframe_str = self.dataframe_to_str( + data_frame_type=data_frame_type, + data_frame=violations_data_frame, + rows=limit_violations, + ) + self.message = ( + f"{self.message} \nSome examples of violations: \n{violations_dataframe_str}" + ) + + def get_violations_data_frame(self) -> Optional[DataFrameLike]: + """ + Get the DataFrame with violations. + """ + return self.violations_data_frame if hasattr(self, "violations_data_frame") else None diff --git a/dataframe_expectations/sanity_checks.py b/dataframe_expectations/sanity_checks.py new file mode 100644 index 0000000..ec26529 --- /dev/null +++ b/dataframe_expectations/sanity_checks.py @@ -0,0 +1,392 @@ +""" +DataFrame Expectations Framework Sanity Check Script + +This script validates consistency across the entire expectations framework by checking: +1. All expectations implemented in the expectations/ directory are registered in the registry +2. All registered expectations have corresponding expect_* methods in DataFrameExpectationsSuite +3. 
All registered expectations have corresponding unit tests in tests/expectations_implemented/ + +Usage: + python sanity_checks.py +""" + +import ast +import re +import sys +from pathlib import Path +from typing import Dict, List, Optional, Set + + +class ExpectationsSanityChecker: + """Validates consistency across the expectations framework.""" + + def __init__(self, project_root: Path): + self.project_root = project_root + self.expectations_dir = project_root / "dataframe_expectations" / "expectations" + self.suite_file = project_root / "dataframe_expectations" / "expectations_suite.py" + self.tests_dir = project_root / "tests" / "expectations_implemented" + + # Results storage + self.registered_expectations: Dict[str, str] = {} # expectation_name -> file_path + self.suite_methods: Set[str] = set() # expect_* method names + self.test_files: Dict[str, str] = {} # expectation_name -> test_file_path + + # Issues tracking + self.issues: List[str] = [] + + def run_full_check(self) -> bool: + """Run all consistency checks and return True if all pass.""" + print("🔍 Starting DataFrame Expectations Framework Sanity Check...") + print("=" * 70) + + # Step 1: Discover registered expectations + print("\n📋 Step 1: Discovering registered expectations...") + self._discover_registered_expectations() + print(f" Found {len(self.registered_expectations)} registered expectations") + + # Step 2: Discover suite methods + print("\n🎯 Step 2: Discovering suite methods...") + self._discover_suite_methods() + print(f" Found {len(self.suite_methods)} expect_* methods in suite") + + # Step 3: Discover test files + print("\n🧪 Step 3: Discovering test files...") + self._discover_test_files() + print(f" Found {len(self.test_files)} test files") + + # Step 4: Validate consistency + print("\n✅ Step 4: Validating consistency...") + self._validate_registry_to_suite_mapping() + self._validate_registry_to_tests_mapping() + self._validate_orphaned_suite_methods() + self._validate_orphaned_test_files() + + # Report results + self._print_results() + + return len(self.issues) == 0 + + def _discover_registered_expectations(self): + """Find all @register_expectation decorators in expectation files.""" + expectation_files = list(self.expectations_dir.rglob("*.py")) + + for file_path in expectation_files: + if file_path.name == "__init__.py": + continue + + try: + with open(file_path, "r") as f: + content = f.read() + + # Parse AST to find @register_expectation decorators + tree = ast.parse(content) + + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + for decorator in node.decorator_list: + if self._is_register_expectation_decorator(decorator): + expectation_name = self._extract_expectation_name(decorator) + if expectation_name: + self.registered_expectations[expectation_name] = str(file_path) + + except Exception as e: + print(f" ⚠️ Warning: Could not parse {file_path}: {e}") + + def _is_register_expectation_decorator(self, decorator) -> bool: + """Check if a decorator is @register_expectation.""" + if isinstance(decorator, ast.Call): + if isinstance(decorator.func, ast.Name) and decorator.func.id == "register_expectation": + return True + return False + + def _extract_expectation_name(self, decorator) -> Optional[str]: + """Extract expectation name from @register_expectation("Name") decorator.""" + if isinstance(decorator, ast.Call) and decorator.args: + first_arg = decorator.args[0] + if isinstance(first_arg, ast.Constant): + return str(first_arg.value) + return None + + def 
_discover_suite_methods(self): + """Find all expect_* methods in DataFrameExpectationsSuite.""" + if not self.suite_file.exists(): + self.issues.append(f"❌ Suite file not found: {self.suite_file}") + return + + try: + with open(self.suite_file, "r") as f: + content = f.read() + + # Use regex to find expect_* method definitions + method_pattern = r"def\s+(expect_[a-z_]+)\s*\(" + matches = re.findall(method_pattern, content) + self.suite_methods = set(matches) + + except Exception as e: + self.issues.append(f"❌ Could not parse suite file {self.suite_file}: {e}") + + def _discover_test_files(self): + """Find all test files and map them to expectation names.""" + if not self.tests_dir.exists(): + self.issues.append(f"❌ Tests directory not found: {self.tests_dir}") + return + + test_files = list(self.tests_dir.rglob("test_*.py")) + + for test_file in test_files: + # Skip template files + if "template" in test_file.name.lower(): + continue + + # Extract potential expectation name from filename + # e.g., test_expect_value_equals.py -> ExpectationValueEquals + filename = test_file.stem + if filename.startswith("test_expect_"): + # Convert test_expect_value_equals -> ValueEquals + expectation_part = filename[12:] # Remove "test_expect_" + expectation_name = "Expectation" + self._snake_to_pascal_case(expectation_part) + self.test_files[expectation_name] = str(test_file) + + def _snake_to_pascal_case(self, snake_str: str) -> str: + """Convert snake_case to PascalCase.""" + components = snake_str.split("_") + return "".join(word.capitalize() for word in components) + + def _validate_registry_to_suite_mapping(self): + """Check that all registered expectations have suite methods.""" + print(" 🔗 Checking registry -> suite mapping...") + + missing_suite_methods = [] + + for expectation_name in self.registered_expectations.keys(): + # Convert expectation name to expected suite method name + expected_method = self._expectation_to_suite_method(expectation_name) + + if expected_method not in self.suite_methods: + missing_suite_methods.append((expectation_name, expected_method)) + + if missing_suite_methods: + self.issues.append("❌ Registered expectations missing suite methods:") + for exp_name, method_name in missing_suite_methods: + self.issues.append(f" • {exp_name} -> missing {method_name}()") + + def _validate_registry_to_tests_mapping(self): + """Check that all registered expectations have test files.""" + print(" 🧪 Checking registry -> tests mapping...") + + missing_tests = [] + + for expectation_name in self.registered_expectations.keys(): + if expectation_name not in self.test_files: + missing_tests.append(expectation_name) + + if missing_tests: + self.issues.append("❌ Registered expectations missing test files:") + for exp_name in missing_tests: + expected_test_file = self._expectation_to_test_filename(exp_name) + self.issues.append(f" • {exp_name} -> missing {expected_test_file}") + + def _validate_orphaned_suite_methods(self): + """Check for suite methods without corresponding registered expectations.""" + print(" 🔍 Checking for orphaned suite methods...") + + orphaned_methods = [] + + for method_name in self.suite_methods: + expected_expectation = self._suite_method_to_expectation(method_name) + + if expected_expectation not in self.registered_expectations: + orphaned_methods.append((method_name, expected_expectation)) + + if orphaned_methods: + self.issues.append("❌ Suite methods without registered expectations:") + for method_name, exp_name in orphaned_methods: + self.issues.append(f" • 
{method_name}() -> missing {exp_name}") + + def _validate_orphaned_test_files(self): + """Check for test files without corresponding registered expectations.""" + print(" 🧪 Checking for orphaned test files...") + + orphaned_tests = [] + + for expectation_name, test_file in self.test_files.items(): + if expectation_name not in self.registered_expectations: + orphaned_tests.append((expectation_name, test_file)) + + if orphaned_tests: + self.issues.append("❌ Test files without registered expectations:") + for exp_name, test_file in orphaned_tests: + self.issues.append(f" • {test_file} -> missing {exp_name}") + + def _expectation_to_suite_method(self, expectation_name: str) -> str: + """Convert expectation name to expected suite method name.""" + # Remove "Expectation" prefix if present + if expectation_name.startswith("Expectation"): + name_part = expectation_name[11:] # Remove "Expectation" + else: + name_part = expectation_name + + # Convert PascalCase to snake_case and add "expect_" prefix + snake_case = re.sub("([A-Z])", r"_\1", name_part).lower().lstrip("_") + return f"expect_{snake_case}" + + def _suite_method_to_expectation(self, method_name: str) -> str: + """Convert suite method name to expected expectation name.""" + if method_name.startswith("expect_"): + name_part = method_name[7:] # Remove "expect_" + # Convert snake_case to PascalCase and add "Expectation" prefix + pascal_case = self._snake_to_pascal_case(name_part) + return f"Expectation{pascal_case}" + return method_name + + def _expectation_to_test_filename(self, expectation_name: str) -> str: + """Convert expectation name to expected test filename.""" + method_name = self._expectation_to_suite_method(expectation_name) + return f"test_{method_name}.py" + + def _print_results(self): + """Print the final results of the sanity check.""" + print("\n" + "=" * 70) + print("📊 SANITY CHECK RESULTS") + print("=" * 70) + + print("\n📈 Summary:") + print(f" • Registered expectations: {len(self.registered_expectations)}") + print(f" • Suite methods: {len(self.suite_methods)}") + print(f" • Test files: {len(self.test_files)}") + print(f" • Issues found: {len(self.issues)}") + + if self.issues: + print(f"\n❌ ISSUES FOUND ({len(self.issues)}):") + print("-" * 40) + for issue in self.issues: + print(issue) + else: + print("\n✅ ALL CHECKS PASSED!") + print(" The expectations framework is consistent across:") + print(" • Registry registrations") + print(" • Suite method implementations") + print(" • Unit test coverage") + + print("\n" + "=" * 70) + + def print_detailed_mappings(self): + """Print detailed mappings for debugging purposes.""" + print("\n🔍 DETAILED MAPPINGS") + print("=" * 50) + + print(f"\n📋 Registered Expectations ({len(self.registered_expectations)}):") + for name, file_path in sorted(self.registered_expectations.items()): + print(f" • {name} ({Path(file_path).name})") + + print(f"\n🎯 Suite Methods ({len(self.suite_methods)}):") + for method in sorted(self.suite_methods): + print(f" • {method}()") + + print(f"\n🧪 Test Files ({len(self.test_files)}):") + for name, file_path in sorted(self.test_files.items()): + print(f" • {name} -> {Path(file_path).name}") + + def should_run_check(self) -> bool: + """Check if we should run based on changed files in the current branch.""" + import subprocess + + try: + # Try to get the default branch name (usually main or master) + try: + result = subprocess.run( + ["git", "symbolic-ref", "refs/remotes/origin/HEAD"], + capture_output=True, + text=True, + check=True, + ) + default_branch = 
result.stdout.strip().split("/")[-1] + except subprocess.CalledProcessError: + # Fallback to common default branch names + for branch in ["main", "master"]: + try: + subprocess.run( + ["git", "rev-parse", f"origin/{branch}"], + capture_output=True, + text=True, + check=True, + ) + default_branch = branch + break + except subprocess.CalledProcessError: + continue + else: + default_branch = "main" # Final fallback + + # Get list of changed files compared to default branch + result = subprocess.run( + ["git", "diff", f"origin/{default_branch}...HEAD", "--name-only"], + capture_output=True, + text=True, + check=True, + ) + changed_files = [f for f in result.stdout.strip().split("\n") if f] + + if not changed_files: + print("🔍 No files changed, skipping sanity check.") + return False + + # Check if any relevant files changed + relevant_patterns = [ + "dataframe_expectations/", + "tests/dataframe_expectations/", + ] + + changed_relevant_files = [] + for file in changed_files: + for pattern in relevant_patterns: + if pattern in file: + changed_relevant_files.append(file) + break + + if changed_relevant_files: + print("🔍 Relevant DataFrame expectations files changed:") + for file in changed_relevant_files: + print(f" • {file}") + return True + else: + print("🔍 No relevant DataFrame expectations files changed, skipping sanity check.") + return False + + except subprocess.CalledProcessError as e: + print(f"⚠️ Git command failed: {e}") + print("🔍 Running sanity check anyway as a safety measure.") + return True + except Exception as e: + print(f"⚠️ Error checking changed files: {e}") + print("🔍 Running sanity check anyway as a safety measure.") + return True + + +if __name__ == "__main__": + # Use relative path from the script location + script_dir = Path(__file__).parent + # Go up one level: sanity_checks.py is in dataframe_expectations/, project root is parent + project_root = script_dir.parent + + # Validate directory structure + expected_dirs = ["dataframe_expectations", "tests", "pyproject.toml"] + missing_dirs = [d for d in expected_dirs if not (project_root / d).exists()] + + if missing_dirs: + print(f"❌ Missing expected directories/files: {missing_dirs}") + print(f"Script location: {Path(__file__)}") + print(f"Project root: {project_root}") + sys.exit(1) + + checker = ExpectationsSanityChecker(project_root) + + # Run the checks + success = checker.run_full_check() + + # Optionally print detailed mappings for debugging + if "--verbose" in sys.argv or "-v" in sys.argv: + checker.print_detailed_mappings() + + # Exit with appropriate code + sys.exit(0 if success else 1) diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..f0637be --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,32 @@ +# Minimal makefile for Sphinx documentation with uv support + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= uv run sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx-build using the O variable. 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +# Custom targets for development +clean: + @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +livehtml: + uv run sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O) + +# Install documentation dependencies +install-docs: + uv sync --group docs + +# Build docs with fresh install +build-fresh: install-docs html diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..5492f92 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +sphinx>=4.0.0 +sphinx-rtd-theme>=1.0.0 +sphinx-autobuild>=2021.3.14 diff --git a/docs/source/_ext/expectations_autodoc.py b/docs/source/_ext/expectations_autodoc.py new file mode 100644 index 0000000..ec3b0d8 --- /dev/null +++ b/docs/source/_ext/expectations_autodoc.py @@ -0,0 +1,365 @@ +""" +Custom Sphinx extension for generating categorized DataFrame expectations documentation. +""" + +import inspect +import re +from collections import defaultdict +from typing import Any, Dict, List, Tuple + +from docutils import nodes +from docutils.nodes import Node +from docutils.parsers.rst import directives +from sphinx.application import Sphinx +from sphinx.util.docutils import SphinxDirective + +from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite + + +def parse_metadata_from_docstring(docstring: str) -> Tuple[str, str]: + """Parse metadata from docstring using YAML-style format.""" + if not docstring: + return None, None + + # Look for Categories section with YAML-style indentation + pattern = r"Categories:\s*\n\s*category:\s*(.+)\n\s*subcategory:\s*(.+)" + match = re.search(pattern, docstring, re.IGNORECASE) + + if match: + return match.group(1).strip(), match.group(2).strip() + + return None, None + + +def infer_category_from_method_name(method_name: str) -> Tuple[str, str]: + """Infer category and subcategory from method name as fallback.""" + if any( + keyword in method_name + for keyword in ["quantile", "max", "min", "mean", "median", "unique_rows"] + ): + return "Column Aggregation Expectations", get_subcategory_from_name(method_name) + else: + return "Column Expectations", get_subcategory_from_name(method_name) + + +def get_subcategory_from_name(method_name: str) -> str: + """Get subcategory from method name.""" + if any( + keyword in method_name + for keyword in ["string", "length", "contains", "starts", "ends"] + ): + return "String" + elif any( + keyword in method_name + for keyword in [ + "greater", + "less", + "between", + "quantile", + "max", + "min", + "mean", + "median", + ] + ): + return "Numerical" + else: + return "Any Value" + + +def clean_docstring_from_metadata(docstring: str) -> str: + """Remove metadata section from docstring.""" + if not docstring: + return "" + + # Remove Categories section + pattern = r"Categories:\s*\n\s*category:.*\n\s*subcategory:.*\n?" + cleaned = re.sub(pattern, "", docstring, flags=re.IGNORECASE) + + return cleaned.strip() + + +class ExpectationsDirective(SphinxDirective): + """ + Custom directive to generate categorized expectations documentation. + + Usage: + .. 
expectations:: + :class: dataframe_expectations.expectations_suite.DataFrameExpectationsSuite + :show-summary: + :show-cards: + """ + + has_content = False + required_arguments = 0 + optional_arguments = 0 + option_spec = { + 'class': directives.unchanged_required, + 'show-summary': directives.flag, + 'show-cards': directives.flag, + } + + def run(self) -> List[Node]: + """Generate the expectations documentation.""" + # Import the class + class_path = self.options.get('class', 'dataframe_expectations.expectations_suite.DataFrameExpectationsSuite') + module_name, class_name = class_path.rsplit('.', 1) + + try: + module = __import__(module_name, fromlist=[class_name]) + cls = getattr(module, class_name) + except (ImportError, AttributeError) as e: + error = f"Could not import {class_path}: {e}" + return [nodes.error("", nodes.paragraph("", error))] + + # Collect expectations by category + expectations_by_category = defaultdict(lambda: defaultdict(list)) + method_details = {} + + for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): + if name.startswith("_") or not name.startswith("expect_"): + continue + + docstring = inspect.getdoc(method) or "No description provided." + category, subcategory = parse_metadata_from_docstring(docstring) + if not category: + category, subcategory = infer_category_from_method_name(name) + + expectations_by_category[category][subcategory].append(name) + method_details[name] = { + "method": method, + "docstring": docstring, + "signature": inspect.signature(method), + "category": category, + "subcategory": subcategory, + } + + # Generate nodes + nodes_list = [] + + # Add summary table if requested + if 'show-summary' in self.options: + nodes_list.extend(self._generate_summary_table(expectations_by_category, method_details)) + + # Add cards if requested + if 'show-cards' in self.options: + nodes_list.extend(self._generate_expectation_cards(expectations_by_category, method_details)) + + return nodes_list + + def _generate_summary_table(self, expectations_by_category, method_details) -> List[Node]: + """Generate summary table nodes.""" + nodes_list = [] + + # Add section with title and proper ID + summary_section = nodes.section() + summary_section['ids'] = ['expectations-summary'] + summary_section['names'] = ['expectations-summary'] + summary_title = nodes.title("", "Expectations Summary") + summary_section += summary_title + + # Create table + table = nodes.table() + tgroup = nodes.tgroup(cols=3) + table += tgroup + + # Add column specifications + for width in [30, 25, 45]: + colspec = nodes.colspec(colwidth=width) + tgroup += colspec + + # Add table head + thead = nodes.thead() + tgroup += thead + + row = nodes.row() + thead += row + + for header in ["Category", "Subcategory", "Expectations"]: + entry = nodes.entry() + row += entry + entry += nodes.paragraph("", header) + + # Add table body + tbody = nodes.tbody() + tgroup += tbody + + for category in sorted(expectations_by_category.keys()): + for subcategory in sorted(expectations_by_category[category].keys()): + expectations = expectations_by_category[category][subcategory] + + row = nodes.row() + tbody += row + + # Category cell + entry = nodes.entry() + row += entry + entry += nodes.paragraph("", category) + + # Subcategory cell + entry = nodes.entry() + row += entry + entry += nodes.paragraph("", subcategory) + + # Expectations cell + entry = nodes.entry() + row += entry + + exp_para = nodes.paragraph() + for i, exp in enumerate(sorted(expectations)): + if i > 0: + exp_para += 
nodes.Text(", ") + + # Create clickable link to the card using raw HTML + raw_link = nodes.raw( + f'{exp}', + f'{exp}', + format='html' + ) + exp_para += raw_link + + entry += exp_para + + summary_section += table + nodes_list.append(summary_section) + return nodes_list + + def _generate_expectation_cards(self, expectations_by_category, method_details) -> List[Node]: + """Generate expectation cards in Great Expectations gallery style.""" + nodes_list = [] + + for category in sorted(expectations_by_category.keys()): + # Category header - use proper heading for TOC inclusion as top-level section + cat_section = nodes.section() + cat_section['ids'] = [f"category-{category.lower().replace(' ', '-')}"] + cat_section['names'] = [category.lower().replace(' ', '-')] + + cat_header = nodes.title("", category) + cat_header['classes'] = ['category-title'] + cat_section += cat_header + + # Create cards container for this category + cards_container = nodes.container() + cards_container['classes'] = ['expectations-gallery'] + + for subcategory in sorted(expectations_by_category[category].keys()): + # Subcategory header - use paragraph with special styling + subcat_header = nodes.paragraph() + subcat_header['classes'] = ['subcategory-title'] + subcat_header += nodes.Text(subcategory) + cards_container += subcat_header + + # Cards grid for this subcategory + cards_grid = nodes.container() + cards_grid['classes'] = ['cards-grid'] + + for method_name in sorted(expectations_by_category[category][subcategory]): + details = method_details[method_name] + card = self._create_expectation_card(method_name, details) + cards_grid += card + + cards_container += cards_grid + + cat_section += cards_container + nodes_list.append(cat_section) + + return nodes_list + + def _create_expectation_card(self, method_name: str, details: dict) -> Node: + """Create a single expectation card.""" + # Create card container + card = nodes.container() + card['classes'] = ['expectation-card'] + card['ids'] = [f"card-{method_name}"] + + # Card header with method name + card_header = nodes.container() + card_header['classes'] = ['card-header'] + + method_title = nodes.paragraph() + method_title['classes'] = ['method-name'] + method_title += nodes.Text(method_name) + card_header += method_title + + card += card_header + + # Card body + card_body = nodes.container() + card_body['classes'] = ['card-body'] + + # Description + clean_docstring = clean_docstring_from_metadata(details["docstring"]) + if clean_docstring: + description = clean_docstring.split('\n')[0] # First line only + desc_para = nodes.paragraph() + desc_para['classes'] = ['card-description'] + desc_para += nodes.Text(description) + card_body += desc_para + + # Data quality issue tags (similar to Great Expectations) + tags_container = nodes.container() + tags_container['classes'] = ['tags-container'] + + # Add category as a tag + category_tag = nodes.inline() + category_tag['classes'] = ['tag', 'category-tag'] + category_tag += nodes.Text(details['category']) + tags_container += category_tag + + # Add subcategory as a tag + subcategory_tag = nodes.inline() + subcategory_tag['classes'] = ['tag', 'subcategory-tag'] + subcategory_tag += nodes.Text(details['subcategory']) + tags_container += subcategory_tag + + card_body += tags_container + + # Parameters preview + params = [p for p in details["signature"].parameters.keys() if p != "self"] + if params: + params_container = nodes.container() + params_container['classes'] = ['params-preview'] + + params_title = nodes.paragraph() + 
params_title['classes'] = ['params-title'] + params_title += nodes.Text("Parameters:") + params_container += params_title + + params_list = nodes.paragraph() + params_list['classes'] = ['params-list'] + params_text = ", ".join(params[:3]) # Show first 3 parameters + if len(params) > 3: + params_text += f", ... (+{len(params) - 3} more)" + params_list += nodes.Text(params_text) + params_container += params_list + + card_body += params_container + + card += card_body + + # Card footer with actions - link to API reference + card_footer = nodes.container() + card_footer['classes'] = ['card-footer'] + + # Create link to API reference using raw HTML (anchor target assumes autodoc-style ids on an api_reference page) + api_link = nodes.raw( + '', + f'<a href="api_reference.html#dataframe_expectations.expectations_suite.DataFrameExpectationsSuite.{method_name}" class="api-link">View API Reference</a>', + format='html' + ) + card_footer += api_link + + card += card_footer + + return card + + +def setup(app: Sphinx) -> Dict[str, Any]: + """Setup function for the Sphinx extension.""" + app.add_directive("expectations", ExpectationsDirective) + + return { + 'version': '0.1', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css new file mode 100644 index 0000000..55f6815 --- /dev/null +++ b/docs/source/_static/custom.css @@ -0,0 +1,647 @@ +/* Custom styling for expectations documentation with PyData Sphinx theme */ + +/* Fix PyData theme page width constraint that causes header overlapping */ +.bd-page-width { + max-width: none !important; + width: 100% !important; +} + +.bd-header__inner.bd-page-width { + max-width: none !important; + width: calc(100% - 2rem) !important; + margin: 0 1rem !important; +} + +/* Improve header layout to prevent overlapping */ +.bd-header .bd-header__inner { + justify-content: space-between !important; + align-items: center !important; + flex-wrap: nowrap !important; +} + +.col-lg-3.navbar-header-items__start { + flex: 0 0 auto !important; + max-width: 40% !important; + overflow: hidden !important; +} + +.col-lg-9.navbar-header-items { + flex: 1 1 auto !important; + min-width: 0 !important; + display: flex !important; + justify-content: space-between !important; + align-items: center !important; +} + +.navbar-brand.logo .title.logo__title { + font-size: 1rem !important; + white-space: nowrap !important; + overflow: hidden !important; + text-overflow: ellipsis !important; + max-width: 100% !important; +} + +.navbar-header-items__center { + flex: 1 1 auto !important; + min-width: 0 !important; + overflow: hidden !important; +} + +.navbar-header-items__end { + flex: 0 0 auto !important; +} + +/* Fix sidebar layout - left navigation with section nav underneath */ +.bd-sidebar-primary { + display: block !important; + width: 280px !important; + position: sticky !important; + top: var(--pst-header-height) !important; + height: calc(100vh - var(--pst-header-height)) !important; + overflow-y: auto !important; +} + +.bd-sidebar-secondary { + display: none !important; +} + +/* Ensure main content adjusts for left sidebar only */ +.bd-main { + display: flex !important; +} + +.bd-content { + flex: 1 !important; + min-width: 0 !important; + margin-left: 0 !important; + margin-right: 0 !important; +} + +/* Make sure the article container is properly sized */ +.bd-article-container { + width: 100% !important; + max-width: none !important; + padding: 0 3rem !important; + min-width: 0 !important; +} + +/* Style the "On this page" section in the left sidebar */ +.bd-sidebar-primary .bd-toc { + margin-top: 2rem !important; + padding-top: 1rem !important; + border-top: 1px solid 
var(--pst-color-border) !important; +} + +.bd-sidebar-primary .bd-toc .toc-title { + font-weight: bold !important; + margin-bottom: 0.75rem !important; + color: var(--pst-color-text-base) !important; + font-size: 0.9rem !important; + text-transform: uppercase !important; + letter-spacing: 0.5px !important; +} + +.bd-sidebar-primary .bd-toc ul { + list-style: none !important; + padding-left: 0 !important; + margin: 0 !important; +} + +.bd-sidebar-primary .bd-toc ul ul { + padding-left: 1rem !important; + margin-top: 0.25rem !important; +} + +.bd-sidebar-primary .bd-toc li { + margin-bottom: 0.25rem !important; +} + +.bd-sidebar-primary .bd-toc a { + color: var(--pst-color-text-muted) !important; + text-decoration: none !important; + display: block !important; + padding: 0.25rem 0.5rem !important; + font-size: 0.85rem !important; + border-radius: 3px !important; + line-height: 1.4 !important; +} + +.bd-sidebar-primary .bd-toc a:hover { + color: var(--pst-color-primary) !important; + background-color: var(--pst-color-primary-bg) !important; +} + +.bd-sidebar-primary .bd-toc a.current { + color: var(--pst-color-primary) !important; + background-color: var(--pst-color-primary-bg) !important; + font-weight: 500 !important; +} + +/* Mobile responsive fixes */ +@media (max-width: 991px) { + .col-lg-3.navbar-header-items__start { + max-width: 50% !important; + } + + .navbar-brand.logo .title.logo__title { + font-size: 0.9rem !important; + } +} + +@media (max-width: 768px) { + .bd-header .bd-header__inner { + flex-wrap: wrap !important; + gap: 0.5rem !important; + } + + .col-lg-3.navbar-header-items__start, + .col-lg-9.navbar-header-items { + flex: 1 1 100% !important; + max-width: 100% !important; + } + + .navbar-brand.logo .title.logo__title { + font-size: 1rem !important; + white-space: normal !important; + line-height: 1.2 !important; + } + + .bd-sidebar-primary { + width: 100% !important; + position: relative !important; + height: auto !important; + } + + .bd-main { + flex-direction: column !important; + } +} + +/* Full-width layout for PyData theme */ +.bd-main .bd-content .bd-article-container { + max-width: none !important; +} + +.bd-container-fluid { + max-width: none !important; +} + +.bd-content { + padding-left: 3rem !important; + padding-right: 3rem !important; +} + +/* Style all tables in the expectations documentation */ +table.docutils { + width: 100% !important; + table-layout: fixed !important; + border-collapse: collapse !important; + margin: 1em 0 !important; +} + +table.docutils th, +table.docutils td { + border: 1px solid #ddd !important; + padding: 8px !important; + text-align: left !important; + vertical-align: top !important; + word-wrap: break-word !important; /* Allow long words to break */ + overflow-wrap: break-word !important; /* Modern browsers */ + white-space: normal !important; /* Allow text wrapping */ +} + +/* Set specific column widths for the expectations summary table */ +table.docutils th:nth-child(1), +table.docutils td:nth-child(1) { + width: 25% !important; /* Category column */ +} + +table.docutils th:nth-child(2), +table.docutils td:nth-child(2) { + width: 20% !important; /* Subcategory column */ +} + +table.docutils th:nth-child(3), +table.docutils td:nth-child(3) { + width: 55% !important; /* Expectations column */ +} + +table.docutils th { + background-color: #f5f5f5 !important; + font-weight: bold !important; +} + +table.docutils tr:nth-child(even) { + background-color: #f9f9f9 !important; +} + +/* Make expectation names in summary table clickable and 
styled */ +table.docutils td a, +table.docutils td a.expectation-link { + word-break: break-word !important; + display: inline !important; + color: #007bff !important; + text-decoration: none !important; + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace !important; + font-size: 0.9em !important; + padding: 2px 4px !important; + background-color: #f8f9fa !important; + border-radius: 3px !important; + border: 1px solid #e9ecef !important; +} + +table.docutils td a:hover, +table.docutils td a.expectation-link:hover { + background-color: #e3f2fd !important; + border-color: #2196f3 !important; + text-decoration: none !important; +} + +/* For very small screens, allow some responsiveness */ +@media (max-width: 768px) { + table.docutils { + font-size: 0.9em !important; + } + + table.docutils th, + table.docutils td { + padding: 6px !important; + } +} + +/* Style method signatures */ +.method-signature { + background-color: #f8f8f8; + border: 1px solid #e1e1e1; + border-radius: 3px; + padding: 10px; + font-family: 'Courier New', Courier, monospace; + margin: 10px 0; +} + +/* Style expectation method headers */ +.expectation-method h5 { + color: #2e8b57; + border-bottom: 2px solid #2e8b57; + padding-bottom: 5px; +} + +/* Style parameter lists */ +.parameter-list { + margin-left: 20px; +} + +.parameter-list li { + margin-bottom: 5px; +} + +/* Add some spacing to sections */ +.section { + margin-bottom: 2em; +} + +/* Style code blocks */ +pre { + background-color: #f8f8f8; + border: 1px solid #e1e1e1; + border-radius: 4px; + padding: 10px; + overflow-x: auto; +} + +/* Style inline code */ +code { + background-color: #f1f1f1; + padding: 2px 4px; + border-radius: 3px; + font-family: 'Courier New', Courier, monospace; +} + +/* Style custom method documentation to match autodoc exactly */ +dl.py.method { + margin-bottom: 2em; +} + +dt.sig.sig-object.py { + background-color: #f8f8f8; + border: 1px solid #d1d1d1; + border-radius: 3px; + padding: 10px; + font-family: 'SFMono-Regular', Menlo, 'Liberation Mono', Courier, monospace; + font-size: 0.9em; + margin-bottom: 0.5em; + font-weight: normal; +} + +dt.sig.sig-object.py .sig-name { + font-weight: bold; +} + +dt.sig.sig-object.py .sig-paren { + color: #666; +} + +dd.field-list { + margin-left: 2em; +} + +dd.field-list dt { + font-weight: bold; + margin-bottom: 0.5em; +} + +dd.field-list dd { + margin-left: 1em; + margin-bottom: 0.5em; +} + +/* Style the description content */ +dd.desc-content { + margin-left: 2em; +} + +dd.desc-content p { + margin-bottom: 1em; +} + +/* Style parameter lists */ +dl.field-list dt { + font-weight: bold; + color: #2980b9; +} + +dl.field-list dd p strong { + color: #333; +} + +/* Ensure our custom expectations use standard Sphinx autodoc styling */ +dl.py.method > dt { + background-color: #f8f8f8 !important; + border: 1px solid #d1d1d1 !important; + border-radius: 3px !important; + padding: 10px !important; + font-family: 'SFMono-Regular', Menlo, 'Liberation Mono', Courier, monospace !important; + font-size: 0.9em !important; + margin-bottom: 0.5em !important; + font-weight: normal !important; +} + +dl.py.method > dd { + margin-left: 2em !important; +} + +/* Override any custom formatting that interferes with autodoc */ +.expectation-method h5, +.method-signature, +.parameter-list { + display: none !important; /* Hide any custom formatting */ +} + +/* Make sure field lists look standard */ +dl.py.method dd dl.field-list { + margin-top: 1em !important; +} + +dl.py.method dd dl.field-list dt { + font-weight: bold 
!important; + color: #2980b9 !important; + margin-bottom: 0.5em !important; +} + +dl.py.method dd dl.field-list dd { + margin-left: 1em !important; + margin-bottom: 1em !important; +} + +/* Great Expectations Gallery Style Cards */ +.expectations-gallery { + margin: 2rem 0 !important; + padding: 0 !important; + width: 100% !important; +} + +/* Style category title headings for TOC inclusion */ +.category-title, +h1.category-title, +h2.category-title { + color: #2c3e50 !important; + border-bottom: 3px solid #3498db !important; + padding-bottom: 10px !important; + margin: 2em 0 1em 0 !important; + font-size: 1.4em !important; + font-weight: bold !important; +} + +.subcategory-title { + color: #34495e !important; + margin: 1.5em 0 1em 0 !important; + font-size: 1.2em !important; + font-weight: bold !important; +} + +.cards-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(400px, 1fr)); + gap: 32px !important; + margin: 2rem 0 !important; + align-items: start; + padding: 1rem 0 !important; + width: 100% !important; + max-width: none !important; +} + +.expectation-card { + border: 1px solid #e1e8ed !important; + border-radius: 8px !important; + background: white !important; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important; + transition: all 0.3s ease !important; + overflow: hidden !important; + min-height: 320px !important; + display: flex !important; + flex-direction: column !important; + margin: 0 !important; + width: 100% !important; + min-width: 400px !important; +} + +/* Ensure expectation cards have proper internal padding */ +.expectation-card .card-header, +.expectation-card .card-body, +.expectation-card .card-footer { + box-sizing: border-box !important; +} + +.expectation-card .card-header { + padding: 20px 24px !important; +} + +.expectation-card .card-body { + padding: 24px !important; +} + +.expectation-card .card-footer { + padding: 20px 24px !important; +} + +.expectation-card:hover { + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); + transform: translateY(-2px); +} + +.card-header { + background: #f8f9fa !important; + padding: 20px 24px !important; + border-bottom: 1px solid #e1e8ed !important; + min-height: 60px !important; + display: flex !important; + align-items: center !important; +} + +.method-name { + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + font-size: 1.1em; + font-weight: bold; + color: #2c3e50; + margin: 0; + word-wrap: break-word; + word-break: break-word; + overflow-wrap: break-word; + hyphens: auto; + line-height: 1.3; +} + +.card-body { + padding: 24px !important; + flex: 1 !important; + display: flex !important; + flex-direction: column !important; + justify-content: space-between !important; +} + +.card-description { + color: #555; + margin-bottom: 15px; + line-height: 1.5; +} + +.tags-container { + margin: 15px 0; +} + +.tag { + display: inline-block; + padding: 4px 8px; + border-radius: 12px; + font-size: 0.8em; + margin-right: 8px; + margin-bottom: 5px; +} + +.category-tag { + background: #e3f2fd; + color: #1976d2; + border: 1px solid #bbdefb; +} + +.subcategory-tag { + background: #f3e5f5; + color: #7b1fa2; + border: 1px solid #ce93d8; +} + +.params-preview { + margin-top: 15px; + padding: 10px; + background: #f8f9fa; + border-radius: 4px; +} + +.params-title { + font-weight: bold; + color: #495057; + margin: 0 0 5px 0; + font-size: 0.9em; +} + +.params-list { + font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; + font-size: 0.85em; + color: #6c757d; + margin: 0; +} + +.card-footer { + padding: 20px 24px 
!important; + background: #f8f9fa !important; + border-top: 1px solid #e1e8ed !important; + text-align: right !important; + margin-top: auto !important; +} + +.btn { + display: inline-block; + padding: 8px 16px; + border-radius: 4px; + text-decoration: none; + font-size: 0.9em; + font-weight: 500; + transition: all 0.2s ease; +} + +.btn-details { + background: #007bff; + color: white !important; + border: 1px solid #007bff; + text-decoration: none !important; +} + +.btn-details:hover { + background: #0056b3; + border-color: #0056b3; + text-decoration: none !important; + color: white !important; +} + +/* Responsive design */ +@media (max-width: 1400px) { + .cards-grid { + grid-template-columns: repeat(auto-fit, minmax(380px, 1fr)) !important; + gap: 28px !important; + } +} + +@media (max-width: 1200px) { + .cards-grid { + grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)) !important; + gap: 24px !important; + } + + .expectation-card { + min-width: 350px !important; + } +} + +@media (max-width: 768px) { + .cards-grid { + grid-template-columns: 1fr !important; + gap: 24px !important; + margin: 1.5rem 0 !important; + padding: 0 !important; + } + + .expectation-card { + margin: 0 !important; + min-height: 280px !important; + min-width: auto !important; + } + + .bd-content { + padding-left: 1.5rem !important; + padding-right: 1.5rem !important; + } + + .bd-article-container { + padding: 0 1.5rem !important; + } +} diff --git a/docs/source/_templates/expectations_summary.html b/docs/source/_templates/expectations_summary.html new file mode 100644 index 0000000..d064bc8 --- /dev/null +++ b/docs/source/_templates/expectations_summary.html @@ -0,0 +1,27 @@ +
+<div class="expectations-summary">
+  <h2>Expectations Summary</h2>
+  <table>
+    <thead>
+      <tr>
+        <th>Category</th>
+        <th>Subcategory</th>
+        <th>Expectations</th>
+      </tr>
+    </thead>
+    <tbody>
+      {% for category, subcategories in expectations_by_category.items() %}
+      {% for subcategory, expectations in subcategories.items() %}
+      <tr>
+        <td>{{ category }}</td>
+        <td>{{ subcategory }}</td>
+        <td>
+          {% for exp in expectations %}
+          <a href="#card-{{ exp }}" class="expectation-link">{{ exp }}</a>{% if not loop.last %}, {% endif %}
+          {% endfor %}
+        </td>
+      </tr>
+      {% endfor %}
+      {% endfor %}
+    </tbody>
+  </table>
+</div>
+
diff --git a/docs/source/adding_expectations.rst b/docs/source/adding_expectations.rst
new file mode 100644
index 0000000..99e6843
--- /dev/null
+++ b/docs/source/adding_expectations.rst
@@ -0,0 +1,493 @@
+Adding Your Expectations
+========================
+
+This guide will walk you through the process of creating custom expectations for DataFrame validation.
+There are three main approaches, depending on your use case.
+
+Defining Your Expectations
+--------------------------
+
+Most use cases that involve validating a single column in the DataFrame can be covered by initialising the
+``DataFrameColumnExpectation`` class with the correct parameters. Expectations implemented by initialising
+``DataFrameColumnExpectation`` can be found in the ``column_expectations`` module, categorised based on the data type
+of the column value.
+
+If you want to go ahead with implementing ``DataFrameColumnExpectation``, you first need to identify the data type of
+the column value. Existing expectations are already categorised into ``string``, ``numerical`` or ``any_value``
+expectations. Create a new category in ``column_expectations`` if you think the existing categories don't fit your
+use case. Once you have decided where the expectation needs to be added, you can define it as follows:
+
+.. code-block:: python
+
+    from pyspark.sql import functions as F
+
+    from dataframe_expectations.expectations.column_expectation import (
+        DataFrameColumnExpectation,
+    )
+    from dataframe_expectations.expectations.expectation_registry import (
+        register_expectation,
+    )
+    from dataframe_expectations.expectations.utils import requires_params
+
+
+    @register_expectation("ExpectIsDivisible")
+    @requires_params("column_name", "value", types={"column_name": str, "value": int})
+    def create_expectation_is_divisible(**kwargs) -> DataFrameColumnExpectation:
+        column_name = kwargs["column_name"]
+        value = kwargs["value"]
+
+        return DataFrameColumnExpectation(
+            expectation_name="ExpectIsDivisible",
+            column_name=column_name,
+            fn_violations_pandas=lambda df: df[df[column_name] % value != 0],  # function that finds violations
+            fn_violations_pyspark=lambda df: df.filter(F.col(column_name) % value != 0),  # function that finds violations
+            description=f"'{column_name}' divisible by {value}",
+            error_message=f"'{column_name}' not divisible by {value}.",
+        )
+
+For additional guidance, you can refer to the implementations of ``ExpectationValueGreaterThan`` and
+``ExpectationValueLessThan`` in ``column_expectation_factory.py``. These examples demonstrate how to initialise the
+``DataFrameColumnExpectation`` class with the right parameters and how to define the filtering logic for each
+DataFrame type. The ``@register_expectation`` decorator is needed to add your expectation to the library, and the
+``@requires_params`` decorator is a utility that helps you validate the input parameters.
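+
+Once registered, the expectation is created through the registry rather than by calling the factory directly. The
+sketch below follows the registry API used in the test suite; the column name ``user_id`` is illustrative, and the
+exact error raised by ``@requires_params`` for a missing or mis-typed parameter is an assumption here:
+
+.. code-block:: python
+
+    from dataframe_expectations.expectations.expectation_registry import (
+        DataFrameExpectationRegistry,
+    )
+
+    # Look up the registered factory and build the expectation
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectIsDivisible",
+        column_name="user_id",
+        value=2,
+    )
+
+    # Omitting a required parameter (or passing the wrong type) is rejected
+    # by @requires_params before the factory body runs
+    DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectIsDivisible",
+        column_name="user_id",
+    )  # fails validation because 'value' is missing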
+
+Adding Aggregation-Based Expectations
+--------------------------------------
+
+Just like the column expectations, you can find the aggregation-based expectations in the ``aggregation_expectations``
+module. For expectations that require aggregation operations (such as row counts, distinct value counts, null
+percentages, etc.), you should implement custom expectation classes by inheriting from
+``DataFrameAggregationExpectation``. These types of expectations cannot be easily covered
+by the ``DataFrameColumnExpectation`` class because they involve DataFrame-level or column-level aggregations rather
+than row-by-row validations.
+
+Existing expectations are already categorised into ``string``, ``numerical`` or ``any_value``
+expectations. Before you implement your aggregation-based expectation, determine which category the aggregation
+operation belongs to and add it there. Feel free to create a new category if needed.
+
+Here's an example of how to implement an aggregation-based expectation:
+
+.. code-block:: python
+
+    from dataframe_expectations import DataFrameLike, DataFrameType
+    from dataframe_expectations.expectations.aggregation_expectation import (
+        DataFrameAggregationExpectation,
+    )
+    from dataframe_expectations.expectations.expectation_registry import register_expectation
+    from dataframe_expectations.expectations.utils import requires_params
+    from dataframe_expectations.result_message import (
+        DataFrameExpectationFailureMessage,
+        DataFrameExpectationResultMessage,
+        DataFrameExpectationSuccessMessage,
+    )
+    import pandas as pd
+    from pyspark.sql import functions as F
+
+
+    class ExpectationMinRows(DataFrameAggregationExpectation):
+        """
+        Expectation that validates the DataFrame has at least a minimum number of rows.
+        """
+
+        def __init__(self, min_count: int):
+            description = f"DataFrame has at least {min_count} row(s)"
+            self.min_count = min_count
+
+            super().__init__(
+                expectation_name="ExpectationMinRows",
+                column_names=[],  # Empty list since this operates on the entire DataFrame
+                description=description,
+            )
+
+        def aggregate_and_validate_pandas(
+            self, data_frame: DataFrameLike, **kwargs
+        ) -> DataFrameExpectationResultMessage:
+            """Validate minimum row count in a pandas DataFrame."""
+            # Note: Parent class already checks if the column is present when column_names is not empty
+            try:
+                row_count = len(data_frame)
+
+                if row_count >= self.min_count:
+                    return DataFrameExpectationSuccessMessage(
+                        expectation_name=self.get_expectation_name()
+                    )
+                else:
+                    return DataFrameExpectationFailureMessage(
+                        expectation_str=str(self),
+                        data_frame_type=DataFrameType.PANDAS,
+                        message=f"DataFrame has {row_count} row(s), expected at least {self.min_count}.",
+                    )
+            except Exception as e:
+                return DataFrameExpectationFailureMessage(
+                    expectation_str=str(self),
+                    data_frame_type=DataFrameType.PANDAS,
+                    message=f"Error counting rows: {str(e)}",
+                )
+
+        def aggregate_and_validate_pyspark(
+            self, data_frame: DataFrameLike, **kwargs
+        ) -> DataFrameExpectationResultMessage:
+            """Validate minimum row count in a PySpark DataFrame."""
+            # Note: Parent class already checks if the column is present when column_names is not empty
+            try:
+                row_count = data_frame.count()
+
+                if row_count >= self.min_count:
+                    return DataFrameExpectationSuccessMessage(
+                        expectation_name=self.get_expectation_name()
+                    )
+                else:
+                    return DataFrameExpectationFailureMessage(
+                        expectation_str=str(self),
+                        data_frame_type=DataFrameType.PYSPARK,
+                        message=f"DataFrame has {row_count} row(s), expected at least {self.min_count}.",
+                    )
+            except Exception as e:
+                return DataFrameExpectationFailureMessage(
+                    expectation_str=str(self),
+                    data_frame_type=DataFrameType.PYSPARK,
+                    message=f"Error counting rows: {str(e)}",
+                )
+
+
+    @register_expectation("ExpectationMinRows")
+    @requires_params("min_count", types={"min_count": int})
+    def create_expectation_min_rows(**kwargs) -> ExpectationMinRows:
+        """
+        Create an ExpectationMinRows instance.
+
+        Args:
+            min_count (int): Minimum required number of rows.
+
+        Returns:
+            ExpectationMinRows: A configured expectation instance.
+        """
+        return ExpectationMinRows(min_count=kwargs["min_count"])
+
+Key differences for aggregation-based expectations:
+
+1. 
**Inherit from** ``DataFrameAggregationExpectation``: This base class provides the framework for aggregation operations and automatically handles column validation. + +2. **Implement** ``aggregate_and_validate_pandas`` **and** ``aggregate_and_validate_pyspark``: These methods are specifically designed for aggregation operations rather than the generic ``validate_pandas`` and ``validate_pyspark`` methods. + +3. **Call** ``super().__init__()``: Initialize the parent class with expectation metadata including ``expectation_name``, ``column_names``, and ``description``. + +4. **Automatic column validation**: The parent class automatically validates that required columns exist before calling your implementation methods. You don't need to manually check for column existence. + +5. **Error handling**: Wrap aggregation operations in try-catch blocks since aggregations can fail due to data type issues or other DataFrame problems. + +Example of a column-based aggregation expectation: + +.. code-block:: python + + class ExpectationColumnMeanBetween(DataFrameAggregationExpectation): + """ + Expectation that validates the mean value of a column falls within a specified range. + """ + + def __init__(self, column_name: str, min_value: float, max_value: float): + description = f"column '{column_name}' mean value between {min_value} and {max_value}" + + self.column_name = column_name + self.min_value = min_value + self.max_value = max_value + + super().__init__( + expectation_name="ExpectationColumnMeanBetween", + column_names=[column_name], # List of columns this expectation requires + description=description, + ) + + def aggregate_and_validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate column mean in a pandas DataFrame.""" + # Column validation is automatically handled by the parent class + try: + mean_val = data_frame[self.column_name].mean() + + if pd.isna(mean_val): + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' contains only null values.", + ) + + if self.min_value <= mean_val <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.", + ) + except Exception as e: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PANDAS, + message=f"Error calculating mean for column '{self.column_name}': {str(e)}", + ) + + def aggregate_and_validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """Validate column mean in a PySpark DataFrame.""" + # Column validation is automatically handled by the parent class + try: + mean_result = data_frame.select(F.avg(self.column_name).alias("mean_val")).collect() + mean_val = mean_result[0]["mean_val"] + + if mean_val is None: + return DataFrameExpectationFailureMessage( + expectation_str=str(self), + data_frame_type=DataFrameType.PYSPARK, + message=f"Column '{self.column_name}' contains only null values.", + ) + + if self.min_value <= mean_val <= self.max_value: + return DataFrameExpectationSuccessMessage( + expectation_name=self.get_expectation_name() + ) + else: + return DataFrameExpectationFailureMessage( + 
expectation_str=str(self),
+                        data_frame_type=DataFrameType.PYSPARK,
+                        message=f"Column '{self.column_name}' mean value {mean_val} is not between {self.min_value} and {self.max_value}.",
+                    )
+            except Exception as e:
+                return DataFrameExpectationFailureMessage(
+                    expectation_str=str(self),
+                    data_frame_type=DataFrameType.PYSPARK,
+                    message=f"Error calculating mean for column '{self.column_name}': {str(e)}",
+                )
+
+Key considerations for aggregation-based expectations:
+
+1. **Performance**: Aggregation operations can be expensive, especially on large datasets in PySpark. Consider the performance implications of your aggregation logic.
+
+2. **Different DataFrame types**: Ensure your implementation works correctly for both Pandas and PySpark DataFrames, as aggregation APIs may differ (e.g., ``df.mean()`` vs ``F.avg()``).
+
+3. **Error handling**: Include proper error handling for edge cases like empty DataFrames or all-null columns.
+
+4. **Message clarity**: Provide clear, informative error messages that help users understand what went wrong.
+
+5. **Automatic column validation**: The ``DataFrameAggregationExpectation`` base class automatically validates that required columns exist before calling your ``aggregate_and_validate_*`` methods. Simply specify the required columns in the ``column_names`` parameter during initialization.
+
+6. **Focus on aggregation logic**: Since column validation is handled automatically, you can focus purely on implementing your aggregation and validation logic without worrying about column existence checks.
+
+Examples of aggregation-based expectations include:
+
+- ``ExpectationMinRows`` / ``ExpectationMaxRows``: Validate row count limits
+- ``ExpectationDistinctColumnValuesEquals``: Validate the number of distinct values in a column
+- ``ExpectationMaxNullPercentage``: Validate the percentage of null values in a column
+- ``ExpectationUniqueRows``: Validate that rows are unique across specified columns
+- ``ExpectationColumnMeanBetween``: Validate that column mean falls within a range
+- ``ExpectationColumnQuantileBetween``: Validate that column quantiles fall within ranges
+
+For more examples, check the ``aggregation_expectations`` module.
+
+Custom Expectations with Full Control
+--------------------------------------
+
+While ``DataFrameColumnExpectation`` covers most use cases, there might be other instances where you need more control
+over the validation logic. For such instances, you can define a new expectation by inheriting from the
+``DataFrameExpectation`` class.
+
+To help you get started, here's a template you can customize to fit your specific use case:
+
+.. code-block:: python
+
+    from typing import Callable
+
+    from dataframe_expectations import DataFrameLike, DataFrameType
+    from dataframe_expectations.expectations import DataFrameExpectation
+    from dataframe_expectations.result_message import (
+        DataFrameExpectationFailureMessage,
+        DataFrameExpectationResultMessage,
+        DataFrameExpectationSuccessMessage,
+    )
+
+    class ExpectTheUnexpected(DataFrameExpectation):
+        """
+        Description of the expectation
+        """
+
+        def __init__(self):
+            """
+            Initialize the expectation. For example:
+            - column_name: The name of the column to validate.
+            - value: The expected threshold for validation.
+            """
+
+            pass
+
+        def get_description(self) -> str:
+            """
+            Returns a description of the expectation.
+ """ + return + + def validate_pandas( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a pandas DataFrame against the expectation. + """ + + + def validate_pyspark( + self, data_frame: DataFrameLike, **kwargs + ) -> DataFrameExpectationResultMessage: + """ + Validate a PySpark DataFrame against the expectation. + """ + + +Adding to DataFrameExpectationsSuite +------------------------------------- + +The ``DataFrameExpectationsSuite`` encapsulates all the expectations that are provided by this library. +After defining and testing your expectation, integrate it into the ``DataFrameExpectationsSuite`` by creating a new +method with a descriptive name starting with the prefix ``expect_`` (this is needed to generate documentation later). +Here's an example: + +.. code-block:: python + + class DataFrameExpectationsSuite: + """ + A suite of expectations for validating DataFrames. + """ + ... + + def expect_is_divisible( + self, + column_name: str, + value: float, + # You can add more parmeters here + ): + """ + Define what the expectation does + :param column_name: The name of the column to check. + :param value: The value to compare against. + :return: An instance of DataFrameExpectationsSuite. + """ + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectIsDivisible", + column_name=column_name, + value=value, + ) + + logger.info(f"Adding expectation: {expectation}") + self.__expectations.append(expectation) + return self + +Adding Unit Tests +----------------- + +To ensure your expectations work as expected (pun intended), make sure to add unit tests in the +``tests/data_engineering/dataframe_expectations/expectations_implemented`` folder. Here's a template to get you started: + +.. code-block:: python + + import pytest + import pandas as pd + + from dataframe_expectations import DataFrameType + from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, + ) + from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, + ) + + + def test_expectation_name(): + """ + Test that the expectation name is correctly returned. + This method should be implemented in the subclass. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDoesSomeCheck", + column_name="col1", + value=5, + ) + assert expectation.get_expectation_name() == "ExpectationDoesSomeCheck", f"Expected 'ExpectationDoesSomeCheck' but got: {expectation.get_expectation_name()}" + + def test_expectation_pandas_success(): + """ + Test the expectation for pandas DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + def test_expectation_pandas_violations(): + """ + Test the expectation for pandas DataFrame with violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + def test_expectation_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + def test_expectation_pyspark_violations(spark): + """ + Test the expectation for PySpark DataFrame with violations. + This method should be implemented in the subclass. 
+ """ + raise NotImplementedError("Subclasses should implement this method.") + + def test_suite_pandas_success(): + """ + Test the expectation suite for pandas DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + def test_suite_pandas_violations(): + """ + Test the expectation suite for pandas DataFrame with violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + def test_suite_pyspark_success(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + def test_suite_pyspark_violations(spark): + """ + Test the expectation suite for PySpark DataFrame with violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + +For concrete examples of unit tests, check for tests in the ``expectations_implemented`` folder. You can also +find the unit test template here. + +Updating the Documentation +-------------------------- + +After the expectation is ready for use, the last thing remaining is adding your expectation to the documentation. The documentation is automatically generated using a CI pipeline with the ``uv`` package manager and is available at ``docs/build/html/expectations.html``. + +Make sure to add the docstring for the function you added to ``DataFrameExpectationsSuite`` before submitting your changes. The CI pipeline will automatically update the documentation using the make targets in the ``docs`` folder when your changes are merged. + +If you need to build the documentation locally for testing, you can use the make targets available in the ``docs`` folder. + +.. code-block:: bash + + cd docs + uv run sphinx-build source build/html + +or use the make command + +.. code-block:: bash + + cd docs + make html diff --git a/docs/source/api_reference.rst b/docs/source/api_reference.rst new file mode 100644 index 0000000..8c9c9b5 --- /dev/null +++ b/docs/source/api_reference.rst @@ -0,0 +1,62 @@ +API Reference +============= + +This section provides detailed documentation for the core infrastructure classes and modules in the DataFrame Expectations library. For user-facing expectation methods, see :doc:`expectations`. + +Core Infrastructure +------------------- + +Base Expectation Classes +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: dataframe_expectations.expectations + :members: + :undoc-members: + :show-inheritance: + +Column Expectations +~~~~~~~~~~~~~~~~~~~ + +.. automodule:: dataframe_expectations.expectations.column_expectation + :members: + :undoc-members: + :show-inheritance: + +Aggregation Expectations +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: dataframe_expectations.expectations.aggregation_expectation + :members: + :undoc-members: + :show-inheritance: + +Expectation Registry +-------------------- + +.. automodule:: dataframe_expectations.expectations.expectation_registry + :members: + :undoc-members: + :show-inheritance: + +Result Messages +--------------- + +.. automodule:: dataframe_expectations.result_message + :members: + :undoc-members: + :show-inheritance: + +Utilities +--------- + +.. 
automodule:: dataframe_expectations.expectations.utils + :members: + :undoc-members: + :show-inheritance: + +Exception Classes +----------------- + +.. automodule:: dataframe_expectations.expectations_suite + :members: DataFrameExpectationsSuiteFailure + :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..f75ec19 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,83 @@ +import os +import sys + +# Add the project root and extension directories to the path +sys.path.insert(0, os.path.abspath('../../')) +sys.path.insert(0, os.path.abspath('_ext')) + +# Project information +project = 'DataFrame Expectations' +copyright = '2024, Your Name' +author = 'Your Name' +release = '0.1.0' + +# Extensions +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.viewcode', + 'sphinx.ext.napoleon', # For Google/NumPy style docstrings + 'sphinx.ext.intersphinx', + 'expectations_autodoc', # Our custom extension +] + +# Theme +html_theme = 'pydata_sphinx_theme' + +# PyData theme options for modern, full-width usage +html_theme_options = { + "use_edit_page_button": False, + "navigation_depth": 3, + "show_prev_next": True, + "navbar_persistent": ["search-button"], + "navbar_center": ["navbar-nav"], + "navbar_end": [], + "sidebar_includehidden": True, + "primary_sidebar_end": ["page-toc"], + "secondary_sidebar_items": [], + "show_toc_level": 3, +} + +# Autodoc settings +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, + 'special-members': '__init__', +} + +# Napoleon settings for docstring parsing +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = False +napoleon_include_private_with_doc = False + +# Intersphinx mapping for cross-references +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), + 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), + 'pyspark': ('https://spark.apache.org/docs/latest/api/python/', None), +} + +# HTML output options +html_static_path = ['_static'] +html_css_files = [ + 'custom.css', +] + +# Configure HTML title and layout +html_title = f"{project} v{release} Documentation" +html_short_title = project + +# PyData theme context +html_context = { + 'display_github': True, + 'github_user': 'getyourguide', + 'github_repo': 'dataframe-expectations', + 'github_version': 'main', + 'doc_path': 'docs/source/', +} + +# Logo configuration +html_logo = None # You can add a logo path here if needed +html_favicon = None # You can add a favicon path here if needed diff --git a/docs/source/expectations.rst b/docs/source/expectations.rst new file mode 100644 index 0000000..8c82b3b --- /dev/null +++ b/docs/source/expectations.rst @@ -0,0 +1,11 @@ +Expectation Gallery +=================== + + +This page provides comprehensive documentation for all available DataFrame expectations. +The expectations are automatically categorized and organized for easy browsing. + +.. expectations:: + :class: dataframe_expectations.expectations_suite.DataFrameExpectationsSuite + :show-summary: + :show-cards: diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst new file mode 100644 index 0000000..322dd6a --- /dev/null +++ b/docs/source/getting_started.rst @@ -0,0 +1,122 @@ +Getting Started +=============== + +Welcome to DataFrame Expectations! This guide will help you get up and running quickly with validating your Pandas and PySpark DataFrames. 
+
+Installation
+------------
+
+Install DataFrame Expectations using pip:
+
+.. code-block:: bash
+
+    pip install dataframe-expectations
+
+Requirements
+~~~~~~~~~~~~
+
+* Python 3.10+
+* pandas >= 1.5.0
+* pyspark >= 3.3.0
+* tabulate >= 0.8.9
+
+Basic Usage
+-----------
+
+DataFrame Expectations provides a fluent API for building validation suites. Here's how to get started:
+
+Pandas Example
+~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    import pandas as pd
+    from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+    # Create a sample DataFrame
+    df = pd.DataFrame({
+        "age": [25, 15, 45, 22],
+        "name": ["Alice", "Bob", "Charlie", "Diana"],
+        "salary": [50000, 60000, 80000, 45000]
+    })
+
+    # Build a validation suite
+    suite = (
+        DataFrameExpectationsSuite()
+        .expect_min_rows(3)  # At least 3 rows
+        .expect_max_rows(10)  # At most 10 rows
+        .expect_value_greater_than("age", 18)  # All ages > 18
+        .expect_value_less_than("salary", 100000)  # All salaries < 100k
+        .expect_value_not_null("name")  # No null names
+    )
+
+    # Run validation
+    suite.run(df)
+
+
+PySpark Example
+~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    from pyspark.sql import SparkSession
+    from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
+
+    # Initialize Spark
+    spark = SparkSession.builder.appName("DataFrameExpectations").getOrCreate()
+
+    # Create a sample DataFrame
+    data = [
+        {"age": 25, "name": "Alice", "salary": 50000},
+        {"age": 15, "name": "Bob", "salary": 60000},
+        {"age": 45, "name": "Charlie", "salary": 80000},
+        {"age": 22, "name": "Diana", "salary": 45000}
+    ]
+    df = spark.createDataFrame(data)
+
+    # Build a validation suite (same API as Pandas!)
+    suite = (
+        DataFrameExpectationsSuite()
+        .expect_min_rows(3)
+        .expect_max_rows(10)
+        .expect_value_greater_than("age", 18)
+        .expect_value_less_than("salary", 100000)
+        .expect_value_not_null("name")
+    )
+
+    # Run validation
+    suite.run(df)
+
+Example Output
+~~~~~~~~~~~~~~
+
+When validations fail, you'll see detailed output like this:
+
+.. code-block:: text
+
+    ========================== Running expectations suite ==========================
+    ExpectationMinRows (DataFrame contains at least 3 rows) ... OK
+    ExpectationMaxRows (DataFrame contains at most 10 rows) ... OK
+    ExpectationValueGreaterThan ('age' is greater than 18) ... FAIL
+    ExpectationValueLessThan ('salary' is less than 100000) ... OK
+    ExpectationValueNotNull ('name' is not null) ... OK
+    ============================ 4 success, 1 failures =============================
+
+    ExpectationSuiteFailure: (1/5) expectations failed.
+
+    ================================================================================
+    List of violations:
+    --------------------------------------------------------------------------------
+    [Failed 1/1] ExpectationValueGreaterThan ('age' is greater than 18): Found 1 row(s) where 'age' is not greater than 18.
+    Some examples of violations:
+    +-----+------+--------+
+    | age | name | salary |
+    +-----+------+--------+
+    | 15  | Bob  | 60000  |
+    +-----+------+--------+
+    ================================================================================
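+
+Handling Failures Programmatically
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In a pipeline you may prefer to catch the failure instead of letting it propagate. The minimal sketch below assumes
+that ``run`` raises the ``DataFrameExpectationsSuiteFailure`` exception documented in the API reference when at least
+one expectation fails; check the exception's attributes in your version before relying on them:
+
+.. code-block:: python
+
+    from dataframe_expectations.expectations_suite import (
+        DataFrameExpectationsSuite,
+        DataFrameExpectationsSuiteFailure,
+    )
+
+    suite = DataFrameExpectationsSuite().expect_value_greater_than("age", 18)
+
+    try:
+        suite.run(df)
+    except DataFrameExpectationsSuiteFailure as failure:
+        # React to the violations (log, alert, quarantine the data) instead of crashing
+        print(f"Validation failed: {failure}")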
+
+How to contribute?
+------------------
+Contributions are welcome! You can enhance the library by adding new expectations, refining existing ones, or
+improving the testing framework or the documentation.
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..06143a2
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,22 @@
+DataFrame Expectations
+======================
+
+**DataFrameExpectations** is a Python library designed to validate **Pandas** and **PySpark** DataFrames using
+customizable, reusable expectations. It simplifies testing in data pipelines and end-to-end workflows by providing a
+standardized framework for DataFrame validation.
+
+Instead of using different validation approaches for DataFrames, this library provides a standardized solution for this
+use case. As a result, any contributions made here, such as adding new expectations, can be leveraged by all users of
+the library.
+
+See the starter guide :doc:`here <getting_started>`.
+See the complete list of expectations :doc:`here <expectations>`.
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   getting_started
+   adding_expectations
+   expectations
+   api_reference
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..c3ea1a7
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,41 @@
+[project]
+name = "dataframe-expectations"
+version = "0.1.0"
+description = "Python library designed to validate Pandas and PySpark DataFrames using customizable, reusable expectations"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "pandas>=1.5.0",
+    "pyspark>=3.3.0",
+    "tabulate>=0.8.9",
+]
+
+[dependency-groups]
+dev = [
+    "numpy>=1.21.0",
+    "pytest>=7.0.0",
+    "pre-commit>=2.20.0",
+    "ruff>=0.1.0",
+    "pytest-cov>=4.0.0",
+]
+docs = [
+    "sphinx>=4.0.0",
+    "pydata-sphinx-theme>=0.13.0",
+    "sphinx-autobuild>=2021.3.14",
+    "pyspark>=3.3.0",
+    "pandas>=1.5.0",
+    "tabulate>=0.8.9",
+]
+
+
+[tool.ruff]
+target-version = "py310"  # Target the minimum supported version
+line-length = 100
+
+[tool.mypy]
+python_version = "3.10"  # Target the minimum supported version
+warn_unused_configs = false
+disallow_untyped_defs = false
+warn_return_any = false
+ignore_missing_imports = true
+check_untyped_defs = false
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..833a626
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,26 @@
+import pytest
+from pyspark.sql import SparkSession
+import pandas as pd
+import pandas.testing as pdt
+
+
+@pytest.fixture(scope="module")
+def spark() -> SparkSession:
+    """Create a Spark session we can reuse for every test."""
+
+    return SparkSession.builder.master("local").appName("Test").getOrCreate()
+
+
+def assert_pandas_df_equal(df1: pd.DataFrame, df2: pd.DataFrame):
+    # Optional: sort and reset index to avoid false mismatches due to row order
+    df1_sorted = df1.sort_values(by=df1.columns.tolist()).reset_index(drop=True)
+    df2_sorted = df2.sort_values(by=df2.columns.tolist()).reset_index(drop=True)
+
+    pdt.assert_frame_equal(df1_sorted, df2_sorted, check_dtype=False)
+
+
+def assert_pyspark_df_equal(df1, df2):
+    df1_pd = df1.toPandas().sort_values(by=df1.columns).reset_index(drop=True)
+    df2_pd = df2.toPandas().sort_values(by=df2.columns).reset_index(drop=True)
+
+    pd.testing.assert_frame_equal(df1_pd, df2_pd, check_dtype=False)
diff --git a/tests/expectations_helper_classes/__init__.py b/tests/expectations_helper_classes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/expectations_helper_classes/test_column_expectations.py b/tests/expectations_helper_classes/test_column_expectations.py
new file mode 100644
index 0000000..7827b7c --- /dev/null +++ b/tests/expectations_helper_classes/test_column_expectations.py @@ -0,0 +1,69 @@ +import pytest +from unittest.mock import MagicMock + +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.column_expectation import ( + DataFrameColumnExpectation, +) + + +@pytest.fixture +def expectation(): + return DataFrameColumnExpectation( + expectation_name="MyColumnExpectation", + column_name="test_column", + fn_violations_pandas=lambda df: df, + fn_violations_pyspark=lambda df: df, + description="Test column expectation", + error_message="Test column expectation failed.", + ) + + +def test_get_expectation_name(expectation): + """ + Test that the expectation name is the class name. + """ + assert expectation.get_expectation_name() == "MyColumnExpectation", ( + f"Expected 'MyColumnExpectation' but got: {expectation.get_expectation_name()}" + ) + + +def test_validate_for_pandas_df(expectation): + """ + Test whether row_validation() and get_filter_fn() methods are called with the right parameters for Pandas. + """ + + # Mock methods + expectation.row_validation = MagicMock(return_value="mock_result") + + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + + # test validate_pandas called the right methods + _ = expectation.validate(data_frame=data_frame) + + expectation.row_validation.assert_called_once_with( + data_frame_type=DataFrameType.PANDAS, + data_frame=data_frame, + fn_violations=expectation.fn_violations_pandas, + ) + + +def test_validate_for_pyspark_df(expectation, spark): + """ + Test whether row_validation() and get_filter_fn() methods are called with the right parameters for PySpark. + """ + + # Mock methods + expectation.row_validation = MagicMock(return_value="mock_result") + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + + # test validate_pyspark called the right methods + _ = expectation.validate(data_frame=data_frame) + + expectation.row_validation.assert_called_once_with( + data_frame_type=DataFrameType.PYSPARK, + data_frame=data_frame, + fn_violations=expectation.fn_violations_pyspark, + ) diff --git a/tests/expectations_helper_classes/test_expectation_registry.py b/tests/expectations_helper_classes/test_expectation_registry.py new file mode 100644 index 0000000..611ec5b --- /dev/null +++ b/tests/expectations_helper_classes/test_expectation_registry.py @@ -0,0 +1,121 @@ +import pytest + +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) + + +class DummyExpectation: + def __init__(self, foo=None): + self.foo = foo + + +@pytest.fixture(autouse=True) +def cleanup_registry(): + # Save the original state of the registry + original = set(DataFrameExpectationRegistry.list_expectations()) + + yield + + # Remove any expectations added during the test + current = set(DataFrameExpectationRegistry.list_expectations()) + for name in current - original: + DataFrameExpectationRegistry.remove_expectation(name) + + +def test_register_and_get_expectation(): + """ + Test registering and retrieving an expectation. 
+ """ + + @DataFrameExpectationRegistry.register("DummyExpectation") + def dummy_expectation_factory(foo=None): + return DummyExpectation(foo=foo) + + instance = DataFrameExpectationRegistry.get_expectation("DummyExpectation", foo=123) + assert isinstance(instance, DummyExpectation), ( + f"Expected DummyExpectation instance but got: {type(instance)}" + ) + assert instance.foo == 123, f"Expected foo=123 but got: {instance.foo}" + + +def test_duplicate_registration_raises(): + """ + Test that registering an expectation with the same name raises a ValueError. + """ + + @DataFrameExpectationRegistry.register("DuplicateExpectation") + def dummy1(foo=None): + return DummyExpectation(foo=foo) + + with pytest.raises(ValueError) as context: + + @DataFrameExpectationRegistry.register("DuplicateExpectation") + def dummy2(foo=None): + return DummyExpectation(foo=foo) + + assert "already registered" in str(context.value), ( + f"Expected 'already registered' in error message but got: {str(context.value)}" + ) + + +def test_get_unknown_expectation_raises(): + """ + Test that trying to get an unknown expectation raises a ValueError. + """ + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation("NonExistent") + assert "Unknown expectation" in str(context.value), ( + f"Expected 'Unknown expectation' in error message but got: {str(context.value)}" + ) + + +def test_list_expectations(): + """ + Test listing all registered expectations. + """ + + @DataFrameExpectationRegistry.register("First") + def dummy1(foo=None): + return DummyExpectation(foo=foo) + + @DataFrameExpectationRegistry.register("Second") + def dummy2(foo=None): + return DummyExpectation(foo=foo) + + names = DataFrameExpectationRegistry.list_expectations() + assert "First" in names, f"Expected 'First' in expectations list but got: {names}" + assert "Second" in names, f"Expected 'Second' in expectations list but got: {names}" + + +def test_remove_expectation(): + """ + Test removing an expectation from the registry. + """ + + @DataFrameExpectationRegistry.register("ToRemove") + def dummy(foo=None): + return DummyExpectation(foo=foo) + + names_before = DataFrameExpectationRegistry.list_expectations() + assert "ToRemove" in names_before, ( + f"Expected 'ToRemove' in expectations list before removal but got: {names_before}" + ) + + DataFrameExpectationRegistry.remove_expectation("ToRemove") + + names_after = DataFrameExpectationRegistry.list_expectations() + assert "ToRemove" not in names_after, ( + f"Expected 'ToRemove' not in expectations list after removal but got: {names_after}" + ) + + +def test_remove_nonexistent_expectation_raises(): + """ + Test that trying to remove a non-existent expectation raises a ValueError. 
+ """ + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.remove_expectation("DefinitelyNotThere") + assert "not found" in str(context.value), ( + f"Expected 'not found' in error message but got: {str(context.value)}" + ) diff --git a/tests/expectations_helper_classes/test_expectations.py b/tests/expectations_helper_classes/test_expectations.py new file mode 100644 index 0000000..7826956 --- /dev/null +++ b/tests/expectations_helper_classes/test_expectations.py @@ -0,0 +1,302 @@ +import pytest +from unittest.mock import MagicMock, patch + +import pandas as pd + +from dataframe_expectations import DataFrameLike, DataFrameType +from dataframe_expectations.expectations import DataFrameExpectation + + +class MyTestExpectation(DataFrameExpectation): + def validate_pandas(self, data_frame: DataFrameLike, **kwargs): + """ + Mock implementation for pandas DataFrame validation. + """ + return "pandas validation successful" + + def validate_pyspark(self, data_frame: DataFrameLike, **kwargs): + """ + Mock implementation for PySpark DataFrame validation. + """ + return "pyspark validation successful" + + def get_description(self): + return "This is a test expectation for unit testing purposes." + + +class MockConnectDataFrame: + """Mock class to simulate pyspark.sql.connect.dataframe.DataFrame""" + + def __init__(self): + pass + + +def test_data_frame_type_enum(): + """ + Test that the DataFrameType enum has the correct values. + """ + assert DataFrameType.PANDAS.value == "pandas", ( + f"Expected 'pandas' but got: {DataFrameType.PANDAS.value}" + ) + assert DataFrameType.PYSPARK.value == "pyspark", ( + f"Expected 'pyspark' but got: {DataFrameType.PYSPARK.value}" + ) + + # Test string comparison (now works directly!) + assert DataFrameType.PANDAS == "pandas", "Expected DataFrameType.PANDAS == 'pandas' to be True" + assert DataFrameType.PYSPARK == "pyspark", ( + "Expected DataFrameType.PYSPARK == 'pyspark' to be True" + ) + + +def test_get_expectation_name(): + """ + Test that the expectation name is the class name. + """ + expectation = MyTestExpectation() + assert expectation.get_expectation_name() == "MyTestExpectation", ( + f"Expected 'MyTestExpectation' but got: {expectation.get_expectation_name()}" + ) + + +def test_validate_unsupported_dataframe_type(): + """ + Test that an error is raised for unsupported DataFrame types. + """ + expectation = MyTestExpectation() + with pytest.raises(ValueError): + expectation.validate(None) + + +def test_validate_pandas_called(): + """ + Test that validate_pandas method is called and with right parameters. + """ + expectation = MyTestExpectation() + + # Mock the validate_pandas method + expectation.validate_pandas = MagicMock(return_value="mock_result") + + # Assert that validate_pandas was called with the correct arguments + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + _ = expectation.validate(data_frame=data_frame) + expectation.validate_pandas.assert_called_once_with(data_frame=data_frame) + + with pytest.raises(ValueError): + expectation.validate(None) + + +def test_validate_pyspark_called(spark): + """ + Test that validate_pyspark method is called with right parameters. 
+ """ + expectation = MyTestExpectation() + + # Mock the validate_pyspark method + expectation.validate_pyspark = MagicMock(return_value="mock_result") + + # Assert that validate_pyspark was called with the correct arguments + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + _ = expectation.validate(data_frame=data_frame) + expectation.validate_pyspark.assert_called_once_with(data_frame=data_frame) + + with pytest.raises(ValueError): + expectation.validate(None) + + +def test_num_data_frame_rows(spark): + """ + Test that the number of rows in a DataFrame are counted correctly. + """ + expectation = MyTestExpectation() + + # 1. Non empty DataFrames + # Mock a pandas DataFrame + pandas_df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + num_rows = expectation.num_data_frame_rows(pandas_df) + assert num_rows == 3, f"Expected 3 rows for pandas DataFrame but got: {num_rows}" + + # Mock a PySpark DataFrame + spark_df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + num_rows = expectation.num_data_frame_rows(spark_df) + assert num_rows == 3, f"Expected 3 rows for PySpark DataFrame but got: {num_rows}" + + # Test unsupported DataFrame type + with pytest.raises(ValueError): + expectation.num_data_frame_rows(None) + + # 2. Empty DataFrames + # Mock an empty pandas DataFrame + empty_pandas_df = pd.DataFrame(columns=["col1", "col2"]) + num_rows = expectation.num_data_frame_rows(empty_pandas_df) + assert num_rows == 0, f"Expected 0 rows for empty pandas DataFrame but got: {num_rows}" + + # Mock an empty PySpark DataFrame + empty_spark_df = spark.createDataFrame([], "col1 INT, col2 STRING") + num_rows = expectation.num_data_frame_rows(empty_spark_df) + assert num_rows == 0, f"Expected 0 rows for empty PySpark DataFrame but got: {num_rows}" + + # Test unsupported DataFrame type + with pytest.raises(ValueError): + expectation.num_data_frame_rows(None) + + +def test_infer_data_frame_type(spark): + """ + Test that the DataFrame type is inferred correctly for all supported DataFrame types. 
+ """ + expectation = MyTestExpectation() + + # Test pandas DataFrame + pandas_df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + data_frame_type = expectation.infer_data_frame_type(pandas_df) + assert data_frame_type == DataFrameType.PANDAS, ( + f"Expected PANDAS type but got: {data_frame_type}" + ) + + # Test PySpark DataFrame + spark_df = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + data_frame_type = expectation.infer_data_frame_type(spark_df) + assert data_frame_type == DataFrameType.PYSPARK, ( + f"Expected PYSPARK type but got: {data_frame_type}" + ) + + # Test empty pandas DataFrame + empty_pandas_df = pd.DataFrame(columns=["col1", "col2"]) + data_frame_type = expectation.infer_data_frame_type(empty_pandas_df) + assert data_frame_type == DataFrameType.PANDAS, ( + f"Expected PANDAS type for empty DataFrame but got: {data_frame_type}" + ) + + # Test empty PySpark DataFrame + empty_spark_df = spark.createDataFrame([], "col1 INT, col2 STRING") + data_frame_type = expectation.infer_data_frame_type(empty_spark_df) + assert data_frame_type == DataFrameType.PYSPARK, ( + f"Expected PYSPARK type for empty DataFrame but got: {data_frame_type}" + ) + + # Test unsupported DataFrame types + with pytest.raises(ValueError) as context: + expectation.infer_data_frame_type(None) + assert "Unsupported DataFrame type" in str(context.value), ( + f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}" + ) + + with pytest.raises(ValueError) as context: + expectation.infer_data_frame_type("not_a_dataframe") + assert "Unsupported DataFrame type" in str(context.value), ( + f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}" + ) + + with pytest.raises(ValueError) as context: + expectation.infer_data_frame_type([1, 2, 3]) + assert "Unsupported DataFrame type" in str(context.value), ( + f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}" + ) + + with pytest.raises(ValueError) as context: + expectation.infer_data_frame_type({"col1": [1, 2, 3]}) + assert "Unsupported DataFrame type" in str(context.value), ( + f"Expected 'Unsupported DataFrame type' in error message but got: {str(context.value)}" + ) + + # Test with objects that might have similar attributes but aren't DataFrames + class FakeDataFrame: + def count(self): + return 5 + + def collect(self): + return [] + + fake_df = FakeDataFrame() + with pytest.raises(ValueError): + expectation.infer_data_frame_type(fake_df) + + # Test with numeric types + with pytest.raises(ValueError): + expectation.infer_data_frame_type(42) + + # Test with boolean + with pytest.raises(ValueError): + expectation.infer_data_frame_type(True) + + +def test_infer_data_frame_type_with_connect_dataframe_available(): + """ + Test that PySpark Connect DataFrame is correctly identified when available. 
+ """ + expectation = MyTestExpectation() + + # Patch the PySparkConnectDataFrame import to be our mock class + with patch( + "dataframe_expectations.expectations.PySparkConnectDataFrame", + MockConnectDataFrame, + ): + # Create an instance of our mock Connect DataFrame + mock_connect_df = MockConnectDataFrame() + + # Test that Connect DataFrame is identified as PYSPARK type + data_frame_type = expectation.infer_data_frame_type(mock_connect_df) + assert data_frame_type == DataFrameType.PYSPARK, ( + f"Expected PYSPARK type for Connect DataFrame but got: {data_frame_type}" + ) + + +@patch("dataframe_expectations.expectations.PySparkConnectDataFrame", None) +def test_infer_data_frame_type_without_connect_support(spark): + """ + Test that the method works correctly when PySpark Connect is not available. + """ + expectation = MyTestExpectation() + + # Test that regular DataFrames still work when Connect is not available + pandas_df = pd.DataFrame({"col1": [1, 2, 3]}) + data_frame_type = expectation.infer_data_frame_type(pandas_df) + assert data_frame_type == DataFrameType.PANDAS, ( + f"Expected PANDAS type but got: {data_frame_type}" + ) + + spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + data_frame_type = expectation.infer_data_frame_type(spark_df) + assert data_frame_type == DataFrameType.PYSPARK, ( + f"Expected PYSPARK type but got: {data_frame_type}" + ) + + +def test_infer_data_frame_type_connect_import_behavior(spark): + """ + Test that the Connect DataFrame import behavior works as expected. + """ + expectation = MyTestExpectation() + + # Test case 1: When PySparkConnectDataFrame is None (import failed) + with patch("dataframe_expectations.expectations.PySparkConnectDataFrame", None): + # Should still work with regular DataFrames + pandas_df = pd.DataFrame({"col1": [1, 2, 3]}) + result_type = expectation.infer_data_frame_type(pandas_df) + assert result_type == DataFrameType.PANDAS, f"Expected PANDAS type but got: {result_type}" + + spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + result_type = expectation.infer_data_frame_type(spark_df) + assert result_type == DataFrameType.PYSPARK, f"Expected PYSPARK type but got: {result_type}" + + # Test case 2: When PySparkConnectDataFrame is available (mocked) + with patch( + "dataframe_expectations.expectations.PySparkConnectDataFrame", + MockConnectDataFrame, + ): + # Regular DataFrames should still work + pandas_df = pd.DataFrame({"col1": [1, 2, 3]}) + result_type = expectation.infer_data_frame_type(pandas_df) + assert result_type == DataFrameType.PANDAS, f"Expected PANDAS type but got: {result_type}" + + spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + result_type = expectation.infer_data_frame_type(spark_df) + assert result_type == DataFrameType.PYSPARK, f"Expected PYSPARK type but got: {result_type}" + + # Mock Connect DataFrame should be identified as PYSPARK + mock_connect_df = MockConnectDataFrame() + result_type = expectation.infer_data_frame_type(mock_connect_df) + assert result_type == DataFrameType.PYSPARK, ( + f"Expected PYSPARK type for Connect DataFrame but got: {result_type}" + ) diff --git a/tests/expectations_helper_classes/test_utils.py b/tests/expectations_helper_classes/test_utils.py new file mode 100644 index 0000000..f435c4d --- /dev/null +++ b/tests/expectations_helper_classes/test_utils.py @@ -0,0 +1,90 @@ +from typing import Union +import pytest + +from dataframe_expectations.expectations.utils import requires_params + + +def test_requires_params_success(): + """ + Test that 
the decorated function runs when all required parameters are provided.
+ """
+
+ @requires_params("a", "b")
+ def func(**kwargs):
+ return kwargs["a"] + kwargs["b"]
+
+ result = func(a=1, b=2)
+ assert result == 3, f"Expected 3 but got: {result}"
+
+
+def test_requires_params_missing_param():
+ """
+ Test that a ValueError is raised when a required parameter is missing.
+ """
+
+ @requires_params("a", "b")
+ def func(**kwargs):
+ return kwargs["a"] + kwargs["b"]
+
+ with pytest.raises(ValueError) as context:
+ func(a=1)
+ assert "missing required parameters" in str(context.value), (
+ f"Expected 'missing required parameters' in error message but got: {str(context.value)}"
+ )
+
+
+def test_requires_params_type_success():
+ """
+ Test that type validation works correctly when types are specified.
+ """
+
+ @requires_params("a", "b", types={"a": int, "b": str})
+ def func(**kwargs):
+ return f"{kwargs['a']}-{kwargs['b']}"
+
+ result = func(a=5, b="hello")
+ assert result == "5-hello", f"Expected '5-hello' but got: {result}"
+
+
+def test_requires_params_type_error():
+ """
+ Test that a TypeError is raised when a parameter does not match the expected type.
+ """
+
+ @requires_params("a", "b", types={"a": int, "b": str})
+ def func(**kwargs):
+ return f"{kwargs['a']}-{kwargs['b']}"
+
+ with pytest.raises(TypeError) as context:
+ func(a="not-an-int", b="hello")
+ assert "type validation errors" in str(context.value), (
+ f"Expected 'type validation errors' in error message but got: {str(context.value)}"
+ )
+
+
+def test_requires_params_union_type_success():
+ """
+ Test that Union types are handled correctly.
+ """
+
+ @requires_params("a", types={"a": Union[int, str]})
+ def func(**kwargs):
+ return kwargs["a"]
+
+ result1 = func(a=5)
+ assert result1 == 5, f"Expected 5 but got: {result1}"
+
+ result2 = func(a="foo")
+ assert result2 == "foo", f"Expected 'foo' but got: {result2}"
+
+
+def test_requires_params_union_type_error():
+ """
+ Test that a TypeError is raised when a parameter does not match any type in a Union.
+ """
+
+ @requires_params("a", types={"a": Union[int, str]})
+ def func(**kwargs):
+ return kwargs["a"]
+
+ with pytest.raises(TypeError):
+ func(a=3.14)
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py
new file mode 100644
index 0000000..076b0cf
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_between.py
@@ -0,0 +1,562 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+ DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+ DataFrameExpectationsSuite,
+ DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+ DataFrameExpectationFailureMessage,
+ DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+ """
+ Test that the expectation name is correctly returned.
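+
+ The registry name asserted here is presumably the identifier that the fluent
+ suite methods used later in this file resolve to (an assumption based on the
+ parallel suite tests below, not verified against the registry source):
+
+ suite = DataFrameExpectationsSuite().expect_distinct_column_values_between(
+ column_name="col1", min_value=2, max_value=5
+ )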
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesBetween", ( + f"Expected 'ExpectationDistinctColumnValuesBetween' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + """ + Test the expectation for pandas DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + # DataFrame with 3 distinct values [1, 2, 3] which is within range [2, 5] + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesBetween" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_with_nulls(): + """ + Test the expectation for pandas DataFrame with NaN values included in distinct count. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=4, + ) + # DataFrame with 3 distinct values [1, 2, NaN] which is within range [3, 4] + data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesBetween" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations_too_few(): + """ + Test the expectation for pandas DataFrame with too few distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=4, + max_value=6, + ) + # DataFrame with 2 distinct values [1, 2] which is below range [4, 6] + data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 2 distinct values, expected between 4 and 6.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_violations_too_many(): + """ + Test the expectation for pandas DataFrame with too many distinct values. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=3, + ) + # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is above range [2, 3] + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 5 distinct values, expected between 2 and 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_exact_boundaries(): + """ + Test the expectation for pandas DataFrame with distinct counts exactly at boundaries. + """ + # Test exact minimum boundary + expectation_min = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=5, + ) + data_frame_min = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values + result_min = expectation_min.validate(data_frame=data_frame_min) + assert isinstance(result_min, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result_min)}" + ) + + # Test exact maximum boundary + expectation_max = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=5, + ) + data_frame_max = pd.DataFrame({"col1": [1, 2, 3, 4, 5, 1]}) # 5 distinct values + result_max = expectation_max.validate(data_frame=data_frame_max) + assert isinstance(result_max, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result_max)}" + ) + + +def test_expectation_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + # DataFrame with 3 distinct values [1, 2, 3] which is within range [2, 5] + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesBetween" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_with_nulls(spark): + """ + Test the expectation for PySpark DataFrame with null values included in distinct count. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=4, + ) + # DataFrame with 3 distinct values [1, 2, null] which is within range [3, 4] + data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesBetween" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations_too_few(spark): + """ + Test the expectation for PySpark DataFrame with too few distinct values. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=4, + max_value=6, + ) + # DataFrame with 2 distinct values [1, 2] which is below range [4, 6] + data_frame = spark.createDataFrame([(1,), (2,), (1,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 2 distinct values, expected between 4 and 6.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_violations_too_many(spark): + """ + Test the expectation for PySpark DataFrame with too many distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=3, + ) + # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is above range [2, 3] + data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 5 distinct values, expected between 2 and 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_exact_boundaries(spark): + """ + Test the expectation for PySpark DataFrame with distinct counts exactly at boundaries. + """ + # Test exact minimum boundary + expectation_min = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=5, + ) + data_frame_min = spark.createDataFrame( + [(1,), (2,), (3,), (2,), (1,)], ["col1"] + ) # 3 distinct values + result_min = expectation_min.validate(data_frame=data_frame_min) + assert isinstance(result_min, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result_min)}" + ) + + # Test exact maximum boundary + expectation_max = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=5, + ) + data_frame_max = spark.createDataFrame( + [(1,), (2,), (3,), (4,), (5,), (1,)], ["col1"] + ) # 5 distinct values + result_max = expectation_max.validate(data_frame=data_frame_max) + assert isinstance(result_max, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result_max)}" + ) + + +def test_column_missing_error(): + """ + Test that an error is raised when the specified column is missing. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_invalid_parameters(): + """ + Test that appropriate errors are raised for invalid parameters. + """ + # Test negative min_value + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=-1, + max_value=5, + ) + assert "min_value must be non-negative" in str(context.value), ( + f"Expected 'min_value must be non-negative' in error message: {str(context.value)}" + ) + + # Test negative max_value + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=-1, + ) + assert "max_value must be non-negative" in str(context.value), ( + f"Expected 'max_value must be non-negative' in error message: {str(context.value)}" + ) + + # Test min_value > max_value + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=5, + max_value=2, + ) + assert "min_value (5) must be <= max_value (2)" in str(context.value), ( + f"Expected 'min_value (5) must be <= max_value (2)' in error message: {str(context.value)}" + ) + + +def test_edge_case_zero_range(): + """ + Test the expectation when min_value equals max_value (zero range). + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=3, + ) + # DataFrame with exactly 3 distinct values + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + # DataFrame with 2 distinct values (should fail) + data_frame_fail = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) + result_fail = expectation.validate(data_frame=data_frame_fail) + assert isinstance(result_fail, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result_fail)}" + ) + + +def test_edge_case_empty_dataframe(): + """ + Test the expectation with an empty DataFrame. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=0, + max_value=5, + ) + # Empty DataFrame should have 0 distinct values + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_edge_case_single_value(): + """ + Test the expectation with a DataFrame containing a single distinct value. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=1, + max_value=1, + ) + # DataFrame with 1 distinct value + data_frame = pd.DataFrame({"col1": [1, 1, 1, 1, 1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_suite_pandas_success(): + """ + Test the expectation suite for pandas DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """ + Test the expectation suite for pandas DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between( + column_name="col1", min_value=4, max_value=6 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) # 2 distinct values, expected 4-6 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (3,), (2,), (1,)], ["col1"] + ) # 3 distinct values + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """ + Test the expectation suite for PySpark DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between( + column_name="col1", min_value=4, max_value=6 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (1,), (2,), (1,)], ["col1"] + ) # 2 distinct values, expected 4-6 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + """ + Test that an error is raised when the specified column is missing in PySpark DataFrame. 
+ """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_string_column_with_mixed_values(): + """ + Test the expectation with a string column containing mixed values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=3, + max_value=5, + ) + # String column with 4 distinct values ["A", "B", "C", None] + data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_column_with_floats(): + """ + Test the expectation with a numeric column containing floats. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=4, + ) + # Float column with 3 distinct values [1.1, 2.2, 3.3] + data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_boolean_column(): + """ + Test the expectation with a boolean column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=2, + ) + # Boolean column with 2 distinct values [True, False] + data_frame = pd.DataFrame({"col1": [True, False, True, False, True]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_datetime_column(): + """ + Test the expectation with a datetime column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=2, + max_value=4, + ) + # Datetime column with 3 distinct values + data_frame = pd.DataFrame( + { + "col1": pd.to_datetime( + [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-02", + "2023-01-01", + ] + ) + } + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_large_dataset_performance(): + """ + Test the expectation with a larger dataset to ensure reasonable performance. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesBetween", + column_name="col1", + min_value=900, + max_value=1100, + ) + # Create a DataFrame with exactly 1000 distinct values + data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py new file mode 100644 index 0000000..3bb97c7 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_equals.py @@ -0,0 +1,640 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + """ + Test that the expectation name is correctly returned. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesEquals", ( + f"Expected 'ExpectationDistinctColumnValuesEquals' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + """ + Test the expectation for pandas DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, 3] + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_with_nulls(): + """ + Test the expectation for pandas DataFrame with NaN values included in distinct count. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, NaN] + data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations_too_few(): + """ + Test the expectation for pandas DataFrame with too few distinct values. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=5, + ) + # DataFrame with 2 distinct values [1, 2] when expecting 5 + data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 2 distinct values, expected exactly 5.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_violations_too_many(): + """ + Test the expectation for pandas DataFrame with too many distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=2, + ) + # DataFrame with 5 distinct values [1, 2, 3, 4, 5] when expecting 2 + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 5 distinct values, expected exactly 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_zero_expected(): + """ + Test the expectation for pandas DataFrame expecting zero distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=0, + ) + # Empty DataFrame should have 0 distinct values + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pandas_one_expected(): + """ + Test the expectation for pandas DataFrame expecting exactly one distinct value. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=1, + ) + # DataFrame with exactly 1 distinct value + data_frame = pd.DataFrame({"col1": [5, 5, 5, 5, 5]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, 3] + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_with_nulls(spark): + """ + Test the expectation for PySpark DataFrame with null values included in distinct count. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, null] + data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationDistinctColumnValuesEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations_too_few(spark): + """ + Test the expectation for PySpark DataFrame with too few distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=5, + ) + # DataFrame with 2 distinct values [1, 2] when expecting 5 + data_frame = spark.createDataFrame([(1,), (2,), (1,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 2 distinct values, expected exactly 5.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_violations_too_many(spark): + """ + Test the expectation for PySpark DataFrame with too many distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=2, + ) + # DataFrame with 5 distinct values [1, 2, 3, 4, 5] when expecting 2 + data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 5 distinct values, expected exactly 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_zero_expected(spark): + """ + Test the expectation for PySpark DataFrame expecting zero distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=0, + ) + # Empty DataFrame should have 0 distinct values + data_frame = spark.createDataFrame([], "col1 INT") + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pyspark_one_expected(spark): + """ + Test the expectation for PySpark DataFrame expecting exactly one distinct value. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=1, + ) + # DataFrame with exactly 1 distinct value + data_frame = spark.createDataFrame([(5,), (5,), (5,), (5,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_column_missing_error(): + """ + Test that an error is raised when the specified column is missing. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_invalid_parameters(): + """ + Test that appropriate errors are raised for invalid parameters. + """ + # Test negative expected_value + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=-1, + ) + assert "expected_value must be non-negative" in str(context.value), ( + f"Expected 'expected_value must be non-negative' in error message: {str(context.value)}" + ) + + +def test_string_column_with_mixed_values(): + """ + Test the expectation with a string column containing mixed values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=4, + ) + # String column with exactly 4 distinct values ["A", "B", "C", None] + data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_string_column_case_sensitive(): + """ + Test that string comparisons are case-sensitive for distinct counting. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=4, + ) + # String column with 4 distinct values ["a", "A", "b", "B"] (case-sensitive) + data_frame = pd.DataFrame({"col1": ["a", "A", "b", "B", "a", "A"]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_column_with_floats(): + """ + Test the expectation with a numeric column containing floats. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # Float column with exactly 3 distinct values [1.1, 2.2, 3.3] + data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_precision_handling(): + """ + Test that numeric precision is handled correctly for distinct counting. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # Values that might have precision issues but should be treated as distinct + data_frame = pd.DataFrame({"col1": [1.0, 1.1, 1.2, 1.0, 1.1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_boolean_column(): + """ + Test the expectation with a boolean column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=2, + ) + # Boolean column with exactly 2 distinct values [True, False] + data_frame = pd.DataFrame({"col1": [True, False, True, False, True]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_boolean_column_with_none(): + """ + Test the expectation with a boolean column that includes None values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # Boolean column with 3 distinct values [True, False, None] + data_frame = pd.DataFrame({"col1": [True, False, None, False, True]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_datetime_column(): + """ + Test the expectation with a datetime column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # Datetime column with exactly 3 distinct values + data_frame = pd.DataFrame( + { + "col1": pd.to_datetime( + [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-02", + "2023-01-01", + ] + ) + } + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_datetime_column_with_timezone(): + """ + Test the expectation with a datetime column including timezone information. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=2, + ) + # Datetime column with timezone - same time in different timezones should be distinct + data_frame = pd.DataFrame( + { + "col1": [ + pd.Timestamp("2023-01-01 12:00:00", tz="UTC"), + pd.Timestamp("2023-01-01 12:00:00", tz="US/Eastern"), + pd.Timestamp("2023-01-01 12:00:00", tz="UTC"), + pd.Timestamp("2023-01-01 12:00:00", tz="US/Eastern"), + ] + } + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_mixed_data_types_as_object(): + """ + Test the expectation with a column containing mixed data types (as object dtype). + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=4, + ) + # Mixed data types: string, int, float, None + data_frame = pd.DataFrame({"col1": ["text", 42, 3.14, None, "text", 42]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_large_dataset_performance(): + """ + Test the expectation with a larger dataset to ensure reasonable performance. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=1000, + ) + # Create a DataFrame with exactly 1000 distinct values + data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_suite_pandas_success(): + """ + Test the expectation suite for pandas DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals( + column_name="col1", expected_value=3 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # exactly 3 distinct values + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """ + Test the expectation suite for pandas DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals( + column_name="col1", expected_value=5 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) # 2 distinct values, expected 5 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals( + column_name="col1", expected_value=3 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (3,), (2,), (1,)], ["col1"] + ) # exactly 3 distinct values + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """ + Test the expectation suite for PySpark DataFrame with violations. 
+ """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals( + column_name="col1", expected_value=5 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (1,), (2,), (1,)], ["col1"] + ) # 2 distinct values, expected 5 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + """ + Test that an error is raised when the specified column is missing in PySpark DataFrame. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_equals( + column_name="col1", expected_value=3 + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_categorical_data(): + """ + Test the expectation with categorical data. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # Categorical data with 3 distinct categories + data_frame = pd.DataFrame({"col1": pd.Categorical(["A", "B", "C", "A", "B", "C", "A"])}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_duplicate_nan_handling(): + """ + Test that multiple NaN values are counted as one distinct value. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=3, + ) + # Multiple NaN values should be counted as 1 distinct value + data_frame = pd.DataFrame({"col1": [1, 2, None, None, None, 1, 2]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_very_large_expected_distinct_values(): + """ + Test the expectation with a very large expected count that doesn't match actual. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=1000000, + ) + # Small DataFrame with only 3 distinct values + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 3 distinct values, expected exactly 1000000.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_string_with_whitespace_handling(): + """ + Test that strings with different whitespace are treated as distinct. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=4, + ) + # Strings with different whitespace should be distinct + data_frame = pd.DataFrame({"col1": ["test", " test", "test ", " test ", "test"]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_string_vs_numeric(): + """ + Test that numeric strings and numeric values are treated as distinct when in object column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesEquals", + column_name="col1", + expected_value=2, + ) + # String "1" and integer 1 should be distinct in object column + data_frame = pd.DataFrame({"col1": ["1", 1, "1", 1]}, dtype=object) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py new file mode 100644 index 0000000..5be1b82 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_greater_than.py @@ -0,0 +1,691 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + """ + Test that the expectation name is correctly returned. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesGreaterThan", ( + f"Expected 'ExpectationDistinctColumnValuesGreaterThan' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + """ + Test the expectation for pandas DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # DataFrame with 3 distinct values [1, 2, 3] which is > 2 + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesGreaterThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_with_nulls(): + """ + Test the expectation for pandas DataFrame with NaN values included in distinct count. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # DataFrame with 3 distinct values [1, 2, NaN] which is > 2 + data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesGreaterThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_exact_boundary(): + """ + Test the expectation for pandas DataFrame with distinct count exactly at boundary (exclusive). + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # DataFrame with 3 distinct values [1, 2, 3] which is > 2 + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pandas_violations_equal_to_threshold(): + """ + Test the expectation for pandas DataFrame with distinct count equal to threshold (should fail). + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT > 3 + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 3 distinct values, expected more than 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_violations_below_threshold(): + """ + Test the expectation for pandas DataFrame with distinct count below threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=5, + ) + # DataFrame with 2 distinct values [1, 2] which is NOT > 5 + data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 2 distinct values, expected more than 5.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_zero_threshold(): + """ + Test the expectation for pandas DataFrame with zero threshold. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=0, + ) + # Any non-empty DataFrame should have > 0 distinct values + data_frame = pd.DataFrame({"col1": [1, 1, 1]}) # 1 distinct value > 0 + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pandas_empty_dataframe(): + """ + Test the expectation for pandas DataFrame that is empty. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=0, + ) + # Empty DataFrame has 0 distinct values, which is NOT > 0 + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 0 distinct values, expected more than 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # DataFrame with 3 distinct values [1, 2, 3] which is > 2 + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesGreaterThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_with_nulls(spark): + """ + Test the expectation for PySpark DataFrame with null values included in distinct count. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # DataFrame with 3 distinct values [1, 2, null] which is > 2 + data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesGreaterThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations_equal_to_threshold(spark): + """ + Test the expectation for PySpark DataFrame with distinct count equal to threshold (should fail). 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT > 3 + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 3 distinct values, expected more than 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_violations_below_threshold(spark): + """ + Test the expectation for PySpark DataFrame with distinct count below threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=5, + ) + # DataFrame with 2 distinct values [1, 2] which is NOT > 5 + data_frame = spark.createDataFrame([(1,), (2,), (1,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 2 distinct values, expected more than 5.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_empty_dataframe(spark): + """ + Test the expectation for PySpark DataFrame that is empty. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=0, + ) + # Empty DataFrame has 0 distinct values, which is NOT > 0 + data_frame = spark.createDataFrame([], "col1 INT") + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 0 distinct values, expected more than 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_column_missing_error(): + """ + Test that an error is raised when the specified column is missing. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_invalid_parameters(): + """ + Test that appropriate errors are raised for invalid parameters. 
+ """ + # Test negative threshold + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=-1, + ) + assert "threshold must be non-negative" in str(context.value), ( + f"Expected 'threshold must be non-negative' in error message: {str(context.value)}" + ) + + +def test_string_column_with_mixed_values(): + """ + Test the expectation with a string column containing mixed values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=3, + ) + # String column with 4 distinct values ["A", "B", "C", None] which is > 3 + data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_string_column_case_sensitive(): + """ + Test that string comparisons are case-sensitive for distinct counting. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=3, + ) + # String column with 4 distinct values ["a", "A", "b", "B"] which is > 3 + data_frame = pd.DataFrame({"col1": ["a", "A", "b", "B", "a", "A"]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_column_with_floats(): + """ + Test the expectation with a numeric column containing floats. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # Float column with 3 distinct values [1.1, 2.2, 3.3] which is > 2 + data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_boolean_column(): + """ + Test the expectation with a boolean column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=1, + ) + # Boolean column with 2 distinct values [True, False] which is > 1 + data_frame = pd.DataFrame({"col1": [True, False, True, False, True]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_boolean_column_failure(): + """ + Test the expectation with a boolean column that fails the threshold. 
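+
+    For reference, pandas collapses a constant boolean column to one distinct
+    value (illustrative only):
+
+        pd.Series([True, True, True]).nunique()  # 1, and 1 > 2 is False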
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # Boolean column with only 1 distinct value [True] which is NOT > 2 + data_frame = pd.DataFrame({"col1": [True, True, True, True, True]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 1 distinct values, expected more than 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_datetime_column(): + """ + Test the expectation with a datetime column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # Datetime column with 3 distinct values which is > 2 + data_frame = pd.DataFrame( + { + "col1": pd.to_datetime( + [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-02", + "2023-01-01", + ] + ) + } + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_mixed_data_types_as_object(): + """ + Test the expectation with a column containing mixed data types. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=3, + ) + # Mixed data types: 4 distinct values ["text", 42, 3.14, None] which is > 3 + data_frame = pd.DataFrame({"col1": ["text", 42, 3.14, None, "text", 42]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_large_dataset_performance(): + """ + Test the expectation with a larger dataset to ensure reasonable performance. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=999, + ) + # Create a DataFrame with exactly 1000 distinct values (> 999) + data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_large_dataset_failure(): + """ + Test the expectation with a larger dataset that fails the threshold. 
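+
+    The fixture repeats range(1000) five times, which keeps the distinct count
+    at exactly 1000 (illustrative arithmetic):
+
+        len(set(list(range(1000)) * 5))  # 1000, and 1000 > 1000 is False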
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=1000, + ) + # Create a DataFrame with exactly 1000 distinct values (NOT > 1000) + data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 1000 distinct values, expected more than 1000.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + """ + Test the expectation suite for pandas DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than( + column_name="col1", threshold=2 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values > 2 + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """ + Test the expectation suite for pandas DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than( + column_name="col1", threshold=5 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 1, 2, 1]}) # 2 distinct values, need > 5 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than( + column_name="col1", threshold=2 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (3,), (2,), (1,)], ["col1"] + ) # 3 distinct values > 2 + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """ + Test the expectation suite for PySpark DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than( + column_name="col1", threshold=5 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (1,), (2,), (1,)], ["col1"] + ) # 2 distinct values, need > 5 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + """ + Test that an error is raised when the specified column is missing in PySpark DataFrame. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_greater_than( + column_name="col1", threshold=2 + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_categorical_data(): + """ + Test the expectation with categorical data. 
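+
+    pandas Categorical columns report observed categories through nunique just
+    like plain object columns (illustrative only):
+
+        pd.Series(pd.Categorical(["A", "B", "C", "A"])).nunique()  # 3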
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # Categorical data with 3 distinct categories which is > 2 + data_frame = pd.DataFrame({"col1": pd.Categorical(["A", "B", "C", "A", "B", "C", "A"])}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_duplicate_nan_handling(): + """ + Test that multiple NaN values are counted as one distinct value. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=2, + ) + # Multiple NaN values should be counted as 1, total = 3 distinct values > 2 + data_frame = pd.DataFrame({"col1": [1, 2, None, None, None, 1, 2]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_single_distinct_value_success(): + """ + Test the expectation with only one distinct value that passes threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=0, + ) + # Single distinct value (1) which is > 0 + data_frame = pd.DataFrame({"col1": [5, 5, 5, 5, 5]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_string_with_whitespace_handling(): + """ + Test that strings with different whitespace are treated as distinct. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=3, + ) + # 4 distinct strings with different whitespace > 3 + data_frame = pd.DataFrame({"col1": ["test", " test", "test ", " test ", "test"]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_string_vs_numeric(): + """ + Test that numeric strings and numeric values are treated as distinct. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesGreaterThan", + column_name="col1", + threshold=1, + ) + # String "1" and integer 1 are distinct, so 2 distinct values > 1 + data_frame = pd.DataFrame({"col1": ["1", 1, "1", 1]}, dtype=object) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_very_high_threshold(): + """ + Test the expectation with a very high threshold that cannot be met. 
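+
+    No small column can clear a 1,000,000 threshold; this is the counterpart
+    of the very-low-threshold case in the LessThan tests (illustrative):
+
+        pd.Series([1, 2, 3, 2, 1]).nunique() > 1_000_000  # False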
+    """
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+        column_name="col1",
+        threshold=1000000,
+    )
+    # Small DataFrame with only 3 distinct values
+    data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]})
+    result = expectation.validate(data_frame=data_frame)
+
+    expected_failure_message = DataFrameExpectationFailureMessage(
+        expectation_str=str(expectation),
+        data_frame_type=DataFrameType.PANDAS,
+        message="Column 'col1' has 3 distinct values, expected more than 1000000.",
+    )
+    assert str(result) == str(expected_failure_message), (
+        f"Expected failure message but got: {result}"
+    )
+
+
+def test_exclusive_boundary_validation():
+    """
+    Test that the boundary is truly exclusive (not inclusive).
+    """
+    # Test with threshold = 5, actual = 5 (should fail because 5 is NOT > 5)
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+        column_name="col1",
+        threshold=5,
+    )
+    data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5, 1, 2]})  # exactly 5 distinct values
+    result = expectation.validate(data_frame=data_frame)
+    assert isinstance(result, DataFrameExpectationFailureMessage), (
+        f"Expected DataFrameExpectationFailureMessage but got: {type(result)}"
+    )
+
+    # Test with threshold = 4, actual = 5 (should pass because 5 > 4)
+    expectation_pass = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationDistinctColumnValuesGreaterThan",
+        column_name="col1",
+        threshold=4,
+    )
+    result_pass = expectation_pass.validate(data_frame=data_frame)
+    assert isinstance(result_pass, DataFrameExpectationSuccessMessage), (
+        f"Expected DataFrameExpectationSuccessMessage but got: {type(result_pass)}"
+    )
diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py
new file mode 100644
index 0000000..5f15e10
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_distinct_column_values_less_than.py
@@ -0,0 +1,732 @@
+import pytest
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+    DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+    DataFrameExpectationsSuite,
+    DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+    DataFrameExpectationFailureMessage,
+    DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name():
+    """
+    Test that the expectation name is correctly returned.
+    """
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationDistinctColumnValuesLessThan",
+        column_name="col1",
+        threshold=5,
+    )
+    assert expectation.get_expectation_name() == "ExpectationDistinctColumnValuesLessThan", (
+        f"Expected 'ExpectationDistinctColumnValuesLessThan' but got: {expectation.get_expectation_name()}"
+    )
+
+
+def test_expectation_pandas_success():
+    """
+    Test the expectation for pandas DataFrame with no violations.
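+
+    LessThan mirrors the GreaterThan expectation: the distinct count must fall
+    strictly below the threshold (illustrative only):
+
+        pd.Series([1, 2, 3, 2, 1]).nunique() < 5  # True: 3 < 5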
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # DataFrame with 3 distinct values [1, 2, 3] which is < 5 + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesLessThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_with_nulls(): + """ + Test the expectation for pandas DataFrame with NaN values included in distinct count. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # DataFrame with 3 distinct values [1, 2, NaN] which is < 5 + data_frame = pd.DataFrame({"col1": [1, 2, None, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesLessThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_zero_values(): + """ + Test the expectation for pandas DataFrame with zero distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=1, + ) + # Empty DataFrame has 0 distinct values which is < 1 + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pandas_violations_equal_to_threshold(): + """ + Test the expectation for pandas DataFrame with distinct count equal to threshold (should fail). + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT < 3 + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 3 distinct values, expected fewer than 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_violations_above_threshold(): + """ + Test the expectation for pandas DataFrame with distinct count above threshold. 
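+
+    Five distinct values against a threshold of 2 must fail; the expected
+    message below echoes both numbers (illustrative check):
+
+        pd.Series([1, 2, 3, 4, 5]).nunique() < 2  # False: 5 is not < 2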
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=2, + ) + # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is NOT < 2 + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 5 distinct values, expected fewer than 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_single_value_success(): + """ + Test the expectation for pandas DataFrame with single distinct value below threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=3, + ) + # DataFrame with 1 distinct value which is < 3 + data_frame = pd.DataFrame({"col1": [5, 5, 5, 5, 5]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # DataFrame with 3 distinct values [1, 2, 3] which is < 5 + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesLessThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_with_nulls(spark): + """ + Test the expectation for PySpark DataFrame with null values included in distinct count. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # DataFrame with 3 distinct values [1, 2, null] which is < 5 + data_frame = spark.createDataFrame([(1,), (2,), (None,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage( + expectation_name="ExpectationDistinctColumnValuesLessThan" + ) + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_zero_values(spark): + """ + Test the expectation for PySpark DataFrame with zero distinct values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=1, + ) + # Empty DataFrame has 0 distinct values which is < 1 + data_frame = spark.createDataFrame([], "col1 INT") + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_expectation_pyspark_violations_equal_to_threshold(spark): + """ + Test the expectation for PySpark DataFrame with distinct count equal to threshold (should fail). 
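+
+    As in the pandas variant, equality fails because the bound is exclusive.
+    A PySpark-flavoured sketch of the distinct count used here (illustrative;
+    the implementation may compute it differently):
+
+        data_frame.select("col1").distinct().count()  # 3, and 3 < 3 is False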
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=3, + ) + # DataFrame with exactly 3 distinct values [1, 2, 3] which is NOT < 3 + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 3 distinct values, expected fewer than 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_violations_above_threshold(spark): + """ + Test the expectation for PySpark DataFrame with distinct count above threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=2, + ) + # DataFrame with 5 distinct values [1, 2, 3, 4, 5] which is NOT < 2 + data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 5 distinct values, expected fewer than 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_single_value_success(spark): + """ + Test the expectation for PySpark DataFrame with single distinct value below threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=3, + ) + # DataFrame with 1 distinct value which is < 3 + data_frame = spark.createDataFrame([(5,), (5,), (5,), (5,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_column_missing_error(): + """ + Test that an error is raised when the specified column is missing. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_invalid_parameters(): + """ + Test that appropriate errors are raised for invalid parameters. 
+ """ + # Test negative threshold + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=-1, + ) + assert "threshold must be non-negative" in str(context.value), ( + f"Expected 'threshold must be non-negative' in error message: {str(context.value)}" + ) + + +def test_zero_threshold_edge_case(): + """ + Test the expectation with zero threshold - only empty DataFrames should pass. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=0, + ) + + # Empty DataFrame has 0 distinct values, which is NOT < 0 + data_frame_empty = pd.DataFrame({"col1": []}) + result_empty = expectation.validate(data_frame=data_frame_empty) + assert isinstance(result_empty, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result_empty)}" + ) + + # Any non-empty DataFrame will have >= 1 distinct values, which is NOT < 0 + data_frame_non_empty = pd.DataFrame({"col1": [1, 1, 1]}) + result_non_empty = expectation.validate(data_frame=data_frame_non_empty) + assert isinstance(result_non_empty, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result_non_empty)}" + ) + + +def test_string_column_with_mixed_values(): + """ + Test the expectation with a string column containing mixed values. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # String column with 4 distinct values ["A", "B", "C", None] which is < 5 + data_frame = pd.DataFrame({"col1": ["A", "B", "C", "B", "A", None]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_string_column_case_sensitive(): + """ + Test that string comparisons are case-sensitive for distinct counting. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # String column with 4 distinct values ["a", "A", "b", "B"] which is < 5 + data_frame = pd.DataFrame({"col1": ["a", "A", "b", "B", "a", "A"]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_column_with_floats(): + """ + Test the expectation with a numeric column containing floats. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # Float column with 3 distinct values [1.1, 2.2, 3.3] which is < 5 + data_frame = pd.DataFrame({"col1": [1.1, 2.2, 3.3, 2.2, 1.1]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_boolean_column_success(): + """ + Test the expectation with a boolean column that passes. 
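+
+    A boolean column contributes at most two non-null distinct values, so any
+    threshold of three or more passes (illustrative only):
+
+        pd.Series([True, False, True]).nunique()  # 2, and 2 < 3 is True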
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=3, + ) + # Boolean column with 2 distinct values [True, False] which is < 3 + data_frame = pd.DataFrame({"col1": [True, False, True, False, True]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_boolean_column_failure(): + """ + Test the expectation with a boolean column that fails the threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=2, + ) + # Boolean column with 2 distinct values [True, False] which is NOT < 2 + data_frame = pd.DataFrame({"col1": [True, False, True, False, True]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 2 distinct values, expected fewer than 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_boolean_single_value(): + """ + Test the expectation with a boolean column having only one distinct value. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=2, + ) + # Boolean column with only 1 distinct value [True] which is < 2 + data_frame = pd.DataFrame({"col1": [True, True, True, True, True]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_datetime_column(): + """ + Test the expectation with a datetime column. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # Datetime column with 3 distinct values which is < 5 + data_frame = pd.DataFrame( + { + "col1": pd.to_datetime( + [ + "2023-01-01", + "2023-01-02", + "2023-01-03", + "2023-01-02", + "2023-01-01", + ] + ) + } + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_mixed_data_types_as_object(): + """ + Test the expectation with a column containing mixed data types. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # Mixed data types: 4 distinct values ["text", 42, 3.14, None] which is < 5 + data_frame = pd.DataFrame({"col1": ["text", 42, 3.14, None, "text", 42]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_large_dataset_success(): + """ + Test the expectation with a larger dataset that passes. 
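+
+    Same construction as the GreaterThan performance test: repeating
+    range(1000) five times leaves exactly 1000 distinct values (illustrative):
+
+        len(set(list(range(1000)) * 5)) < 1001  # True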
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=1001, + ) + # Create a DataFrame with exactly 1000 distinct values (< 1001) + data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_large_dataset_failure(): + """ + Test the expectation with a larger dataset that fails the threshold. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=1000, + ) + # Create a DataFrame with exactly 1000 distinct values (NOT < 1000) + data_frame = pd.DataFrame({"col1": list(range(1000)) * 5}) # 5000 rows, 1000 distinct values + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 1000 distinct values, expected fewer than 1000.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + """ + Test the expectation suite for pandas DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than( + column_name="col1", threshold=5 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values < 5 + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """ + Test the expectation suite for pandas DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than( + column_name="col1", threshold=2 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) # 3 distinct values, need < 2 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than( + column_name="col1", threshold=5 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (3,), (2,), (1,)], ["col1"] + ) # 3 distinct values < 5 + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """ + Test the expectation suite for PySpark DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than( + column_name="col1", threshold=2 + ) + data_frame = spark.createDataFrame( + [(1,), (2,), (3,), (2,), (1,)], ["col1"] + ) # 3 distinct values, need < 2 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + """ + Test that an error is raised when the specified column is missing in PySpark DataFrame. 
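+
+    Unlike expectation.validate, which returns a failure message object, the
+    suite run is expected to raise on any failed expectation, which is why the
+    call below is wrapped in pytest.raises.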
+ """ + expectations_suite = DataFrameExpectationsSuite().expect_distinct_column_values_less_than( + column_name="col1", threshold=5 + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_categorical_data(): + """ + Test the expectation with categorical data. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # Categorical data with 3 distinct categories which is < 5 + data_frame = pd.DataFrame({"col1": pd.Categorical(["A", "B", "C", "A", "B", "C", "A"])}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_duplicate_nan_handling(): + """ + Test that multiple NaN values are counted as one distinct value. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # Multiple NaN values should be counted as 1, total = 3 distinct values < 5 + data_frame = pd.DataFrame({"col1": [1, 2, None, None, None, 1, 2]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_string_with_whitespace_handling(): + """ + Test that strings with different whitespace are treated as distinct. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=5, + ) + # 4 distinct strings with different whitespace < 5 + data_frame = pd.DataFrame({"col1": ["test", " test", "test ", " test ", "test"]}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_numeric_string_vs_numeric(): + """ + Test that numeric strings and numeric values are treated as distinct. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=3, + ) + # String "1" and integer 1 are distinct, so 2 distinct values < 3 + data_frame = pd.DataFrame({"col1": ["1", 1, "1", 1]}, dtype=object) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result)}" + ) + + +def test_very_low_threshold(): + """ + Test the expectation with a very low threshold. 
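+
+    With a threshold of 1 only an empty column can pass, since any non-empty
+    column has at least one distinct value (illustrative only):
+
+        pd.Series([1, 2, 3]).nunique() < 1  # False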
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=1, + ) + # DataFrame with 3 distinct values, need < 1 (only empty DataFrames can pass) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 3 distinct values, expected fewer than 1.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_exclusive_boundary_validation(): + """ + Test that the boundary is truly exclusive (not inclusive). + """ + # Test with threshold = 3, actual = 3 (should fail because 3 is NOT < 3) + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=3, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 1, 2]}) # exactly 3 distinct values + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result)}" + ) + + # Test with threshold = 4, actual = 3 (should pass because 3 < 4) + expectation_pass = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=4, + ) + result_pass = expectation_pass.validate(data_frame=data_frame) + assert isinstance(result_pass, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result_pass)}" + ) + + +def test_boundary_with_one_distinct_value(): + """ + Test boundary conditions with a single distinct value. 
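+
+    Boundary recap (illustrative): a constant column has a distinct count of
+    one, so it passes for threshold=2 (1 < 2) and fails for threshold=1:
+
+        pd.Series([5, 5, 5]).nunique()  # 1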
+ """ + # Single distinct value should pass when threshold > 1 + expectation_pass = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=2, + ) + data_frame = pd.DataFrame({"col1": [5, 5, 5, 5]}) # 1 distinct value < 2 + result_pass = expectation_pass.validate(data_frame=data_frame) + assert isinstance(result_pass, DataFrameExpectationSuccessMessage), ( + f"Expected DataFrameExpectationSuccessMessage but got: {type(result_pass)}" + ) + + # Single distinct value should fail when threshold = 1 + expectation_fail = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDistinctColumnValuesLessThan", + column_name="col1", + threshold=1, + ) + result_fail = expectation_fail.validate(data_frame=data_frame) + assert isinstance(result_fail, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result_fail)}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py new file mode 100644 index 0000000..8a4e6b5 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_count.py @@ -0,0 +1,564 @@ +import pytest +import numpy as np +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_pandas_success_no_nulls(): + """Test pandas success case with no null values.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=5, + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 3, 4, 5], + "col2": ["a", "b", "c", "d", "e"], + "col3": [1.1, 2.2, 3.3, 4.4, 5.5], + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_within_threshold(): + """Test pandas success case with null count within threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=3, + ) + # 2 null values in col1, which is less than max_count of 3 + data_frame = pd.DataFrame( + { + "col1": [1, None, 3, None, 5], + "col2": ["a", "b", "c", "d", "e"], + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_exactly_at_threshold(): + """Test pandas success case with null count exactly at threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=2, + ) + # Exactly 2 null values in col1 + data_frame = pd.DataFrame({"col1": [1, 2, None, 4, None], "col2": [None, "b", 
"c", "d", "e"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_with_nan(): + """Test pandas success case with NaN values within threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col2", + max_count=2, + ) + # 1 NaN value in col2, which is less than max_count of 2 + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4.0, np.nan, 6.0]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_failure_exceeds_threshold(): + """Test pandas failure case when null count exceeds threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=1, + ) + # 3 null values in col1, which exceeds max_count of 1 + data_frame = pd.DataFrame( + {"col1": [1, None, None, None, 5], "col2": [None, "b", "c", "d", "e"]} + ) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 3 null values, expected at most 1.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_failure_all_nulls_in_column(): + """Test pandas failure case with all null values in the specified column.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=1, + ) + data_frame = pd.DataFrame({"col1": [None, None, None], "col2": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 3 null values, expected at most 1.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_boundary_zero_threshold(): + """Test pandas boundary case with 0 threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = pd.DataFrame({"col1": [1, None, 3]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 1 null values, expected at most 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_boundary_zero_threshold_success(): + """Test pandas boundary case with 0 threshold and no nulls.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [None, None, None]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + 
DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_empty_dataframe(): + """Test pandas edge case with empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=5, + ) + data_frame = pd.DataFrame(columns=["col1"]) + result = expectation.validate(data_frame=data_frame) + # Empty DataFrame should have 0 nulls and pass + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_single_value_null(): + """Test pandas edge case with single null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = pd.DataFrame({"col1": [None]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 1 null values, expected at most 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_single_value_not_null(): + """Test pandas edge case with single non-null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = pd.DataFrame({"col1": [1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_different_column_nulls_not_affecting(): + """Test that nulls in other columns don't affect the result.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=1, + ) + # col1 has 0 nulls, col2 has 3 nulls - should pass since we're only checking col1 + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [None, None, None]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_no_nulls(spark): + """Test PySpark success case with no null values.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=5, + ) + data_frame = spark.createDataFrame( + [(1, "a", 1.1), (2, "b", 2.2), (3, "c", 3.3), (4, "d", 4.4), (5, "e", 5.5)], + ["col1", "col2", "col3"], + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_within_threshold(spark): + """Test PySpark success case with null count within threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=3, + ) + # 2 null values in col1, which is less than max_count of 3 + data_frame = 
spark.createDataFrame([(1,), (None,), (3,), (None,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_exactly_at_threshold(spark): + """Test PySpark success case with null count exactly at threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=2, + ) + # Exactly 2 null values in col1 + data_frame = spark.createDataFrame( + [(1, "a"), (2, None), (None, "c"), (4, "d"), (None, "e")], ["col1", "col2"] + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_failure_exceeds_threshold(spark): + """Test PySpark failure case when null count exceeds threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=1, + ) + # 2 null values in col1, which exceeds max_count of 1 + data_frame = spark.createDataFrame([(1, None), (None, "b"), (None, "c")], ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 2 null values, expected at most 1.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_failure_all_nulls_in_column(spark): + """Test PySpark failure case with all null values in the specified column.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=2, + ) + data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int") + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 3 null values, expected at most 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_boundary_zero_threshold(spark): + """Test PySpark boundary case with 0 threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = spark.createDataFrame([(1,), (None,), (3,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 1 null values, expected at most 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_boundary_zero_threshold_success(spark): + """Test PySpark boundary case with 0 threshold and no nulls.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = 
spark.createDataFrame( + [ + {"col1": 1, "col2": None}, + {"col1": 2, "col2": None}, + {"col1": 3, "col2": None}, + ], + schema="col1 int, col2 string", + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_empty_dataframe(spark): + """Test PySpark edge case with empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=5, + ) + data_frame = spark.createDataFrame([], "col1 INT") + result = expectation.validate(data_frame=data_frame) + # Empty DataFrame should have 0 nulls and pass + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_single_value_null(spark): + """Test PySpark edge case with single null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = spark.createDataFrame([{"col1": None}], schema="col1 int") + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 1 null values, expected at most 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_single_value_not_null(spark): + """Test PySpark edge case with single non-null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=0, + ) + data_frame = spark.createDataFrame([(1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_different_column_nulls_not_affecting(spark): + """Test that nulls in other columns don't affect the result.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=1, + ) + # col1 has 0 nulls, col2 has nulls - should pass since we're only checking col1 + data_frame = spark.createDataFrame( + [ + {"col1": 1, "col2": None}, + {"col1": 2, "col2": None}, + {"col1": 3, "col2": None}, + ], + schema="col1 int, col2 string", + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_suite_pandas_success(): + """Test the expectation suite for pandas DataFrame with no violations.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_null_count( + column_name="col1", max_count=2 + ) + data_frame = pd.DataFrame( + {"col1": [1, None, 3], "col2": ["a", "b", "c"]} + ) # 1 null value, which is less than max_count of 2 + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """Test the expectation suite for pandas DataFrame with 
violations.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_null_count( + column_name="col1", max_count=1 + ) + data_frame = pd.DataFrame( + {"col1": [1, None, None], "col2": ["a", "b", "c"]} + ) # 2 null values, which exceeds max_count of 1 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """Test the expectation suite for PySpark DataFrame with no violations.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_null_count( + column_name="col1", max_count=2 + ) + data_frame = spark.createDataFrame( + [(1, "a"), (None, "b"), (3, "c")], ["col1", "col2"] + ) # 1 null value, which is less than max_count of 2 + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """Test the expectation suite for PySpark DataFrame with violations.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_null_count( + column_name="col1", max_count=1 + ) + data_frame = spark.createDataFrame( + [(1, "a"), (None, "b"), (None, "c")], ["col1", "col2"] + ) # 2 null values, which exceeds max_count of 1 + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + """Test that an error is raised when the specified column is missing in PySpark DataFrame.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_null_count( + column_name="col1", max_count=5 + ) + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col2", "col3"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_expectation_parameter_validation(): + """Test that appropriate errors are raised for invalid parameters.""" + # Test negative max_count + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=-1, + ) + assert "max_count must be non-negative" in str(context.value), ( + f"Expected 'max_count must be non-negative' in error message: {str(context.value)}" + ) + + +def test_expectation_mixed_data_types(): + """Test the expectation with mixed data types including nulls.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=2, + ) + # Mixed data types with nulls + data_frame = pd.DataFrame({"col1": [1, "text", None, 3.14, None]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_large_dataset(): + """Test the expectation with a larger dataset.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=100, + ) + # Create a DataFrame with 1000 rows and 50 nulls + data = [None if i % 20 == 0 else i for i in range(1000)] # Every 20th value is None + data_frame = pd.DataFrame({"col1": data}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def 
test_expectation_large_threshold(): + """Test the expectation with a very large threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=1000000, + ) + # Small DataFrame with few nulls should pass with large threshold + data_frame = pd.DataFrame({"col1": [1, None, 3, None, 5]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullCount") + ), f"Expected success message but got: {result}" + + +def test_expectation_column_not_exists_error(): + """Test that an error is raised when the specified column does not exist.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullCount", + column_name="col1", + max_count=5, + ) + data_frame = pd.DataFrame({"col2": [1, 2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + # The error message might vary slightly depending on pandas version + assert isinstance(result, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result)}" + ) + result_str = str(result) + assert "col1" in result_str, f"Expected 'col1' in result message: {result_str}" diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py new file mode 100644 index 0000000..cbe34aa --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_null_percentage.py @@ -0,0 +1,547 @@ +import numpy as np +import pandas as pd +import pytest + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_pandas_success_no_nulls(): + """Test pandas success case with no null values.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 3, 4, 5], + "col2": ["a", "b", "c", "d", "e"], + "col3": [1.1, 2.2, 3.3, 4.4, 5.5], + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_within_threshold(): + """Test pandas success case with null percentage within threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=25.0, + ) + # 4 values in col1, 1 null = 25% null + data_frame = pd.DataFrame( + { + "col1": [1, None, 3, 4], + "col2": ["a", "b", "c", "d"], # Other columns don't affect the test + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + 
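+# A minimal sanity sketch, independent of the library under test: it reproduces
+# the percentage arithmetic these tests assert against using plain pandas.
+# Series.isna().mean() gives the null fraction; multiplied by 100 it is the
+# value the expectation compares to max_percentage. This helper test is a
+# hypothetical addition for illustration, not part of the library's suite.
+def test_null_percentage_arithmetic_sketch():
+    """Sanity check of the percentage arithmetic used throughout this file."""
+    frac = pd.DataFrame({"col1": [1, None, 3, 4]})["col1"].isna().mean()
+    assert frac * 100 == 25.0  # 1 null out of 4 values
+
+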
+def test_expectation_pandas_success_exactly_at_threshold(): + """Test pandas success case with null percentage exactly at threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=20.0, + ) + # 5 values in col1, 1 null = 20% null + data_frame = pd.DataFrame({"col1": [1, 2, None, 4, 5], "col2": [None, "b", "c", "d", "e"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_with_nan(): + """Test pandas success case with NaN values within threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col2", + max_percentage=50.0, + ) + # 3 values in col2, 1 NaN = 33.33% null (less than 50%) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4.0, np.nan, 6.0]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_failure_exceeds_threshold(): + """Test pandas failure case when null percentage exceeds threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=20.0, + ) + # 4 values in col1, 2 nulls = 50% null (exceeds 20%) + data_frame = pd.DataFrame({"col1": [1, None, 3, None], "col2": [None, "b", "c", "d"]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 50.00% null values, expected at most 20.00%.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_failure_all_nulls_in_column(): + """Test pandas failure case with 100% null values in the specified column.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=50.0, + ) + data_frame = pd.DataFrame({"col1": [None, None], "col2": [1, 2]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 100.00% null values, expected at most 50.00%.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_boundary_zero_threshold(): + """Test pandas boundary case with 0.0% threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=0.0, + ) + data_frame = pd.DataFrame({"col1": [1, None, 3]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 33.33% null values, expected at most 0.00%.", + ) + assert str(result) == str(expected_failure_message), ( + 
f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_boundary_hundred_threshold(): + """Test pandas boundary case with 100.0% threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=100.0, + ) + data_frame = pd.DataFrame({"col1": [None, None, None], "col2": [None, None, None]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_empty_dataframe(): + """Test pandas edge case with empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + data_frame = pd.DataFrame(columns=["col1"]) + result = expectation.validate(data_frame=data_frame) + # Empty DataFrame should have 0% nulls and pass + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_single_value_null(): + """Test pandas edge case with single null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=50.0, + ) + data_frame = pd.DataFrame({"col1": [None]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' has 100.00% null values, expected at most 50.00%.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_single_value_not_null(): + """Test pandas edge case with single non-null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + data_frame = pd.DataFrame({"col1": [1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_different_column_nulls_not_affecting(): + """Test that nulls in other columns don't affect the result.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + # col1 has 0% nulls, col2 has 100% nulls - should pass since we're only checking col1 + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [None, None, None]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_no_nulls(spark): + """Test PySpark success case with no null values.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + data_frame = spark.createDataFrame( + [(1, "a", 1.1), (2, "b", 2.2), (3, "c", 3.3), (4, "d", 4.4), (5, "e", 5.5)], + 
["col1", "col2", "col3"], + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_within_threshold(spark): + """Test PySpark success case with null percentage within threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=30.0, + ) + # 4 values in col1, 1 null = 25% null + data_frame = spark.createDataFrame([(1,), (None,), (3,), (4,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_exactly_at_threshold(spark): + """Test PySpark success case with null percentage exactly at threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=40.0, + ) + # 5 values in col1, 2 nulls = 40% null + data_frame = spark.createDataFrame( + [(1, "a"), (2, None), (None, "c"), (4, "d"), (None, None)], ["col1", "col2"] + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_failure_exceeds_threshold(spark): + """Test PySpark failure case when null percentage exceeds threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=25.0, + ) + # 3 values in col1, 2 nulls = 66.67% null (exceeds 25%) + data_frame = spark.createDataFrame([(1, None), (None, "b"), (None, "c")], ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 66.67% null values, expected at most 25.00%.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_failure_all_nulls_in_column(spark): + """Test PySpark failure case with 100% null values in the specified column.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=75.0, + ) + data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int") + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 100.00% null values, expected at most 75.00%.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_boundary_zero_threshold(spark): + """Test PySpark boundary case with 0.0% threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=0.0, + ) + data_frame = spark.createDataFrame([(1,), (None,), (3,)], 
["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 33.33% null values, expected at most 0.00%.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_boundary_hundred_threshold(spark): + """Test PySpark boundary case with 100.0% threshold.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=100.0, + ) + data_frame = spark.createDataFrame( + [ + {"col1": None, "col2": None}, + {"col1": None, "col2": None}, + ], + schema="col1: int, col2: string", + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_empty_dataframe(spark): + """Test PySpark edge case with empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + # Create empty DataFrame with schema + data_frame = spark.createDataFrame([], "col1: int") + result = expectation.validate(data_frame=data_frame) + # Empty DataFrame should have 0% nulls and pass + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_single_value_null(spark): + """Test PySpark edge case with single null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=50.0, + ) + data_frame = spark.createDataFrame([(None,)], "col1: int") + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'col1' has 100.00% null values, expected at most 50.00%.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_single_value_not_null(spark): + """Test PySpark edge case with single non-null value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + data_frame = spark.createDataFrame([(1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_different_column_nulls_not_affecting(spark): + """Test that nulls in other columns don't affect the result.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=10.0, + ) + # col1 has 0% nulls, col2 has 100% nulls - should pass since we're only checking col1 + data_frame = spark.createDataFrame( + [ + {"col1": 1, "col2": None}, + {"col1": 2, "col2": None}, + {"col1": 3, "col2": None}, + ], + schema="col1: int, col2: int", + ) + 
result = expectation.validate(data_frame=data_frame)
+    assert str(result) == str(
+        DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage")
+    ), f"Expected success message but got: {result}"
+
+
+def test_suite_pandas_success():
+    """Test integration with expectations suite for pandas success case."""
+    expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+        column_name="col1", max_percentage=30.0
+    )
+    # 4 values in col1, 1 null = 25% null (should pass)
+    data_frame = pd.DataFrame({"col1": [1, 2, None, 4], "col2": ["a", "b", "c", "d"]})
+    result = expectations_suite.run(data_frame=data_frame)
+    assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pandas_violations():
+    """Test integration with expectations suite for pandas failure case."""
+    expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+        column_name="col1", max_percentage=10.0
+    )
+    # 2 values in col1, 1 null = 50% null (exceeds 10%)
+    data_frame = pd.DataFrame({"col1": [1, None], "col2": ["a", "b"]})
+    with pytest.raises(DataFrameExpectationsSuiteFailure):
+        expectations_suite.run(data_frame=data_frame)
+
+
+def test_suite_pyspark_success(spark):
+    """Test integration with expectations suite for PySpark success case."""
+    expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+        column_name="col1", max_percentage=50.0
+    )
+    # 2 values in col1, 1 null = 50% null (equals 50%)
+    data_frame = spark.createDataFrame([(1, "a"), (None, "b")], ["col1", "col2"])
+    result = expectations_suite.run(data_frame=data_frame)
+    assert result is None, "Expected no exceptions to be raised"
+
+
+def test_suite_pyspark_violations(spark):
+    """Test integration with expectations suite for PySpark failure case."""
+    expectations_suite = DataFrameExpectationsSuite().expect_max_null_percentage(
+        column_name="col1", max_percentage=20.0
+    )
+    # 2 values in col1, 1 null = 50% null (exceeds 20%)
+    data_frame = spark.createDataFrame([(None, "a"), (2, None)], ["col1", "col2"])
+    with pytest.raises(DataFrameExpectationsSuiteFailure):
+        expectations_suite.run(data_frame=data_frame)
+
+
+def test_expectation_parameter_validation():
+    """Test that valid column_name and max_percentage parameters are accepted."""
+    # Test with valid parameters
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationMaxNullPercentage",
+        column_name="test_col",
+        max_percentage=50.0,
+    )
+    assert expectation is not None, "Expected expectation to be created successfully"
+
+    # Test string representation
+    expectation_str = str(expectation)
+    assert "50.0" in expectation_str, f"Expected '50.0' in expectation string: {expectation_str}"
+    assert "test_col" in expectation_str, (
+        f"Expected 'test_col' in expectation string: {expectation_str}"
+    )
+    assert "ExpectationMaxNullPercentage" in expectation_str, (
+        f"Expected 'ExpectationMaxNullPercentage' in expectation string: {expectation_str}"
+    )
+
+
+def test_expectation_mixed_data_types():
+    """Test expectation with mixed data types including various null representations."""
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationMaxNullPercentage",
+        column_name="float_col",
+        max_percentage=50.0,
+    )
+    # 4 values in float_col, 1 NaN = 25% null (less than 50%)
+    data_frame = pd.DataFrame(
+        {
+            "int_col": [1, None, 3, 4],
+            "str_col": ["a", "b", None, "d"],
+            "float_col": [1.1, 2.2, 3.3, np.nan],
+        }
+    )
+    result = expectation.validate(data_frame=data_frame)
+    assert str(result) == str(
+        
DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_precision_boundary(): + """Test expectation with very precise percentage boundaries.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="col1", + max_percentage=25.0, + ) + # 4 values in col1, 1 null = 25.00% null (exactly at boundary) + data_frame = pd.DataFrame({"col1": [1, None, 3, 4]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxNullPercentage") + ), f"Expected success message but got: {result}" + + +def test_expectation_column_not_exists_error(): + """Test expectation with non-existent column should fail gracefully.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxNullPercentage", + column_name="nonexistent_col", + max_percentage=50.0, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]}) + result = expectation.validate(data_frame=data_frame) + + # Should get a failure message with error info + assert isinstance(result, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result)}" + ) + result_str = str(result) + assert "nonexistent_col" in result_str, ( + f"Expected 'nonexistent_col' in result message: {result_str}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py new file mode 100644 index 0000000..0578cf0 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_max_rows.py @@ -0,0 +1,485 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_pandas_success_exact_count(): + """Test pandas success case with exact maximum row count.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=3, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_below_max(): + """Test pandas success case with row count below maximum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=10, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5], "col2": ["a", "b", "c", "d", "e"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_single_row(): + """Test pandas success case with single row and max count of 
1.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=1, + ) + data_frame = pd.DataFrame({"col1": [42]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_empty_dataframe(): + """Test pandas success case with empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=5, + ) + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_failure_exceeds_max(): + """Test pandas failure case when row count exceeds maximum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=3, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5], "col2": ["a", "b", "c", "d", "e"]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 5 rows, expected at most 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_failure_zero_max_with_data(): + """Test pandas failure case with zero max count but data present.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=0, + ) + data_frame = pd.DataFrame({"col1": [1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 1 rows, expected at most 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_boundary_zero_max_empty_df(): + """Test pandas boundary case with zero max count and empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=0, + ) + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_large_dataset(): + """Test pandas with larger dataset exceeding maximum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=100, + ) + # Create DataFrame with 150 rows + data_frame = pd.DataFrame({"col1": range(150), "col2": [f"value_{i}" for i in range(150)]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 150 rows, expected at most 100.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_with_nulls(): + """Test pandas expectation 
with null values (should still count rows).""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=4, + ) + data_frame = pd.DataFrame({"col1": [1, None, 3, None, 5], "col2": [None, "b", None, "d", None]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 5 rows, expected at most 4.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_success_exact_count(spark): + """Test PySpark success case with exact maximum row count.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=3, + ) + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_below_max(spark): + """Test PySpark success case with row count below maximum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=10, + ) + data_frame = spark.createDataFrame( + [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")], ["col1", "col2"] + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_single_row(spark): + """Test PySpark success case with single row.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=1, + ) + data_frame = spark.createDataFrame([(42,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_empty_dataframe(spark): + """Test PySpark success case with empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=5, + ) + # Create empty DataFrame with schema + data_frame = spark.createDataFrame([], "col1: int") + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_failure_exceeds_max(spark): + """Test PySpark failure case when row count exceeds maximum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=3, + ) + data_frame = spark.createDataFrame( + [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")], ["col1", "col2"] + ) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 5 rows, expected at most 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def 
test_expectation_pyspark_failure_zero_max_with_data(spark): + """Test PySpark failure case with zero max count but data present.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=0, + ) + data_frame = spark.createDataFrame([(1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 1 rows, expected at most 0.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_boundary_zero_max_empty_df(spark): + """Test PySpark boundary case with zero max count and empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=0, + ) + data_frame = spark.createDataFrame([], "col1: int") + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_large_dataset(spark): + """Test PySpark with larger dataset exceeding maximum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=50, + ) + # Create DataFrame with 75 rows + data = [(i, f"value_{i}") for i in range(75)] + data_frame = spark.createDataFrame(data, ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 75 rows, expected at most 50.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_with_nulls(spark): + """Test PySpark expectation with null values (should still count rows).""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=4, + ) + data_frame = spark.createDataFrame( + [(1, None), (None, "b"), (3, None), (None, "d"), (5, None)], + ["col1", "col2"], + ) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 5 rows, expected at most 4.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + """Test integration with expectations suite for pandas success case.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=5) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """Test integration with expectations suite for pandas failure case.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=2) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """Test integration with expectations 
suite for PySpark success case.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=5) + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """Test integration with expectations suite for PySpark failure case.""" + expectations_suite = DataFrameExpectationsSuite().expect_max_rows(max_rows=2) + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c"), (4, "d")], ["col1", "col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_expectation_parameter_validation(): + """Test parameter validation for max_rows.""" + # Test with valid parameters + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=100, + ) + assert expectation is not None, "Expected expectation to be created successfully" + + # Test string representation + expectation_str = str(expectation) + assert "100" in expectation_str, f"Expected '100' in expectation string: {expectation_str}" + assert "ExpectationMaxRows" in expectation_str, ( + f"Expected 'ExpectationMaxRows' in expectation string: {expectation_str}" + ) + + +def test_expectation_boundary_conditions(): + """Test various boundary conditions for max_rows.""" + # Test with max_rows = 1 + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=1, + ) + + # Single row - should pass + data_frame = pd.DataFrame({"col1": [1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + # Two rows - should fail + data_frame = pd.DataFrame({"col1": [1, 2]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 2 rows, expected at most 1.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_multiple_columns(): + """Test expectation with multiple columns (should still count total rows).""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=3, + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "d"], + "col3": [1.1, 2.2, 3.3, 4.4], + "col4": [True, False, True, False], + } + ) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 4 rows, expected at most 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_mixed_data_types(): + """Test expectation with mixed data types in columns.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=10, + ) + data_frame = pd.DataFrame( + { + "int_col": [1, 2, 3, 4, 5], + "str_col": ["a", "b", "c", "d", "e"], + "float_col": [1.1, 2.2, 3.3, 4.4, 5.5], + "bool_col": [True, False, True, False, True], + "null_col": 
[None, None, None, None, None], + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_high_max_rows(): + """Test expectation with very high max_rows value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=1000000, # 1 million + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_identical_values(): + """Test expectation with DataFrame containing identical values.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=3, + ) + data_frame = pd.DataFrame( + { + "col1": [42, 42, 42, 42], # All same values + "col2": ["same", "same", "same", "same"], + } + ) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 4 rows, expected at most 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_edge_case_max_rows_equals_actual(): + """Test edge case where max_rows exactly equals actual row count.""" + for count in [1, 5, 10, 100]: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMaxRows", + max_rows=count, + ) + # Create DataFrame with exactly 'count' rows + data_frame = pd.DataFrame({"col1": list(range(count))}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMaxRows") + ), f"Expected success message for count {count} but got: {result}" diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py new file mode 100644 index 0000000..e112ccc --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_min_rows.py @@ -0,0 +1,616 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_pandas_success_exact_count(): + """Test pandas success case with exact minimum row count.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_above_min(): + """Test 
pandas success case with row count above minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5], "col2": ["a", "b", "c", "d", "e"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_single_row(): + """Test pandas success case with single row and min count of 1.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=1, + ) + data_frame = pd.DataFrame({"col1": [42]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_zero_min_empty_df(): + """Test pandas success case with zero minimum and empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=0, + ) + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_success_zero_min_with_data(): + """Test pandas success case with zero minimum and data present.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=0, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_failure_below_min(): + """Test pandas failure case when row count is below minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=5, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 3 rows, expected at least 5.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_failure_empty_with_min(): + """Test pandas failure case with empty DataFrame but minimum required.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=2, + ) + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 0 rows, expected at least 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_failure_single_row_needs_more(): + """Test pandas failure case with single row but higher minimum required.""" + expectation = DataFrameExpectationRegistry.get_expectation( + 
expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = pd.DataFrame({"col1": [1]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 1 rows, expected at least 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_large_dataset(): + """Test pandas with larger dataset meeting minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=100, + ) + # Create DataFrame with 150 rows + data_frame = pd.DataFrame({"col1": range(150), "col2": [f"value_{i}" for i in range(150)]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_large_dataset_failure(): + """Test pandas with dataset not meeting large minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=200, + ) + # Create DataFrame with 150 rows + data_frame = pd.DataFrame({"col1": range(150), "col2": [f"value_{i}" for i in range(150)]}) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 150 rows, expected at least 200.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pandas_with_nulls(): + """Test pandas expectation with null values (should still count rows).""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = pd.DataFrame({"col1": [1, None, 3, None, 5], "col2": [None, "b", None, "d", None]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_exact_count(spark): + """Test PySpark success case with exact minimum row count.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_above_min(spark): + """Test PySpark success case with row count above minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = spark.createDataFrame( + [(1, "a"), (2, "b"), (3, "c"), (4, "d"), (5, "e")], ["col1", "col2"] + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_single_row(spark): + """Test PySpark success case with single 
row.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=1, + ) + data_frame = spark.createDataFrame([(42,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_zero_min_empty_df(spark): + """Test PySpark success case with zero minimum and empty DataFrame.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=0, + ) + # Create empty DataFrame with schema + data_frame = spark.createDataFrame([], "col1: int") + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_success_zero_min_with_data(spark): + """Test PySpark success case with zero minimum and data present.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=0, + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_failure_below_min(spark): + """Test PySpark failure case when row count is below minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=5, + ) + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 3 rows, expected at least 5.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_failure_empty_with_min(spark): + """Test PySpark failure case with empty DataFrame but minimum required.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=2, + ) + data_frame = spark.createDataFrame([], "col1: int") + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 0 rows, expected at least 2.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_failure_single_row_needs_more(spark): + """Test PySpark failure case with single row but higher minimum required.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = spark.createDataFrame([(1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 1 rows, expected at least 3.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure 
message but got: {result}" + ) + + +def test_expectation_pyspark_large_dataset(spark): + """Test PySpark with larger dataset meeting minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=50, + ) + # Create DataFrame with 75 rows + data = [(i, f"value_{i}") for i in range(75)] + data_frame = spark.createDataFrame(data, ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_large_dataset_failure(spark): + """Test PySpark with dataset not meeting large minimum.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=100, + ) + # Create DataFrame with 75 rows + data = [(i, f"value_{i}") for i in range(75)] + data_frame = spark.createDataFrame(data, ["col1", "col2"]) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="DataFrame has 75 rows, expected at least 100.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_with_nulls(spark): + """Test PySpark expectation with null values (should still count rows).""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = spark.createDataFrame( + [(1, None), (None, "b"), (3, None), (None, "d"), (5, None)], + ["col1", "col2"], + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_suite_pandas_success(): + """Test integration with expectations suite for pandas success case.""" + expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=2) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """Test integration with expectations suite for pandas failure case.""" + expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=5) + data_frame = pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """Test integration with expectations suite for PySpark success case.""" + expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=2) + data_frame = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """Test integration with expectations suite for PySpark failure case.""" + expectations_suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=5) + data_frame = spark.createDataFrame([(1, "a"), (2, "b")], ["col1", "col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def 
test_expectation_parameter_validation(): + """Test parameter validation for min_rows.""" + # Test with valid parameters + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=10, + ) + assert expectation is not None, "Expected expectation to be created successfully" + + # Test string representation + expectation_str = str(expectation) + assert "10" in expectation_str, f"Expected '10' in expectation string: {expectation_str}" + assert "ExpectationMinRows" in expectation_str, ( + f"Expected 'ExpectationMinRows' in expectation string: {expectation_str}" + ) + + +def test_expectation_boundary_conditions(): + """Test various boundary conditions for min_rows.""" + # Test with min_rows = 1 + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=1, + ) + + # Single row - should pass + data_frame = pd.DataFrame({"col1": [1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + # Empty DataFrame - should fail + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationFailureMessage), ( + f"Expected DataFrameExpectationFailureMessage but got: {type(result)}" + ) + + +def test_expectation_multiple_columns(): + """Test expectation with multiple columns (should still count total rows).""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": ["a", "b", "c", "d"], + "col3": [1.1, 2.2, 3.3, 4.4], + "col4": [True, False, True, False], + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_mixed_data_types(): + """Test expectation with mixed data types in columns.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = pd.DataFrame( + { + "int_col": [1, 2, 3, 4, 5], + "str_col": ["a", "b", "c", "d", "e"], + "float_col": [1.1, 2.2, 3.3, 4.4, 5.5], + "bool_col": [True, False, True, False, True], + "null_col": [None, None, None, None, None], + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_low_min_count(): + """Test expectation with very low min_rows value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=1, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_high_min_count(): + """Test expectation with very high min_rows value.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=1000000, # 1 million + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = 
expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="DataFrame has 3 rows, expected at least 1000000.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_identical_values(): + """Test expectation with DataFrame containing identical values.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + data_frame = pd.DataFrame( + { + "col1": [42, 42, 42, 42], # All same values + "col2": ["same", "same", "same", "same"], + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_edge_case_min_count_equals_actual(): + """Test edge case where min_rows exactly equals actual row count.""" + for count in [1, 5, 10, 100]: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=count, + ) + # Create DataFrame with exactly 'count' rows + data_frame = pd.DataFrame({"col1": list(range(count))}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message for count {count} but got: {result}" + + +def test_expectation_zero_min_count_edge_cases(): + """Test edge cases with zero minimum count.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=0, + ) + + # Empty DataFrame should pass + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + # DataFrame with data should also pass + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_progressive_min_counts(): + """Test expectation with progressively increasing minimum counts.""" + data_frame = pd.DataFrame({"col1": [1, 2, 3, 4, 5]}) # 5 rows + + # Should pass for min_rows <= 5 + for min_rows in [0, 1, 2, 3, 4, 5]: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=min_rows, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message for min_rows {min_rows} but got: {result}" + + # Should fail for min_rows > 5 + for min_rows in [6, 7, 10, 100]: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=min_rows, + ) + result = expectation.validate(data_frame=data_frame) + + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=f"DataFrame has 5 rows, expected at least {min_rows}.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure 
message for min_rows {min_rows} but got: {result}" + ) + + +def test_expectation_dataframe_structure_irrelevant(): + """Test that DataFrame structure doesn't affect row counting.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationMinRows", + min_rows=3, + ) + + # Single column DataFrame + df1 = pd.DataFrame({"col1": [1, 2, 3]}) + result1 = expectation.validate(data_frame=df1) + + # Multi-column DataFrame + df2 = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"], "col3": [1.1, 2.2, 3.3]}) + result2 = expectation.validate(data_frame=df2) + + # Both should have same result (success) + assert str(result1) == str(result2), f"Expected same results but got: {result1} vs {result2}" + assert str(result1) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationMinRows") + ), f"Expected success message but got: {result1}" diff --git a/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py new file mode 100644 index 0000000..d86c475 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/any_value_expectations/test_expect_unique_rows.py @@ -0,0 +1,577 @@ +import pytest +import pandas as pd +from pyspark.sql.types import IntegerType, StructField, StructType + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + """ + Test that the expectation name is correctly returned. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1"], + ) + assert expectation.get_expectation_name() == "ExpectationUniqueRows", ( + f"Expected 'ExpectationUniqueRows' but got: {expectation.get_expectation_name()}" + ) + + +# Tests for specific columns - Pandas +def test_expectation_pandas_success_specific_columns(): + """ + Test the expectation for pandas DataFrame with no violations on specific columns. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1", "col2"], + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 3, 1], + "col2": [10, 20, 30, 20], # Different combination + "col3": [100, 100, 100, 100], # Same values but not checked + } + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations_specific_columns(): + """ + Test the expectation for pandas DataFrame with violations on specific columns. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1", "col2"], + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 1, 3], + "col2": [10, 20, 10, 30], # Duplicate combination (1, 10) + "col3": [100, 200, 300, 400], + } + ) + result = expectation.validate(data_frame=data_frame) + + # Expected violations shows only one row per duplicate group with count + expected_violations = pd.DataFrame({"col1": [1], "col2": [10], "#duplicates": [2]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 duplicate row(s). duplicate rows found for columns ['col1', 'col2']", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +# Tests for all columns (empty list) - Pandas +def test_expectation_pandas_success_all_columns(): + """ + Test the expectation for pandas DataFrame with no violations on all columns. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=[], + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [10, 20, 30], "col3": [100, 200, 300]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations_all_columns(): + """ + Test the expectation for pandas DataFrame with violations on all columns. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=[], + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 1], + "col2": [10, 20, 10], # Duplicate combination (1, 10) + "col3": [100, 200, 100], + } + ) + result = expectation.validate(data_frame=data_frame) + + # Expected violations shows only one row per duplicate group with count + expected_violations = pd.DataFrame( + {"col1": [1], "col2": [10], "col3": [100], "#duplicates": [2]} + ) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 duplicate row(s). duplicate rows found", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +# Tests for specific columns - PySpark +def test_expectation_pyspark_success_specific_columns(spark): + """ + Test the expectation for PySpark DataFrame with no violations on specific columns. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1", "col2"], + ) + data_frame = spark.createDataFrame( + [ + (1, 10, 100), + (2, 20, 100), + (3, 30, 100), + (1, 20, 100), # Different combination + ], + ["col1", "col2", "col3"], + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations_specific_columns(spark): + """ + Test the expectation for PySpark DataFrame with violations on specific columns. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1", "col2"], + ) + data_frame = spark.createDataFrame( + [ + (1, 10, 100), + (2, 20, 200), + (1, 10, 300), # Duplicate combination (1, 10) + (3, 30, 400), + ], + ["col1", "col2", "col3"], + ) + result = expectation.validate(data_frame=data_frame) + + # Expected violations shows only one row per duplicate group with count + expected_violations = spark.createDataFrame([(1, 10, 2)], ["col1", "col2", "#duplicates"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 duplicate row(s). duplicate rows found for columns ['col1', 'col2']", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +# Tests for all columns (empty list) - PySpark +def test_expectation_pyspark_success_all_columns(spark): + """ + Test the expectation for PySpark DataFrame with no violations on all columns. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=[], + ) + data_frame = spark.createDataFrame( + [(1, 10, 100), (2, 20, 200), (3, 30, 300)], ["col1", "col2", "col3"] + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations_all_columns(spark): + """ + Test the expectation for PySpark DataFrame with violations on all columns. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=[], + ) + data_frame = spark.createDataFrame( + [ + (1, 10, 100), + (2, 20, 200), + (1, 10, 100), + ], # Duplicate combination (1, 10, 100) + ["col1", "col2", "col3"], + ) + result = expectation.validate(data_frame=data_frame) + + # Expected violations shows only one row per duplicate group with count + expected_violations = spark.createDataFrame( + [(1, 10, 100, 2)], ["col1", "col2", "col3", "#duplicates"] + ) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 duplicate row(s). duplicate rows found", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +# Edge case tests +def test_column_missing_error_pandas(): + """ + Test that an error is raised when specified columns are missing in pandas DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["nonexistent_col"], + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'nonexistent_col' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_column_missing_error_pyspark(spark): + """ + Test that an error is raised when specified columns are missing in PySpark DataFrame. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["nonexistent_col"], + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message="Column 'nonexistent_col' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_empty_dataframe_pandas(): + """ + Test the expectation on an empty pandas DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1"], + ) + data_frame = pd.DataFrame({"col1": []}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_empty_dataframe_pyspark(spark): + """ + Test the expectation on an empty PySpark DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1"], + ) + + schema = StructType([StructField("col1", IntegerType(), True)]) + data_frame = spark.createDataFrame([], schema) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_single_row_dataframe_pandas(): + """ + Test the expectation on a single-row pandas DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1"], + ) + data_frame = pd.DataFrame({"col1": [1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_single_row_dataframe_pyspark(spark): + """ + Test the expectation on a single-row PySpark DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1"], + ) + data_frame = spark.createDataFrame([(1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationUniqueRows") + ), f"Expected success message but got: {result}" + + +def test_with_nulls_pandas(): + """ + Test the expectation with null values in pandas DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1", "col2"], + ) + data_frame = pd.DataFrame( + { + "col1": [1, None, 1, None], + "col2": [10, None, 20, None], # (None, None) appears twice + } + ) + result = expectation.validate(data_frame=data_frame) + + # Expected violations shows only one row per duplicate group with count + expected_violations = pd.DataFrame({"col1": [None], "col2": [None], "#duplicates": [2]}) + + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 duplicate row(s). 
duplicate rows found for columns ['col1', 'col2']", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_with_nulls_pyspark(spark): + """ + Test the expectation with null values in PySpark DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1", "col2"], + ) + data_frame = spark.createDataFrame( + [ + (1, 10), + (None, None), + (1, 20), + (None, None), # (None, None) appears twice + ], + ["col1", "col2"], + ) + result = expectation.validate(data_frame=data_frame) + + schema = StructType( + [ + StructField("col1", IntegerType(), True), + StructField("col2", IntegerType(), True), + StructField("#duplicates", IntegerType(), True), + ] + ) + # Expected violations shows only one row per duplicate group with count + expected_violations = spark.createDataFrame([(None, None, 2)], schema) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 duplicate row(s). duplicate rows found for columns ['col1', 'col2']", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +# Test with multiple duplicate groups +def test_expectation_pandas_multiple_duplicate_groups(): + """ + Test the expectation with multiple groups of duplicates in pandas DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1"], + ) + data_frame = pd.DataFrame( + { + "col1": [1, 2, 1, 3, 2, 3], # Three groups: (1,1), (2,2), (3,3) + "col2": [10, 20, 30, 40, 50, 60], + } + ) + result = expectation.validate(data_frame=data_frame) + + # Expected violations shows one row per duplicate group with count, ordered by count then by values + expected_violations = pd.DataFrame({"col1": [1, 2, 3], "#duplicates": [2, 2, 2]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 6 duplicate row(s). duplicate rows found for columns ['col1']", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_multiple_duplicate_groups(spark): + """ + Test the expectation with multiple groups of duplicates in PySpark DataFrame. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationUniqueRows", + column_names=["col1"], + ) + data_frame = spark.createDataFrame( + [ + (1, 10), + (2, 20), + (1, 30), # Duplicate group 1 + (3, 40), + (2, 50), # Duplicate group 2 + (3, 60), # Duplicate group 3 + ], + ["col1", "col2"], + ) + result = expectation.validate(data_frame=data_frame) + + # Expected violations shows one row per duplicate group with count, ordered by count then by values + expected_violations = spark.createDataFrame([(1, 2), (2, 2), (3, 2)], ["col1", "#duplicates"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 6 duplicate row(s). 
duplicate rows found for columns ['col1']", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +# Suite-level tests +def test_suite_pandas_success_specific_columns(): + """ + Test the expectation suite for pandas DataFrame with no violations on specific columns. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"]) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [10, 10, 10]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations_specific_columns(): + """ + Test the expectation suite for pandas DataFrame with violations on specific columns. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"]) + data_frame = pd.DataFrame({"col1": [1, 1, 3], "col2": [10, 20, 30]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pandas_success_all_columns(): + """ + Test the expectation suite for pandas DataFrame with no violations on all columns. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[]) + data_frame = pd.DataFrame({"col1": [1, 2, 3], "col2": [10, 20, 30]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations_all_columns(): + """ + Test the expectation suite for pandas DataFrame with violations on all columns. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[]) + data_frame = pd.DataFrame({"col1": [1, 1, 3], "col2": [10, 10, 30]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success_specific_columns(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations on specific columns. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"]) + data_frame = spark.createDataFrame([(1, 10), (2, 10), (3, 10)], ["col1", "col2"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations_specific_columns(spark): + """ + Test the expectation suite for PySpark DataFrame with violations on specific columns. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=["col1"]) + data_frame = spark.createDataFrame([(1, 10), (1, 20), (3, 30)], ["col1", "col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success_all_columns(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations on all columns. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[]) + data_frame = spark.createDataFrame([(1, 10), (2, 20), (3, 30)], ["col1", "col2"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations_all_columns(spark): + """ + Test the expectation suite for PySpark DataFrame with violations on all columns. 
+ """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows(column_names=[]) + data_frame = spark.createDataFrame([(1, 10), (1, 10), (3, 30)], ["col1", "col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pandas_column_missing_error(): + """ + Test that an error is raised when specified columns are missing in pandas DataFrame suite. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows( + column_names=["nonexistent_col"] + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + """ + Test that an error is raised when specified columns are missing in PySpark DataFrame suite. + """ + expectations_suite = DataFrameExpectationsSuite().expect_unique_rows( + column_names=["nonexistent_col"] + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py new file mode 100644 index 0000000..fb338a8 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_max_between.py @@ -0,0 +1,364 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name_and_description(): + """Test that the expectation name and description are correctly returned.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name="test_col", + min_value=10, + max_value=20, + ) + + # Test expectation name + assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", ( + f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}" + ) + + # Test description + description = expectation.get_description() + assert "maximum" in description, f"Expected 'maximum' in description: {description}" + assert "test_col" in description, f"Expected 'test_col' in description: {description}" + assert "10" in description, f"Expected '10' in description: {description}" + assert "20" in description, f"Expected '20' in description: {description}" + + +def test_pandas_success_registry_and_suite(): + """Test successful validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 30, 40, "basic success case"), + ([35], 30, 40, "single row"), + ([-20, -15, -10, -3], -5, 0, "negative values"), + ([1.1, 2.5, 3.7, 3.8], 3.5, 4.0, "float values"), + ([25, 25, 25, 25], 24, 26, "identical values"), + ([20, 25.5, 30, 37], 35, 40, "mixed data types"), + ([-5, 0, 0, -2], -1, 1, "zero values"), + ([20, None, 35, None, 25], 30, 40, 
"with nulls"), + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_max_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pandas_failure_registry_and_suite(): + """Test failure validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, expected_message) + ( + [20, 25, 30, 35], + 40, + 50, + "Column 'col1' maximum value 35 is not between 40 and 50.", + ), + ([None, None, None], 30, 40, "Column 'col1' contains only null values."), + ([], 30, 40, "Column 'col1' contains only null values."), + ] + + for data, min_val, max_val, expected_message in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), ( + f"Registry test failed for data {data}: expected failure message but got {result}" + ) + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_max_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pandas_missing_column_registry_and_suite(): + """Test missing column error for pandas DataFrames through both registry and suite.""" + data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]}) + expected_message = "Column 'nonexistent_col' does not exist in the DataFrame." 
+ + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name="nonexistent_col", + min_value=30, + max_value=40, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), f"Expected failure message but got: {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_max_between( + column_name="nonexistent_col", min_value=30, max_value=40 + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pyspark_success_registry_and_suite(spark): + """Test successful validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 30, 40, "basic success case"), + ([35], 30, 40, "single row"), + ([-20, -15, -10, -3], -5, 0, "negative values"), + ([20, None, 35, None, 25], 30, 40, "with nulls"), + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_max_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pyspark_failure_registry_and_suite(spark): + """Test failure validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, expected_message) + ( + [20, 25, 30, 35], + 40, + 50, + "Column 'col1' maximum value 35 is not between 40 and 50.", + ), + ] + + for data, min_val, max_val, expected_message in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message=expected_message, + ) + assert str(result) == str(expected_failure), f"Expected failure message but got: {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_max_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pyspark_null_scenarios_registry_and_suite(spark): + """Test null scenarios for PySpark DataFrames through both registry and suite.""" + from pyspark.sql.types 
import IntegerType, StructField, StructType
+
+    # Test scenarios
+    test_scenarios = [
+        # (data_frame_creation, expected_message, description)
+        (
+            lambda: spark.createDataFrame(
+                [{"col1": None}, {"col1": None}, {"col1": None}],
+                schema="struct<col1: int>",
+            ),
+            "Column 'col1' contains only null values.",
+            "all nulls",
+        ),
+        (
+            lambda: spark.createDataFrame(
+                [], StructType([StructField("col1", IntegerType(), True)])
+            ),
+            "Column 'col1' contains only null values.",
+            "empty dataframe",
+        ),
+    ]
+
+    for df_creator, expected_message, description in test_scenarios:
+        data_frame = df_creator()
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMaxBetween",
+            column_name="col1",
+            min_value=30,
+            max_value=40,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), (
+            f"Registry test failed for {description}: expected failure message but got {result}"
+        )
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_max_between(
+            column_name="col1", min_value=30, max_value=40
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+    """Test missing column error for PySpark DataFrames through both registry and suite."""
+    data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+    expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
+
+    # Test through registry
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationColumnMaxBetween",
+        column_name="nonexistent_col",
+        min_value=30,
+        max_value=40,
+    )
+    result = expectation.validate(data_frame=data_frame)
+    expected_failure = DataFrameExpectationFailureMessage(
+        expectation_str=str(expectation),
+        data_frame_type=DataFrameType.PYSPARK,
+        message=expected_message,
+    )
+    assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+    # Test through suite
+    suite = DataFrameExpectationsSuite().expect_column_max_between(
+        column_name="nonexistent_col", min_value=30, max_value=40
+    )
+    with pytest.raises(DataFrameExpectationsSuiteFailure):
+        suite.run(data_frame=data_frame)
+
+
+def test_boundary_values_both_dataframes(spark):
+    """Test boundary values for both pandas and PySpark DataFrames."""
+    test_data = [20, 25, 30, 35]  # max = 35
+
+    # Test exact minimum boundary
+    for df_type, data_frame in [
+        ("pandas", pd.DataFrame({"col1": test_data})),
+        ("pyspark", spark.createDataFrame([(val,) for val in test_data], ["col1"])),
+    ]:
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMaxBetween",
+            column_name="col1",
+            min_value=35,  # exact minimum boundary
+            max_value=40,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        assert isinstance(result, DataFrameExpectationSuccessMessage), (
+            f"Minimum boundary test failed for {df_type}: expected success but got {type(result)}"
+        )
+
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMaxBetween",
+            column_name="col1",
+            min_value=30,
+            max_value=35,  # exact maximum boundary
+        )
+        result = expectation.validate(data_frame=data_frame)
+        assert isinstance(result,
DataFrameExpectationSuccessMessage), ( + f"Maximum boundary test failed for {df_type}: expected success but got {type(result)}" + ) + + +def test_suite_chaining(): + """Test that the suite method returns self for method chaining.""" + suite = DataFrameExpectationsSuite() + result = suite.expect_column_max_between(column_name="col1", min_value=30, max_value=40) + assert result is suite, f"Expected suite chaining to return same instance but got: {result}" + + +def test_large_dataset_performance(): + """Test the expectation with a larger dataset to ensure performance.""" + import numpy as np + + # Create a larger dataset with max around 60 + large_data = np.random.uniform(10, 60, 1000).tolist() + data_frame = pd.DataFrame({"col1": large_data}) + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMaxBetween", + column_name="col1", + min_value=55, + max_value=65, + ) + + result = expectation.validate(data_frame=data_frame) + # Should succeed as the max of uniform(10, 60) should be around 60 + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Large dataset test failed: expected success but got {type(result)}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py new file mode 100644 index 0000000..7e25ffa --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_mean_between.py @@ -0,0 +1,433 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name_and_description(): + """Test that the expectation name and description are correctly returned.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="test_col", + min_value=10, + max_value=20, + ) + + # Test expectation name + assert expectation.get_expectation_name() == "ExpectationColumnMeanBetween", ( + f"Expected 'ExpectationColumnMeanBetween' but got: {expectation.get_expectation_name()}" + ) + + # Test description + description = expectation.get_description() + assert "mean" in description, f"Expected 'mean' in description: {description}" + assert "test_col" in description, f"Expected 'test_col' in description: {description}" + assert "10" in description, f"Expected '10' in description: {description}" + assert "20" in description, f"Expected '20' in description: {description}" + + +def test_pandas_success_registry_and_suite(): + """Test successful validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 25, 30, "basic success case"), # mean = 27.5 + ([25], 20, 30, "single row"), # mean = 25 + ([-20, -15, -10, -5], -15, -10, "negative values"), # mean = -12.5 + ([1.1, 2.5, 3.7, 3.8], 2.5, 3.0, "float values"), # mean = 2.775 + ([25, 25, 25, 25], 24, 26, "identical values"), # mean = 25 + ([20, 25.5, 30, 37], 27, 
29, "mixed data types"), # mean = 28.125 + ([-5, 0, 0, 5], -2, 2, "with zeros"), # mean = 0 + ( + [20, None, 30, None, 40], + 25, + 35, + "with nulls", + ), # mean = 30 (nulls ignored) + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnMeanBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_mean_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pandas_failure_registry_and_suite(): + """Test failure validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, expected_mean, description) + ([20, 25, 30, 35], 30, 35, 27.5, "mean too low"), + ([20, 25, 30, 35], 20, 25, 27.5, "mean too high"), + ([None, None, None], 25, 30, None, "all nulls"), + ([], 25, 30, None, "empty dataframe"), + ] + + for data, min_val, max_val, expected_mean, description in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Determine expected message + if expected_mean is None: + expected_message = "Column 'col1' contains only null values." + else: + expected_message = ( + f"Column 'col1' mean value {expected_mean} is not between {min_val} and {max_val}." + ) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), ( + f"Registry test failed for {description}: expected failure message but got {result}" + ) + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_mean_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pandas_missing_column_registry_and_suite(): + """Test missing column error for pandas DataFrames through both registry and suite.""" + data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]}) + expected_message = "Column 'nonexistent_col' does not exist in the DataFrame." 
+ + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="nonexistent_col", + min_value=25, + max_value=30, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), f"Expected failure message but got: {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_mean_between( + column_name="nonexistent_col", min_value=25, max_value=30 + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pyspark_success_registry_and_suite(spark): + """Test successful validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 25, 30, "basic success case"), # mean = 27.5 + ([25], 20, 30, "single row"), # mean = 25 + ([-20, -15, -10, -5], -15, -10, "negative values"), # mean = -12.5 + ([20, None, 30, None, 40], 25, 35, "with nulls"), # mean = 30 + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnMeanBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_mean_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pyspark_failure_registry_and_suite(spark): + """Test failure validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, expected_mean, description) + ([20, 25, 30, 35], 30, 35, 27.5, "mean too low"), + ([20, 25, 30, 35], 20, 25, 27.5, "mean too high"), + ] + + for data, min_val, max_val, expected_mean, description in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + expected_message = ( + f"Column 'col1' mean value {expected_mean} is not between {min_val} and {max_val}." 
+        )
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMeanBetween",
+            column_name="col1",
+            min_value=min_val,
+            max_value=max_val,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_mean_between(
+            column_name="col1", min_value=min_val, max_value=max_val
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+    """Test null scenarios for PySpark DataFrames through both registry and suite."""
+    from pyspark.sql.types import IntegerType, StructField, StructType
+
+    # Test scenarios
+    test_scenarios = [
+        # (data_frame_creation, expected_message, description)
+        (
+            lambda: spark.createDataFrame(
+                [{"col1": None}, {"col1": None}, {"col1": None}],
+                schema="struct<col1: int>",
+            ),
+            "Column 'col1' contains only null values.",
+            "all nulls",
+        ),
+        (
+            lambda: spark.createDataFrame(
+                [], StructType([StructField("col1", IntegerType(), True)])
+            ),
+            "Column 'col1' contains only null values.",
+            "empty dataframe",
+        ),
+    ]
+
+    for df_creator, expected_message, description in test_scenarios:
+        data_frame = df_creator()
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMeanBetween",
+            column_name="col1",
+            min_value=25,
+            max_value=30,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), (
+            f"Registry test failed for {description}: expected failure message but got {result}"
+        )
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_mean_between(
+            column_name="col1", min_value=25, max_value=30
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+    """Test missing column error for PySpark DataFrames through both registry and suite."""
+    data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+    expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
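
Both PySpark null scenarios above are expected to land on the same "contains only null values" message. One plausible way to get that behavior cheaply: Spark's aggregate functions already skip nulls and return None when no non-null values remain, so a single None check covers the all-null and empty cases alike. A small sketch (assumes an active `spark` session, e.g. the test fixture; `spark_mean_or_none` is an illustrative helper, not a library function):

# Sketch: F.mean skips nulls and yields None for all-null or empty input,
# so one None check is enough to trigger the "only null values" message.
from pyspark.sql import functions as F


def spark_mean_or_none(df, column: str):
    return df.select(F.mean(F.col(column)).alias("m")).first()["m"]


all_null = spark.createDataFrame(
    [{"col1": None}, {"col1": None}], schema="struct<col1: int>"
)
assert spark_mean_or_none(all_null, "col1") is None  # -> "contains only null values"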
+ + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="nonexistent_col", + min_value=25, + max_value=30, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message=expected_message, + ) + assert str(result) == str(expected_failure), f"Expected failure message but got: {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_mean_between( + column_name="nonexistent_col", min_value=25, max_value=30 + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_boundary_values_both_dataframes(spark): + """Test boundary values for both pandas and PySpark DataFrames.""" + test_data = [20, 25, 30, 35] # mean = 27.5 + + # Test boundary scenarios + boundary_tests = [ + (27.5, 30, "exact minimum boundary"), # mean exactly at min + (25, 27.5, "exact maximum boundary"), # mean exactly at max + ] + + for min_val, max_val, boundary_desc in boundary_tests: + for df_type, data_frame in [ + ("pandas", pd.DataFrame({"col1": test_data})), + ( + "pyspark", + spark.createDataFrame([(val,) for val in test_data], ["col1"]), + ), + ]: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Boundary test failed for {df_type} with {boundary_desc}: expected success but got {type(result)}" + ) + + +def test_precision_handling(): + """Test mean calculation precision with various numeric types.""" + # Test scenarios with different levels of precision + precision_tests = [ + # (data, description) + ([1.1111, 2.2222, 3.3333], "high precision decimals"), + ([1, 2, 3, 4, 5, 6, 7, 8, 9], "integer sequence"), + ([0.1, 0.2, 0.3, 0.4, 0.5], "decimal sequence"), + ([1e-6, 2e-6, 3e-6], "scientific notation"), + ] + + for data, description in precision_tests: + data_frame = pd.DataFrame({"col1": data}) + calculated_mean = sum(data) / len(data) + + # Use a range around the calculated mean + min_val = calculated_mean - 0.1 + max_val = calculated_mean + 0.1 + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Precision test failed for {description}: expected success but got {type(result)}" + ) + + +def test_suite_chaining(): + """Test that the suite method returns self for method chaining.""" + suite = DataFrameExpectationsSuite() + result = suite.expect_column_mean_between(column_name="col1", min_value=25, max_value=30) + assert result is suite, f"Expected suite chaining to return same instance but got: {result}" + + +def test_large_dataset_performance(): + """Test the expectation with a larger dataset to ensure performance.""" + import numpy as np + + # Create a larger dataset with mean around 50 + large_data = np.random.normal(50, 10, 1000).tolist() + data_frame = pd.DataFrame({"col1": large_data}) + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + 
column_name="col1", + min_value=45, + max_value=55, + ) + + result = expectation.validate(data_frame=data_frame) + # Should succeed as the mean of normal(50, 10) should be around 50 + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Large dataset test failed: expected success but got {type(result)}" + ) + + +def test_outlier_handling(spark): + """Test mean calculation with outliers.""" + # Test data with outliers + outlier_scenarios = [ + # (data, min_val, max_val, description) + ([1, 2, 3, 100], 20, 30, "single high outlier"), # mean = 26.5 + ([-100, 10, 20, 30], -15, -5, "single low outlier"), # mean = -10 + ([1, 2, 3, 4, 5, 1000], 150, 200, "extreme outlier"), # mean ≈ 169.17 + ] + + for data, min_val, max_val, description in outlier_scenarios: + # Test with pandas + data_frame = pd.DataFrame({"col1": data}) + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMeanBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Pandas outlier test failed for {description}: expected success but got {type(result)}" + ) + + # Test with PySpark + pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"]) + result_pyspark = expectation.validate(data_frame=pyspark_df) + assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), ( + f"PySpark outlier test failed for {description}: expected success but got {type(result_pyspark)}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py new file mode 100644 index 0000000..786cfb5 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_median_between.py @@ -0,0 +1,511 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name_and_description(): + """Test that the expectation name and description are correctly returned.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="test_col", + min_value=10, + max_value=20, + ) + + # Test expectation name (should delegate to quantile expectation) + assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", ( + f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}" + ) + + # Test description + description = expectation.get_description() + assert "median" in description, f"Expected 'median' in description: {description}" + assert "test_col" in description, f"Expected 'test_col' in description: {description}" + assert "10" in description, f"Expected '10' in description: {description}" + assert "20" in description, f"Expected '20' in description: {description}" + + # Test that quantile is correctly set to 0.5 + assert expectation.quantile == 0.5, ( + f"Expected quantile to be 
0.5 but got: {expectation.quantile}" + ) + assert expectation.quantile_desc == "median", ( + f"Expected quantile_desc to be 'median' but got: {expectation.quantile_desc}" + ) + + +def test_pandas_success_registry_and_suite(): + """Test successful validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 25, 30, "basic success case"), # median = 27.5 + ([25], 20, 30, "single row"), # median = 25 + ([-20, -15, -10, -5], -15, -10, "negative values"), # median = -12.5 + ([1.1, 2.5, 3.7, 3.8], 2.5, 3.5, "float values"), # median = 3.1 + ([25, 25, 25, 25], 24, 26, "identical values"), # median = 25 + ([20, 25.5, 30, 37], 27, 29, "mixed data types"), # median = 27.75 + ([-5, 0, 0, 5], -1, 1, "with zeros"), # median = 0 + ( + [20, None, 30, None, 40], + 25, + 35, + "with nulls", + ), # median = 30 (nulls ignored) + ([10, 20, 30], 19, 21, "odd number of values"), # median = 20 + ([10, 20, 30, 40], 24, 26, "even number of values"), # median = 25 + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_median_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pandas_failure_registry_and_suite(): + """Test failure validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, expected_median, description) + ([20, 25, 30, 35], 30, 35, 27.5, "median too low"), + ([20, 25, 30, 35], 20, 25, 27.5, "median too high"), + ([10, 20, 30], 25, 30, 20.0, "odd count median out of range"), + ([None, None, None], 25, 30.0, None, "all nulls"), + ([], 25, 30.0, None, "empty dataframe"), + ] + + for data, min_val, max_val, expected_median, description in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Determine expected message + if expected_median is None: + expected_message = "Column 'col1' contains only null values." + else: + expected_message = f"Column 'col1' median value {expected_median} is not between {min_val} and {max_val}." 
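+
+        # For reference: the expected medians above can be reproduced with pandas
+        # directly, e.g. pd.Series([20, 25, 30, 35]).median() == 27.5 (pandas
+        # skips nulls when aggregating), while an all-null or empty column has no
+        # median at all and is assumed to fall back to the null-values message.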
+ + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), ( + f"Registry test failed for {description}: expected failure message but got {result}" + ) + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_median_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pandas_missing_column_registry_and_suite(): + """Test missing column error for pandas DataFrames through both registry and suite.""" + data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]}) + expected_message = "Column 'nonexistent_col' does not exist in the DataFrame." + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="nonexistent_col", + min_value=25, + max_value=30, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure) + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_median_between( + column_name="nonexistent_col", min_value=25, max_value=30 + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pyspark_success_registry_and_suite(spark): + """Test successful validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 25, 30, "basic success case"), # median ≈ 27.5 + ([25], 20, 30, "single row"), # median = 25 + ([-20, -15, -10, -5], -15, -10, "negative values"), # median ≈ -12.5 + ([20, None, 30, None, 40], 25, 35, "with nulls"), # median ≈ 30 + ([10, 20, 30], 19, 21, "odd number of values"), # median ≈ 20 + ([10, 20, 30, 40], 24, 26, "even number of values"), # median ≈ 25 + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_median_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pyspark_failure_registry_and_suite(spark): + """Test failure validation for PySpark DataFrames through both registry and suite.""" + import numpy as np + + 
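+    # np.median on the raw Python list is used here only to render the value
+    # embedded in the expected failure message; on these tiny datasets it should
+    # match what the expectation itself computes on the Spark side.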
+    # Test data scenarios
+    test_scenarios = [
+        # (data, min_value, max_value, description)
+        ([20, 25, 30, 35], 30, 35, "median too low"),
+        ([20, 25, 30, 35], 20, 25, "median too high"),
+        ([10, 20, 30], 25, 30, "odd count median out of range"),
+    ]
+
+    for data, min_val, max_val, description in test_scenarios:
+        data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+        # Calculate expected median for error message
+        expected_median = np.median(data)
+        expected_message = (
+            f"Column 'col1' median value {expected_median} is not between {min_val} and {max_val}."
+        )
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMedianBetween",
+            column_name="col1",
+            min_value=min_val,
+            max_value=max_val,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_median_between(
+            column_name="col1", min_value=min_val, max_value=max_val
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+    """Test null scenarios for PySpark DataFrames through both registry and suite."""
+    from pyspark.sql.types import IntegerType, StructField, StructType
+
+    # Test scenarios
+    test_scenarios = [
+        # (data_frame_creation, expected_message, description)
+        (
+            lambda: spark.createDataFrame(
+                [{"col1": None}, {"col1": None}, {"col1": None}],
+                schema="struct<col1: int>",
+            ),
+            "Column 'col1' contains only null values.",
+            "all nulls",
+        ),
+        (
+            lambda: spark.createDataFrame(
+                [], StructType([StructField("col1", IntegerType(), True)])
+            ),
+            "Column 'col1' contains only null values.",
+            "empty dataframe",
+        ),
+    ]
+
+    for df_creator, expected_message, description in test_scenarios:
+        data_frame = df_creator()
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMedianBetween",
+            column_name="col1",
+            min_value=25,
+            max_value=30,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), (
+            f"Registry test failed for {description}: expected failure message but got {result}"
+        )
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_median_between(
+            column_name="col1", min_value=25, max_value=30
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+    """Test missing column error for PySpark DataFrames through both registry and suite."""
+    data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+    expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
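+    # Note: the missing-column failure carries only a message and no violations
+    # frame, since presumably no aggregation can run against a column that is
+    # absent from the DataFrame schema.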
+ + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="nonexistent_col", + min_value=25, + max_value=30, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message=expected_message, + ) + assert str(result) == str(expected_failure), f"Expected failure message but got: {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_median_between( + column_name="nonexistent_col", min_value=25, max_value=30 + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_boundary_values_both_dataframes(spark): + """Test boundary values for both pandas and PySpark DataFrames.""" + test_data = [20, 25, 30, 35] # median = 27.5 + + # Test boundary scenarios + boundary_tests = [ + (27.5, 30, "exact minimum boundary"), # median exactly at min + (25, 27.5, "exact maximum boundary"), # median exactly at max + ] + + for min_val, max_val, boundary_desc in boundary_tests: + for df_type, data_frame in [ + ("pandas", pd.DataFrame({"col1": test_data})), + ( + "pyspark", + spark.createDataFrame([(val,) for val in test_data], ["col1"]), + ), + ]: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Boundary test failed for {df_type} with {boundary_desc}: expected success but got {type(result)}" + ) + + +def test_median_calculation_specifics(spark): + """Test median calculation specifics for odd vs even number of elements.""" + median_scenarios = [ + # (data, expected_median, description) + ([1, 2, 3], 2, "odd count - middle element"), + ([1, 2, 3, 4], 2.5, "even count - average of middle two"), + ([5], 5, "single element"), + ([10, 10, 10], 10, "all identical values"), + ([1, 100], 50.5, "two elements - average"), + ([1, 2, 100], 2, "odd count with outlier"), + ([1, 2, 99, 100], 50.5, "even count with outliers"), + ] + + for data, expected_median, description in median_scenarios: + # Set bounds around expected median + min_val = expected_median - 0.1 + max_val = expected_median + 0.1 + + # Test pandas + data_frame = pd.DataFrame({"col1": data}) + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Pandas median test failed for {description}: expected success but got {type(result)}" + ) + + # Test PySpark (for non-single element cases) + if len(data) > 1: + pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"]) + result_pyspark = expectation.validate(data_frame=pyspark_df) + assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), ( + f"PySpark median test failed for {description}: expected success but got {type(result_pyspark)}" + ) + + +def test_precision_handling(): + """Test median calculation precision with various numeric types.""" + # Test scenarios with different levels of precision + precision_tests = [ + # (data, description) + ([1.1111, 2.2222, 3.3333], 
"high precision decimals"), + ([0.1, 0.2, 0.3, 0.4, 0.5], "decimal sequence"), + ([1e-6, 2e-6, 3e-6, 4e-6, 5e-6], "scientific notation"), + ([1.0, 1.5, 2.0, 2.5, 3.0], "half increments"), + ] + + for data, description in precision_tests: + data_frame = pd.DataFrame({"col1": data}) + import numpy as np + + calculated_median = np.median(data) + + # Use a small range around the calculated median + min_val = calculated_median - 0.001 + max_val = calculated_median + 0.001 + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Precision test failed for {description}: expected success but got {type(result)}" + ) + + +def test_suite_chaining(): + """Test that the suite method returns self for method chaining.""" + suite = DataFrameExpectationsSuite() + result = suite.expect_column_median_between(column_name="col1", min_value=25, max_value=30) + assert result is suite, f"Expected suite chaining to return same instance but got: {result}" + + +def test_large_dataset_performance(): + """Test the expectation with a larger dataset to ensure performance.""" + import numpy as np + + # Create a larger dataset with median around 50 + large_data = np.random.normal(50, 10, 1001).tolist() # Use odd count for deterministic median + data_frame = pd.DataFrame({"col1": large_data}) + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=45, + max_value=55, + ) + + result = expectation.validate(data_frame=data_frame) + # Should succeed as the median of normal(50, 10) should be around 50 + assert isinstance(result, DataFrameExpectationSuccessMessage) + + +def test_outlier_resistance(spark): + """Test that median is resistant to outliers (unlike mean).""" + # Test data where median is stable despite extreme outliers + outlier_scenarios = [ + # (data, min_val, max_val, description) + ( + [1, 2, 3, 1000], + 1.5, + 2.5, + "high outlier doesn't affect median", + ), # median = 2.5 + ( + [-1000, 10, 20, 30], + 14, + 16, + "low outlier doesn't affect median", + ), # median = 15 + ( + [1, 2, 3, 4, 5, 1000000], + 2.5, + 3.5, + "extreme outlier ignored", + ), # median = 3.5 + ( + [-1000000, 1, 2, 3, 4, 5], + 2.5, + 3.5, + "extreme negative outlier ignored", + ), # median = 2.5 + ] + + for data, min_val, max_val, description in outlier_scenarios: + # Test with pandas + data_frame = pd.DataFrame({"col1": data}) + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMedianBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Pandas outlier test failed for {description}: expected success but got {type(result)}" + ) + + # Test with PySpark + pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"]) + result_pyspark = expectation.validate(data_frame=pyspark_df) + assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), ( + f"PySpark outlier test failed for {description}: expected success but got {type(result_pyspark)}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py 
b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py new file mode 100644 index 0000000..fe438f2 --- /dev/null +++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_min_between.py @@ -0,0 +1,480 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name_and_description(): + """Test that the expectation name and description are correctly returned.""" + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="test_col", + min_value=10, + max_value=20, + ) + + # Test expectation name (should delegate to quantile expectation) + assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", ( + f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}" + ) + + # Test description + description = expectation.get_description() + assert "minimum" in description, f"Expected 'minimum' in description: {description}" + assert "test_col" in description, f"Expected 'test_col' in description: {description}" + assert "10" in description, f"Expected '10' in description: {description}" + assert "20" in description, f"Expected '20' in description: {description}" + + # Test that quantile is correctly set to 0.0 + assert expectation.quantile == 0.0, ( + f"Expected quantile to be 0.0 but got: {expectation.quantile}" + ) + assert expectation.quantile_desc == "minimum", ( + f"Expected quantile_desc to be 'minimum' but got: {expectation.quantile_desc}" + ) + + +def test_pandas_success_registry_and_suite(): + """Test successful validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 15, 25, "basic success case"), # min = 20 + ([25], 20, 30, "single row"), # min = 25 + ([-20, -15, -10, -5], -25, -15, "negative values"), # min = -20 + ([1.1, 2.5, 3.7, 3.8], 1.0, 1.5, "float values"), # min = 1.1 + ([25, 25, 25, 25], 24, 26, "identical values"), # min = 25 + ([20, 25.5, 30, 37], 15, 25, "mixed data types"), # min = 20 + ([-5, 0, 0, 2], -10, -1, "with zeros"), # min = -5 + ( + [20, None, 35, None, 25], + 15, + 25, + "with nulls", + ), # min = 20 (nulls ignored) + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_min_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is 
None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pandas_failure_registry_and_suite(): + """Test failure validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, expected_min, description) + ([20, 25, 30, 35], 25, 35, 20, "minimum too low"), + ([20, 25, 30, 35], 10, 15, 20, "minimum too high"), + ([None, None, None], 15, 25, None, "all nulls"), + ([], 15, 25, None, "empty dataframe"), + ] + + for data, min_val, max_val, expected_min, description in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Determine expected message + if expected_min is None: + expected_message = "Column 'col1' contains only null values." + else: + expected_message = f"Column 'col1' minimum value {expected_min} is not between {min_val} and {max_val}." + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), ( + f"Registry test failed for {description}: expected failure message but got {result}" + ) + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_min_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pandas_missing_column_registry_and_suite(): + """Test missing column error for pandas DataFrames through both registry and suite.""" + data_frame = pd.DataFrame({"col1": [20, 25, 30, 35]}) + expected_message = "Column 'nonexistent_col' does not exist in the DataFrame." 
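+    # ExpectationColumnMinBetween is the quantile expectation pinned at
+    # quantile=0.0 (see test_expectation_name_and_description above), so the
+    # missing-column handling is expected to be shared with the median tests.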
+ + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="nonexistent_col", + min_value=15, + max_value=25, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), f"Expected failure message but got: {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_min_between( + column_name="nonexistent_col", min_value=15, max_value=25 + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pyspark_success_registry_and_suite(spark): + """Test successful validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, description) + ([20, 25, 30, 35], 15, 25, "basic success case"), # min = 20 + ([25], 20, 30, "single row"), # min = 25 + ([-20, -15, -10, -5], -25, -15, "negative values"), # min = -20 + ([20, None, 35, None, 25], 15, 25, "with nulls"), # min = 20 + ] + + for data, min_val, max_val, description in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_min_between( + column_name="col1", min_value=min_val, max_value=max_val + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pyspark_failure_registry_and_suite(spark): + """Test failure validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios + test_scenarios = [ + # (data, min_value, max_value, expected_min, description) + ([20, 25, 30, 35], 25, 35, 20, "minimum too low"), + ([20, 25, 30, 35], 10, 15, 20, "minimum too high"), + ] + + for data, min_val, max_val, expected_min, description in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + expected_message = ( + f"Column 'col1' minimum value {expected_min} is not between {min_val} and {max_val}." 
+        )
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMinBetween",
+            column_name="col1",
+            min_value=min_val,
+            max_value=max_val,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_min_between(
+            column_name="col1", min_value=min_val, max_value=max_val
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+    """Test null scenarios for PySpark DataFrames through both registry and suite."""
+    from pyspark.sql.types import IntegerType, StructField, StructType
+
+    # Test scenarios
+    test_scenarios = [
+        # (data_frame_creation, expected_message, description)
+        (
+            lambda: spark.createDataFrame(
+                [{"col1": None}, {"col1": None}, {"col1": None}],
+                schema="struct<col1: int>",
+            ),
+            "Column 'col1' contains only null values.",
+            "all nulls",
+        ),
+        (
+            lambda: spark.createDataFrame(
+                [], StructType([StructField("col1", IntegerType(), True)])
+            ),
+            "Column 'col1' contains only null values.",
+            "empty dataframe",
+        ),
+    ]
+
+    for df_creator, expected_message, description in test_scenarios:
+        data_frame = df_creator()
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnMinBetween",
+            column_name="col1",
+            min_value=15,
+            max_value=25,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), (
+            f"Registry test failed for {description}: expected failure message but got {result}"
+        )
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_min_between(
+            column_name="col1", min_value=15, max_value=25
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_missing_column_registry_and_suite(spark):
+    """Test missing column error for PySpark DataFrames through both registry and suite."""
+    data_frame = spark.createDataFrame([(20,), (25,), (30,), (35,)], ["col1"])
+    expected_message = "Column 'nonexistent_col' does not exist in the DataFrame."
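+    # spark.createDataFrame expects row-shaped input, hence the one-element
+    # tuples: [(20,), (25,), ...] builds a single-column frame in the same way
+    # as the success tests above.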
+ + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="nonexistent_col", + min_value=15, + max_value=25, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + message=expected_message, + ) + assert str(result) == str(expected_failure), f"Expected failure message but got: {result}" + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_min_between( + column_name="nonexistent_col", min_value=15, max_value=25 + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_boundary_values_both_dataframes(spark): + """Test boundary values for both pandas and PySpark DataFrames.""" + test_data = [20, 25, 30, 35] # min = 20 + + # Test boundary scenarios + boundary_tests = [ + (20, 25, "exact minimum boundary"), # min exactly at min + (15, 20, "exact maximum boundary"), # min exactly at max + ] + + for min_val, max_val, boundary_desc in boundary_tests: + for df_type, data_frame in [ + ("pandas", pd.DataFrame({"col1": test_data})), + ( + "pyspark", + spark.createDataFrame([(val,) for val in test_data], ["col1"]), + ), + ]: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Boundary test failed for {df_type} with {boundary_desc}: expected success but got {type(result)}" + ) + + +def test_minimum_specific_scenarios(spark): + """Test minimum-specific scenarios including edge cases.""" + min_scenarios = [ + # (data, expected_min, description) + ([100, 50, 75, 25], 25, "minimum with mixed order"), + ([0, 1, 2, 3], 0, "minimum is zero"), + ([-10, -5, -1, -20], -20, "minimum with negatives"), + ([1.001, 1.002, 1.003], 1.001, "minimum with small differences"), + ([1e6, 1e5, 1e4], 1e4, "minimum with large numbers"), + ([1e-6, 1e-5, 1e-4], 1e-6, "minimum with very small numbers"), + ] + + for data, expected_min, description in min_scenarios: + # Set bounds around expected minimum + min_val = expected_min - 0.1 + max_val = expected_min + 0.1 + + # Test pandas + data_frame = pd.DataFrame({"col1": data}) + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Pandas minimum test failed for {description}: expected success but got {type(result)}" + ) + + # Test PySpark + pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"]) + result_pyspark = expectation.validate(data_frame=pyspark_df) + assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), ( + f"PySpark minimum test failed for {description}: expected success but got {type(result_pyspark)}" + ) + + +def test_suite_chaining(): + """Test that the suite method returns self for method chaining.""" + suite = DataFrameExpectationsSuite() + result = suite.expect_column_min_between(column_name="col1", min_value=15, max_value=25) + assert result is suite, f"Expected suite chaining to return same instance but got: {result}" + + +def 
test_large_dataset_performance(): + """Test the expectation with a larger dataset to ensure performance.""" + import numpy as np + + # Create a larger dataset with minimum around 10 + large_data = np.random.uniform(10, 60, 1000).tolist() + data_frame = pd.DataFrame({"col1": large_data}) + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=9, + max_value=12, + ) + + result = expectation.validate(data_frame=data_frame) + # Should succeed as the minimum of uniform(10, 60) should be around 10 + assert isinstance(result, DataFrameExpectationSuccessMessage) + + +def test_outlier_impact_on_minimum(spark): + """Test how outliers affect minimum values (unlike median, minimum is sensitive to outliers).""" + # Test data where outliers affect the minimum + outlier_scenarios = [ + # (data, min_val, max_val, description) + ([1, 2, 3, -1000], -1100, -900, "extreme low outlier becomes minimum"), + ([100, 200, 300, 50], 40, 60, "outlier changes minimum significantly"), + ([1.5, 2.0, 2.5, 0.1], 0.05, 0.15, "small outlier affects minimum"), + ] + + for data, min_val, max_val, description in outlier_scenarios: + # Test with pandas + data_frame = pd.DataFrame({"col1": data}) + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Pandas outlier test failed for {description}: expected success but got {type(result)}" + ) + + # Test with PySpark + pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"]) + result_pyspark = expectation.validate(data_frame=pyspark_df) + assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), ( + f"PySpark outlier test failed for {description}: expected success but got {type(result_pyspark)}" + ) + + +def test_edge_case_single_unique_value(spark): + """Test minimum when all values are the same.""" + # When all values are identical, min = max = that value + identical_scenarios = [ + ([42, 42, 42, 42], "integer repetition"), + ([3.14, 3.14, 3.14], "float repetition"), + ([-7, -7, -7, -7, -7], "negative repetition"), + ([0, 0, 0], "zero repetition"), + ] + + for data, description in identical_scenarios: + expected_value = data[0] # All values are the same + min_val = expected_value - 0.1 + max_val = expected_value + 0.1 + + # Test pandas + data_frame = pd.DataFrame({"col1": data}) + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnMinBetween", + column_name="col1", + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Pandas identical values test failed for {description}: expected success but got {type(result)}" + ) + + # Test PySpark + pyspark_df = spark.createDataFrame([(val,) for val in data], ["col1"]) + result_pyspark = expectation.validate(data_frame=pyspark_df) + assert isinstance(result_pyspark, DataFrameExpectationSuccessMessage), ( + f"PySpark identical values test failed for {description}: expected success but got {type(result_pyspark)}" + ) diff --git a/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py 
b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py
new file mode 100644
index 0000000..d2e59d2
--- /dev/null
+++ b/tests/expectations_implemented/aggregation_expectations/numerical_expectations/test_expect_column_quantile_between.py
@@ -0,0 +1,393 @@
+import pytest
+import numpy as np
+import pandas as pd
+
+from dataframe_expectations import DataFrameType
+from dataframe_expectations.expectations.expectation_registry import (
+    DataFrameExpectationRegistry,
+)
+from dataframe_expectations.expectations_suite import (
+    DataFrameExpectationsSuite,
+    DataFrameExpectationsSuiteFailure,
+)
+from dataframe_expectations.result_message import (
+    DataFrameExpectationFailureMessage,
+    DataFrameExpectationSuccessMessage,
+)
+
+
+def test_expectation_name_and_description():
+    """Test that the expectation name and description are correctly returned."""
+    expectation = DataFrameExpectationRegistry.get_expectation(
+        expectation_name="ExpectationColumnQuantileBetween",
+        column_name="test_col",
+        quantile=0.5,
+        min_value=20,
+        max_value=30,
+    )
+
+    # Test expectation name
+    assert expectation.get_expectation_name() == "ExpectationColumnQuantileBetween", (
+        f"Expected 'ExpectationColumnQuantileBetween' but got: {expectation.get_expectation_name()}"
+    )
+
+    # Test description messages for different quantiles
+    test_cases = [
+        (0.0, "minimum"),
+        (0.25, "25th percentile"),
+        (0.5, "median"),
+        (0.75, "75th percentile"),
+        (1.0, "maximum"),
+        (0.9, "0.9 quantile"),
+    ]
+
+    for quantile, expected_desc in test_cases:
+        exp = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnQuantileBetween",
+            column_name="test_col",
+            quantile=quantile,
+            min_value=10,
+            max_value=20,
+        )
+        assert exp.quantile_desc == expected_desc, (
+            f"Expected quantile_desc '{expected_desc}' for quantile {quantile} but got: {exp.quantile_desc}"
+        )
+        assert expected_desc in exp.get_description(), (
+            f"Expected '{expected_desc}' in description: {exp.get_description()}"
+        )
+
+
+def test_pandas_success_registry_and_suite():
+    """Test successful validation for pandas DataFrames through both registry and suite."""
+    # Test data scenarios for different quantiles
+    test_scenarios = [
+        # (data, quantile, min_value, max_value, description)
+        ([20, 25, 30, 35], 0.0, 15, 25, "minimum success"),  # min = 20
+        ([20, 25, 30, 35], 1.0, 30, 40, "maximum success"),  # max = 35
+        ([20, 25, 30, 35], 0.5, 25, 30, "median success"),  # median = 27.5
+        ([20, 25, 30, 35], 0.25, 20, 25, "25th percentile success"),  # 25th = 23.75 (pandas linear interpolation)
+        ([10, 20, 30, 40, 50], 0.33, 20, 30, "33rd percentile success"),  # ~23.2
+        ([25], 0.5, 20, 30, "single row median"),  # median = 25
+        ([20, None, 25, None, 30], 0.5, 20, 30, "with nulls median"),  # median = 25
+    ]
+
+    for data, quantile, min_val, max_val, description in test_scenarios:
+        data_frame = pd.DataFrame({"col1": data})
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnQuantileBetween",
+            column_name="col1",
+            quantile=quantile,
+            min_value=min_val,
+            max_value=max_val,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        assert str(result) == str(
+            DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween")
+        ), f"Registry test failed for {description}: expected success but got {result}"
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+            column_name="col1",
quantile=quantile, + min_value=min_val, + max_value=max_val, + ) + suite_result = suite.run(data_frame=data_frame) + assert suite_result is None, ( + f"Suite test failed for {description}: expected None but got {suite_result}" + ) + + +def test_pandas_failure_registry_and_suite(): + """Test failure validation for pandas DataFrames through both registry and suite.""" + # Test data scenarios for different quantiles + test_scenarios = [ + # (data, quantile, min_value, max_value, expected_message) + ( + [20, 25, 30, 35], + 0.0, + 25, + 35, + "Column 'col1' minimum value 20 is not between 25 and 35.", + ), + ( + [20, 25, 30, 35], + 1.0, + 40, + 50, + "Column 'col1' maximum value 35 is not between 40 and 50.", + ), + ( + [20, 25, 30, 35], + 0.5, + 30, + 35, + "Column 'col1' median value 27.5 is not between 30 and 35.", + ), + ( + [20, 25, 30, 35], + 0.75, + 25, + 30, + f"Column 'col1' 75th percentile value {np.quantile([20, 25, 30, 35], 0.75)} is not between 25 and 30.", + ), + ( + [None, None, None], + 0.5, + 20, + 30, + "Column 'col1' contains only null values.", + ), + ([], 0.5, 20, 30, "Column 'col1' contains only null values."), + ] + + for data, quantile, min_val, max_val, expected_message in test_scenarios: + data_frame = pd.DataFrame({"col1": data}) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnQuantileBetween", + column_name="col1", + quantile=quantile, + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + expected_failure = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message=expected_message, + ) + assert str(result) == str(expected_failure), ( + f"Registry test failed for quantile {quantile}: expected failure message but got {result}" + ) + + # Test through suite + suite = DataFrameExpectationsSuite().expect_column_quantile_between( + column_name="col1", + quantile=quantile, + min_value=min_val, + max_value=max_val, + ) + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_frame) + + +def test_pyspark_success_registry_and_suite(spark): + """Test successful validation for PySpark DataFrames through both registry and suite.""" + # Test data scenarios for different quantiles + test_scenarios = [ + # (data, quantile, min_value, max_value, description) + ([20, 25, 30, 35], 0.0, 15, 25, "minimum success"), # min = 20 + ([20, 25, 30, 35], 1.0, 30, 40, "maximum success"), # max = 35 + ([20, 25, 30, 35], 0.5, 25, 30, "median success"), # median ≈ 27.5 + ([20, 25, 30, 35], 0.9, 30, 40, "90th percentile success"), # ≈ 34 + ([25], 0.5, 20, 30, "single row median"), # median = 25 + ([20, None, 25, None, 30], 0.5, 20, 30, "with nulls median"), # median ≈ 25 + ] + + for data, quantile, min_val, max_val, description in test_scenarios: + data_frame = spark.createDataFrame([(val,) for val in data], ["col1"]) + + # Test through registry + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnQuantileBetween", + column_name="col1", + quantile=quantile, + min_value=min_val, + max_value=max_val, + ) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationColumnQuantileBetween") + ), f"Registry test failed for {description}: expected success but got {result}" + + # Test through suite + suite = 
DataFrameExpectationsSuite().expect_column_quantile_between(
+            column_name="col1",
+            quantile=quantile,
+            min_value=min_val,
+            max_value=max_val,
+        )
+        suite_result = suite.run(data_frame=data_frame)
+        assert suite_result is None, (
+            f"Suite test failed for {description}: expected None but got {suite_result}"
+        )
+
+
+def test_pyspark_failure_registry_and_suite(spark):
+    """Test failure validation for PySpark DataFrames through both registry and suite."""
+    # Test data scenarios for different quantiles
+    test_scenarios = [
+        # (data, quantile, min_value, max_value, expected_message)
+        (
+            [20, 25, 30, 35],
+            0.0,
+            25,
+            35,
+            "Column 'col1' minimum value 20 is not between 25 and 35.",
+        ),
+        (
+            [20, 25, 30, 35],
+            1.0,
+            40,
+            50,
+            "Column 'col1' maximum value 35 is not between 40 and 50.",
+        ),
+        (
+            [20, 25, 30, 35],
+            0.5,
+            30,
+            35,
+            f"Column 'col1' median value {np.median([20, 25, 30, 35])} is not between 30 and 35.",
+        ),
+    ]
+
+    for data, quantile, min_val, max_val, expected_message in test_scenarios:
+        data_frame = spark.createDataFrame([(val,) for val in data], ["col1"])
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnQuantileBetween",
+            column_name="col1",
+            quantile=quantile,
+            min_value=min_val,
+            max_value=max_val,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), f"Expected failure message but got: {result}"
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+            column_name="col1",
+            quantile=quantile,
+            min_value=min_val,
+            max_value=max_val,
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def test_pyspark_null_scenarios_registry_and_suite(spark):
+    """Test null scenarios for PySpark DataFrames through both registry and suite."""
+    from pyspark.sql.types import IntegerType, StructField, StructType
+
+    # Test scenarios
+    test_scenarios = [
+        # (data_frame_creation, expected_message, description)
+        (
+            lambda: spark.createDataFrame(
+                [{"col1": None}, {"col1": None}, {"col1": None}],
+                schema="struct<col1: int>",
+            ),
+            "Column 'col1' contains only null values.",
+            "all nulls",
+        ),
+        (
+            lambda: spark.createDataFrame(
+                [], StructType([StructField("col1", IntegerType(), True)])
+            ),
+            "Column 'col1' contains only null values.",
+            "empty dataframe",
+        ),
+    ]
+
+    for df_creator, expected_message, description in test_scenarios:
+        data_frame = df_creator()
+
+        # Test through registry
+        expectation = DataFrameExpectationRegistry.get_expectation(
+            expectation_name="ExpectationColumnQuantileBetween",
+            column_name="col1",
+            quantile=0.5,
+            min_value=20,
+            max_value=30,
+        )
+        result = expectation.validate(data_frame=data_frame)
+        expected_failure = DataFrameExpectationFailureMessage(
+            expectation_str=str(expectation),
+            data_frame_type=DataFrameType.PYSPARK,
+            message=expected_message,
+        )
+        assert str(result) == str(expected_failure), (
+            f"Registry test failed for {description}: expected failure message but got {result}"
+        )
+
+        # Test through suite
+        suite = DataFrameExpectationsSuite().expect_column_quantile_between(
+            column_name="col1", quantile=0.5, min_value=20, max_value=30
+        )
+        with pytest.raises(DataFrameExpectationsSuiteFailure):
+            suite.run(data_frame=data_frame)
+
+
+def
test_invalid_quantile_range(): + """Test that invalid quantile values raise ValueError.""" + invalid_quantiles = [ + (1.5, "greater than 1.0"), + (-0.1, "less than 0.0"), + ] + + for invalid_quantile, description in invalid_quantiles: + with pytest.raises(ValueError) as context: + DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnQuantileBetween", + column_name="col1", + quantile=invalid_quantile, + min_value=20, + max_value=30, + ) + assert "Quantile must be between 0.0 and 1.0" in str(context.value), ( + f"Expected quantile validation error for {description} but got: {str(context.value)}" + ) + + +def test_boundary_quantile_values(): + """Test quantile values at the boundaries (0.0 and 1.0).""" + boundary_cases = [ + (0.0, "minimum"), + (1.0, "maximum"), + ] + + for quantile, expected_desc in boundary_cases: + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnQuantileBetween", + column_name="col1", + quantile=quantile, + min_value=15, + max_value=25, + ) + assert expectation.quantile == quantile, ( + f"Expected quantile {quantile} but got: {expectation.quantile}" + ) + assert expectation.quantile_desc == expected_desc, ( + f"Expected quantile_desc '{expected_desc}' but got: {expectation.quantile_desc}" + ) + + +def test_large_dataset_performance(): + """Test the expectation with a larger dataset to ensure performance.""" + # Create a larger dataset + large_data = np.random.normal(50, 10, 1000).tolist() + data_frame = pd.DataFrame({"col1": large_data}) + + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationColumnQuantileBetween", + column_name="col1", + quantile=0.5, # median + min_value=45, + max_value=55, + ) + + result = expectation.validate(data_frame=data_frame) + # Should succeed as the median of normal(50, 10) should be around 50 + assert isinstance(result, DataFrameExpectationSuccessMessage), ( + f"Large dataset test failed: expected success but got {type(result)}" + ) diff --git a/tests/expectations_implemented/column_expectations/__init__.py b/tests/expectations_implemented/column_expectations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/__init__.py b/tests/expectations_implemented/column_expectations/any_value_expectations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py new file mode 100644 index 0000000..6e6618e --- /dev/null +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_equals.py @@ -0,0 +1,193 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + """ + Test that the expectation name is correctly returned. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueEquals", + column_name="col1", + value=5, + ) + assert expectation.get_expectation_name() == "ExpectationValueEquals", ( + f"Expected 'ExpectationValueEquals' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + """ + Test the expectation for pandas DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueEquals", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col1": [5, 5, 5]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + """ + Test the expectation for pandas DataFrame with violations. + This method should be implemented in the subclass. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueEquals", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [3, 4]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not equal to 5.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueEquals", + column_name="col1", + value=5, + ) + data_frame = spark.createDataFrame([(5,), (5,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + """ + Test the expectation for PySpark DataFrame with violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueEquals", + column_name="col1", + value=5, + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(3,), (4,)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not equal to 5.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + """ + Test that an error is raised when the specified column is missing. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueEquals", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col2": [5, 5, 5]}) + + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + """ + Test the expectation suite for pandas DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_equals( + column_name="col1", value=5 + ) + data_frame = pd.DataFrame({"col1": [5, 5, 5]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """ + Test the expectation suite for pandas DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_equals( + column_name="col1", value=5 + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_equals( + column_name="col1", value=5 + ) + data_frame = spark.createDataFrame([(5,), (5,), (5,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """ + Test the expectation suite for PySpark DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_equals( + column_name="col1", value=5 + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + """ + Test that an error is raised when the specified column is missing in PySpark DataFrame. 
+ """ + expectations_suite = DataFrameExpectationsSuite().expect_value_equals( + column_name="col1", value=5 + ) + data_frame = spark.createDataFrame([(5,), (5,), (5,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py new file mode 100644 index 0000000..3621f0a --- /dev/null +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_in.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueIn", + column_name="col1", + values=[1, 2, 3], + ) + assert expectation.get_expectation_name() == "ExpectationValueIn", ( + f"Expected 'ExpectationValueIn' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueIn") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = pd.DataFrame({"col1": [1, 4, 5, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [4, 5]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not in [1, 2, 3].", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueIn") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = spark.createDataFrame([(1,), (4,), (5,), (2,), (3,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(4,), (5,)], ["col1"]) + assert str(result) == str( + 
DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not in [1, 2, 3].", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = pd.DataFrame({"col2": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_value_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 2, 1]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_value_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = pd.DataFrame({"col1": [1, 4, 5, 2, 3]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (2,), (1,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = spark.createDataFrame([(1,), (4,), (5,), (2,), (3,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py new file mode 100644 index 0000000..2fff4b9 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_equals.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + 
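+        # fetch the registered expectation by name, binding column and value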
expectation_name="ExpectationValueNotEquals", + column_name="col1", + value=5, + ) + assert expectation.get_expectation_name() == "ExpectationValueNotEquals", ( + f"Expected 'ExpectationValueNotEquals' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotEquals", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col1": [3, 4, 6]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotEquals", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col1": [3, 5, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [5, 5]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is equal to 5.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotEquals", + column_name="col1", + value=5, + ) + data_frame = spark.createDataFrame([(3,), (4,), (6,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotEquals", + column_name="col1", + value=5, + ) + data_frame = spark.createDataFrame([(3,), (5,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(5,), (5,)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is equal to 5.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotEquals", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col2": [3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals( + column_name="col1", value=5 + ) + data_frame = pd.DataFrame({"col1": [3, 4, 6]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + 
expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals( + column_name="col1", value=5 + ) + data_frame = pd.DataFrame({"col1": [3, 5, 5]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals( + column_name="col1", value=5 + ) + data_frame = spark.createDataFrame([(3,), (4,), (6,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals( + column_name="col1", value=5 + ) + data_frame = spark.createDataFrame([(3,), (5,), (5,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_equals( + column_name="col1", value=5 + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py new file mode 100644 index 0000000..ef40723 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_in.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotIn", + column_name="col1", + values=[1, 2, 3], + ) + assert expectation.get_expectation_name() == "ExpectationValueNotIn", ( + f"Expected 'ExpectationValueNotIn' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = pd.DataFrame({"col1": [4, 5, 6, 7]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotIn") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = pd.DataFrame({"col1": [1, 2, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [1, 2]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is in [1, 2, 3].", + 
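+            # limit_violations presumably caps how many violating rows are echoed in the message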
limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = spark.createDataFrame([(4,), (5,), (6,), (7,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotIn") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = spark.createDataFrame([(1,), (2,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(1,), (2,)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is in [1, 2, 3].", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotIn", + column_name="col1", + values=[1, 2, 3], + ) + data_frame = pd.DataFrame({"col2": [4, 5, 6]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = pd.DataFrame({"col1": [4, 5, 6, 7]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = pd.DataFrame({"col1": [1, 2, 4, 5]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = spark.createDataFrame([(4,), (5,), (6,), (7,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = spark.createDataFrame([(1,), (2,), (4,), (5,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_in( + column_name="col1", values=[1, 2, 3] + ) + data_frame = spark.createDataFrame([(4,), (5,), (6,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + 
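+        # a missing column surfaces as the same suite-level failure as a value violation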
expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py new file mode 100644 index 0000000..2f5afe3 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_not_null.py @@ -0,0 +1,142 @@ +import pytest +import numpy as np +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotNull", + column_name="col1", + ) + assert expectation.get_expectation_name() == "ExpectationValueNotNull", ( + f"Expected 'ExpectationValueNotNull' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotNull", + column_name="col1", + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotNull") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotNull", + column_name="col1", + ) + data_frame = pd.DataFrame({"col1": [1, None, np.nan]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [None, np.nan]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is null.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotNull", + column_name="col1", + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNotNull") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotNull", + column_name="col1", + ) + data_frame = spark.createDataFrame([(1,), (None,), (None,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(None,), (None,)], "col1: int") + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is null.", + limit_violations=5, + ) + ), f"Expected failure message but got: 
{result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNotNull", + column_name="col1", + ) + data_frame = pd.DataFrame({"col2": [1, 2, 3]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1") + data_frame = pd.DataFrame({"col1": [1, 2, 3]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1") + data_frame = pd.DataFrame({"col1": [1, None, np.nan]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1") + data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1") + data_frame = spark.createDataFrame([(1,), (None,), (None,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_not_null(column_name="col1") + data_frame = spark.createDataFrame([(1,), (2,), (3,)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py new file mode 100644 index 0000000..7cf276f --- /dev/null +++ b/tests/expectations_implemented/column_expectations/any_value_expectations/test_expect_value_null.py @@ -0,0 +1,142 @@ +import pytest +import numpy as np +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNull", + column_name="col1", + ) + assert expectation.get_expectation_name() == "ExpectationValueNull", ( + f"Expected 'ExpectationValueNull' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNull", + column_name="col1", + ) + data_frame = 
pd.DataFrame({"col1": [None, np.nan, None]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNull") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNull", + column_name="col1", + ) + data_frame = pd.DataFrame({"col1": [None, 1, 2]}, dtype="Int64") + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [1, 2]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not null.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNull", + column_name="col1", + ) + data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int") + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueNull") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNull", + column_name="col1", + ) + data_frame = spark.createDataFrame([(None,), (1,), (2,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(1,), (2,)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not null.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueNull", + column_name="col1", + ) + data_frame = pd.DataFrame({"col2": [None, None, None]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1") + data_frame = pd.DataFrame({"col1": [None, None, None]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1") + data_frame = pd.DataFrame({"col1": [None, 1, 2]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1") + data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col1: int") + result = 
expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1") + data_frame = spark.createDataFrame([(None,), (1,), (2,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_null(column_name="col1") + data_frame = spark.createDataFrame([(None,), (None,), (None,)], "col2: int") + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/__init__.py b/tests/expectations_implemented/column_expectations/numerical_expectations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py new file mode 100644 index 0000000..dbde80b --- /dev/null +++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_between.py @@ -0,0 +1,163 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + assert expectation.get_expectation_name() == "ExpectationValueBetween", ( + f"Expected 'ExpectationValueBetween' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + data_frame = pd.DataFrame({"col1": [2, 3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueBetween") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 6]}) + expected_violations = pd.DataFrame({"col1": [1, 6]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not between 2 and 5.", + limit_violations=5, + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( 
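+        # bounds are inclusive: the success test accepts both 2 and 5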
+ expectation_name="ExpectationValueBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + data_frame = spark.createDataFrame([(2,), (3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueBetween") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (6,)], ["col1"]) + expected_violations = spark.createDataFrame([(1,), (6,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' is not between 2 and 5.", + limit_violations=5, + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueBetween", + column_name="col1", + min_value=2, + max_value=5, + ) + data_frame = pd.DataFrame({"col2": [2, 3, 4]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_value_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = pd.DataFrame({"col1": [2, 3, 4, 5]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_value_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = pd.DataFrame({"col1": [1, 2, 3, 6]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = spark.createDataFrame([(2,), (3,), (4,), (5,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_value_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = spark.createDataFrame([(1,), (2,), (3,), (6,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_column_missing_error(): + expectations_suite = DataFrameExpectationsSuite().expect_value_between( + column_name="col1", min_value=2, max_value=5 + ) + data_frame = pd.DataFrame({"col2": [2, 3, 4]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git 
a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py new file mode 100644 index 0000000..19c5d9e --- /dev/null +++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_greater_than.py @@ -0,0 +1,196 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + """ + Test that the expectation name is correctly returned. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueGreaterThan", + column_name="col1", + value=2, + ) + assert expectation.get_expectation_name() == "ExpectationValueGreaterThan", ( + f"Expected 'ExpectationValueGreaterThan' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + """ + Test the greater than expectation for pandas dataframe. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueGreaterThan", + column_name="col1", + value=2, + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueGreaterThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + """ + Test the greater than expectation for pandas dataframe with violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueGreaterThan", + column_name="col1", + value=3, + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [3]}) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 1 row(s) where 'col1' is not greater than 3.", + limit_violations=5, + ) + + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_success(spark): + """ + Test the greater than expectation for pyspark dataframe. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueGreaterThan", + column_name="col1", + value=2, + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueGreaterThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + """ + Test the greater than expectation for pyspark dataframe with violations. 
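+    A row equal to the threshold counts as a violation: the check is strictly greater-than.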
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueGreaterThan", + column_name="col1", + value=3, + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(3,)], ["col1"]) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 1 row(s) where 'col1' is not greater than 3.", + limit_violations=5, + ) + + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_column_missing_error(): + """ + Test the error when the specified column is missing in the dataframe. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueGreaterThan", + column_name="col1", + value=2, + ) + data_frame = pd.DataFrame({"col2": [3, 4, 5]}) + + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + """ + Test the greater than expectation for pandas dataframe with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than( + column_name="col1", value=2 + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """ + Test the greater than expectation for pandas dataframe with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than( + column_name="col1", value=3 + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """ + Test the greater than expectation for pyspark dataframe with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than( + column_name="col1", value=2 + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """ + Test the greater than expectation for pyspark dataframe with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than( + column_name="col1", value=3 + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_column_missing_error(): + """ + Test the greater than expectation for dataframe with missing column. 
+ """ + expectations_suite = DataFrameExpectationsSuite().expect_value_greater_than( + column_name="col1", value=2 + ) + data_frame = pd.DataFrame({"col2": [3, 4, 5]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py new file mode 100644 index 0000000..e1f4292 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/numerical_expectations/test_expect_value_less_than.py @@ -0,0 +1,198 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + """ + Test that the expectation name is correctly returned. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueLessThan", + column_name="col1", + value=2, + ) + assert expectation.get_expectation_name() == "ExpectationValueLessThan", ( + f"Expected 'ExpectationValueLessThan' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + """ + Test the less than expectation for pandas dataframe. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueLessThan", + column_name="col1", + value=6, + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueLessThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + """ + Test the less than expectation for pandas dataframe with violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueLessThan", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": [5]}) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 1 row(s) where 'col1' is not less than 5.", + limit_violations=5, + ) + + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_expectation_pyspark_success(spark): + """ + Test the less than expectation for pyspark dataframe. 
+ """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueLessThan", + column_name="col1", + value=6, + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationValueLessThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + """ + Test the less than expectation for pyspark dataframe with violations. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueLessThan", + column_name="col1", + value=5, + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([(5,)], ["col1"]) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 1 row(s) where 'col1' is not less than 5.", + limit_violations=5, + ) + + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_column_missing_error(): + """ + Test the error when the specified column is missing in the dataframe. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationValueLessThan", + column_name="col1", + value=5, + ) + data_frame = pd.DataFrame({"col2": [3, 4, 5]}) + + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + """ + Test the expectation for pandas DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_less_than( + column_name="col1", value=6 + ) + + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + """ + Test the expectation for pandas DataFrame with violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_less_than( + column_name="col1", value=5 + ) + data_frame = pd.DataFrame({"col1": [3, 4, 5]}) + + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_less_than( + column_name="col1", value=6 + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + """ + Test the expectation for PySpark DataFrame with violations. 
+ """ + expectations_suite = DataFrameExpectationsSuite().expect_value_less_than( + column_name="col1", value=5 + ) + data_frame = spark.createDataFrame([(3,), (4,), (5,)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_column_missing_error(): + """ + Test the error when the specified column is missing in the DataFrame. + """ + expectations_suite = DataFrameExpectationsSuite().expect_value_less_than( + column_name="col1", value=5 + ) + data_frame = pd.DataFrame({"col2": [3, 4, 5]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/__init__.py b/tests/expectations_implemented/column_expectations/string_expectations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py new file mode 100644 index 0000000..808c481 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_contains.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringContains", + column_name="col1", + substring="foo", + ) + assert expectation.get_expectation_name() == "ExpectationStringContains", ( + f"Expected 'ExpectationStringContains' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringContains", + column_name="col1", + substring="foo", + ) + data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "barfoo"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringContains") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringContains", + column_name="col1", + substring="foo", + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["bar", "baz"]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' does not contain 'foo'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringContains", + column_name="col1", + substring="foo", + ) + data_frame = 
spark.createDataFrame([("foobar",), ("foo123",), ("barfoo",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringContains") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringContains", + column_name="col1", + substring="foo", + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("bar",), ("baz",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' does not contain 'foo'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringContains", + column_name="col1", + substring="foo", + ) + data_frame = pd.DataFrame({"col2": ["foobar", "foo123", "barfoo"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_contains( + column_name="col1", substring="foo" + ) + data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "barfoo"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_contains( + column_name="col1", substring="foo" + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_contains( + column_name="col1", substring="foo" + ) + data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("barfoo",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_contains( + column_name="col1", substring="foo" + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_contains( + column_name="col1", substring="foo" + ) + data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("barfoo",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git 
a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py new file mode 100644 index 0000000..e2f3ca9 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_ends_with.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringEndsWith", + column_name="col1", + suffix="bar", + ) + assert expectation.get_expectation_name() == "ExpectationStringEndsWith", ( + f"Expected 'ExpectationStringEndsWith' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringEndsWith", + column_name="col1", + suffix="bar", + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbar"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringEndsWith") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringEndsWith", + column_name="col1", + suffix="bar", + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["baz"]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 1 row(s) where 'col1' does not end with 'bar'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringEndsWith", + column_name="col1", + suffix="bar", + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbar",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringEndsWith") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringEndsWith", + column_name="col1", + suffix="bar", + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("baz",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 1 row(s) where 'col1' does not end 
with 'bar'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringEndsWith", + column_name="col1", + suffix="bar", + ) + data_frame = pd.DataFrame({"col2": ["foobar", "bar", "bazbar"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with( + column_name="col1", suffix="bar" + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbar"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with( + column_name="col1", suffix="bar" + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "baz"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with( + column_name="col1", suffix="bar" + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbar",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with( + column_name="col1", suffix="bar" + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("baz",)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_ends_with( + column_name="col1", suffix="bar" + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbar",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py new file mode 100644 index 0000000..7ccd6eb --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_between.py @@ -0,0 +1,163 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthBetween", + column_name="col1", + min_length=3, + max_length=6, + ) + assert 
expectation.get_expectation_name() == "ExpectationStringLengthBetween", ( + f"Expected 'ExpectationStringLengthBetween' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthBetween", + column_name="col1", + min_length=3, + max_length=6, + ) + data_frame = pd.DataFrame({"col1": ["foo", "bazz", "hello", "foobar"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthBetween") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthBetween", + column_name="col1", + min_length=3, + max_length=6, + ) + data_frame = pd.DataFrame({"col1": ["fo", "bazz", "hellothere", "foobar"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["fo", "hellothere"]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not between 3 and 6.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthBetween", + column_name="col1", + min_length=3, + max_length=6, + ) + data_frame = spark.createDataFrame([("foo",), ("bazz",), ("hello",), ("foobar",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthBetween") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthBetween", + column_name="col1", + min_length=3, + max_length=6, + ) + data_frame = spark.createDataFrame([("fo",), ("bazz",), ("hellothere",), ("foobar",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("fo",), ("hellothere",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not between 3 and 6.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthBetween", + column_name="col1", + min_length=3, + max_length=6, + ) + data_frame = pd.DataFrame({"col2": ["foo", "bazz", "hello"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_between( 
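+ # expect_* builder methods return the suite itself, so expectations can be chained fluently (see tests/test_expectations_suite.py).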
+ column_name="col1", min_length=3, max_length=6 + ) + data_frame = pd.DataFrame({"col1": ["foo", "bazz", "hello", "foobar"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_between( + column_name="col1", min_length=3, max_length=6 + ) + data_frame = pd.DataFrame({"col1": ["fo", "bazz", "hellothere", "foobar"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_between( + column_name="col1", min_length=3, max_length=6 + ) + data_frame = spark.createDataFrame([("foo",), ("bazz",), ("hello",), ("foobar",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_between( + column_name="col1", min_length=3, max_length=6 + ) + data_frame = spark.createDataFrame([("fo",), ("bazz",), ("hellothere",), ("foobar",)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_between( + column_name="col1", min_length=3, max_length=6 + ) + data_frame = spark.createDataFrame([("foo",), ("bazz",), ("hello",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py new file mode 100644 index 0000000..ff9c3e7 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_equals.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthEquals", + column_name="col1", + length=3, + ) + assert expectation.get_expectation_name() == "ExpectationStringLengthEquals", ( + f"Expected 'ExpectationStringLengthEquals' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthEquals", + column_name="col1", + length=3, + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = 
DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthEquals", + column_name="col1", + length=3, + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazz", "foobar"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["bazz", "foobar"]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not equal to 3.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthEquals", + column_name="col1", + length=3, + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthEquals") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthEquals", + column_name="col1", + length=3, + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazz",), ("foobar",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("bazz",), ("foobar",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not equal to 3.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthEquals", + column_name="col1", + length=3, + ) + data_frame = pd.DataFrame({"col2": ["foo", "bar", "baz"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals( + column_name="col1", length=3 + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals( + column_name="col1", length=3 + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazz", "foobar"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals( + column_name="col1", length=3 + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, 
"Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals( + column_name="col1", length=3 + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazz",), ("foobar",)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_equals( + column_name="col1", length=3 + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py new file mode 100644 index 0000000..41cc74a --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_greater_than.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name="col1", + length=3, + ) + assert expectation.get_expectation_name() == "ExpectationStringLengthGreaterThan", ( + f"Expected 'ExpectationStringLengthGreaterThan' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name="col1", + length=3, + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bazz", "hello"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthGreaterThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name="col1", + length=3, + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazzz"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["foo", "bar"]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not greater than 3.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name="col1", + length=3, + ) + data_frame = spark.createDataFrame([("foobar",), ("bazz",), ("hello",)], ["col1"]) + result = 
expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthGreaterThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name="col1", + length=3, + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazzz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("foo",), ("bar",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not greater than 3.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthGreaterThan", + column_name="col1", + length=3, + ) + data_frame = pd.DataFrame({"col2": ["foobar", "bazz", "hello"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than( + column_name="col1", length=3 + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bazz", "hello"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than( + column_name="col1", length=3 + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "bazzz"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than( + column_name="col1", length=3 + ) + data_frame = spark.createDataFrame([("foobar",), ("bazz",), ("hello",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than( + column_name="col1", length=3 + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("bazzz",)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_greater_than( + column_name="col1", length=3 + ) + data_frame = spark.createDataFrame([("foobar",), ("bazz",), ("hello",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py 
b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py new file mode 100644 index 0000000..5fe0712 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_length_less_than.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthLessThan", + column_name="col1", + length=5, + ) + assert expectation.get_expectation_name() == "ExpectationStringLengthLessThan", ( + f"Expected 'ExpectationStringLengthLessThan' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthLessThan", + column_name="col1", + length=5, + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthLessThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthLessThan", + column_name="col1", + length=5, + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbaz"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["foobar", "bazbaz"]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not less than 5.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthLessThan", + column_name="col1", + length=5, + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringLengthLessThan") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthLessThan", + column_name="col1", + length=5, + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbaz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("foobar",), ("bazbaz",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' length is not less than 5.", + 
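+ # limit_violations caps how many violating rows are rendered in the failure output (behaviour exercised in test_failure_message_with_violations_dataframe in tests/test_result_message.py).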
limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringLengthLessThan", + column_name="col1", + length=5, + ) + data_frame = pd.DataFrame({"col2": ["foo", "bar", "baz"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than( + column_name="col1", length=5 + ) + data_frame = pd.DataFrame({"col1": ["foo", "bar", "baz"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than( + column_name="col1", length=5 + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "bazbaz"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than( + column_name="col1", length=5 + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than( + column_name="col1", length=5 + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("bazbaz",)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_length_less_than( + column_name="col1", length=5 + ) + data_frame = spark.createDataFrame([("foo",), ("bar",), ("baz",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py new file mode 100644 index 0000000..0402f04 --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_not_contains.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringNotContains", + column_name="col1", + substring="foo", + ) + assert expectation.get_expectation_name() == 
"ExpectationStringNotContains", ( + f"Expected 'ExpectationStringNotContains' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringNotContains", + column_name="col1", + substring="foo", + ) + data_frame = pd.DataFrame({"col1": ["bar", "baz", "qux"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringNotContains") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringNotContains", + column_name="col1", + substring="foo", + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "foo"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["foobar", "foo"]}) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' contains 'foo'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringNotContains", + column_name="col1", + substring="foo", + ) + data_frame = spark.createDataFrame([("bar",), ("baz",), ("qux",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringNotContains") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringNotContains", + column_name="col1", + substring="foo", + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("foo",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("foobar",), ("foo",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' contains 'foo'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringNotContains", + column_name="col1", + substring="foo", + ) + data_frame = pd.DataFrame({"col2": ["bar", "baz", "qux"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains( + column_name="col1", substring="foo" + ) + data_frame = pd.DataFrame({"col1": ["bar", "baz", "qux"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be 
raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains( + column_name="col1", substring="foo" + ) + data_frame = pd.DataFrame({"col1": ["foobar", "bar", "foo"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains( + column_name="col1", substring="foo" + ) + data_frame = spark.createDataFrame([("bar",), ("baz",), ("qux",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains( + column_name="col1", substring="foo" + ) + data_frame = spark.createDataFrame([("foobar",), ("bar",), ("foo",)], ["col1"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_not_contains( + column_name="col1", substring="foo" + ) + data_frame = spark.createDataFrame([("bar",), ("baz",), ("qux",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py new file mode 100644 index 0000000..766506f --- /dev/null +++ b/tests/expectations_implemented/column_expectations/string_expectations/test_expect_string_starts_with.py @@ -0,0 +1,157 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationSuccessMessage, +) + + +def test_expectation_name(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringStartsWith", + column_name="col1", + prefix="foo", + ) + assert expectation.get_expectation_name() == "ExpectationStringStartsWith", ( + f"Expected 'ExpectationStringStartsWith' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringStartsWith", + column_name="col1", + prefix="foo", + ) + data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "foobaz"]}) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringStartsWith") + ), f"Expected success message but got: {result}" + + +def test_expectation_pandas_violations(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringStartsWith", + column_name="col1", + prefix="foo", + ) + data_frame = pd.DataFrame({"col1": ["foobar", "barfoo", "baz"]}) + result = expectation.validate(data_frame=data_frame) + + expected_violations = pd.DataFrame({"col1": ["barfoo", "baz"]}) + assert str(result) == 
str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' does not start with 'foo'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_expectation_pyspark_success(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringStartsWith", + column_name="col1", + prefix="foo", + ) + data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("foobaz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + assert str(result) == str( + DataFrameExpectationSuccessMessage(expectation_name="ExpectationStringStartsWith") + ), f"Expected success message but got: {result}" + + +def test_expectation_pyspark_violations(spark): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringStartsWith", + column_name="col1", + prefix="foo", + ) + data_frame = spark.createDataFrame([("foobar",), ("barfoo",), ("baz",)], ["col1"]) + result = expectation.validate(data_frame=data_frame) + + expected_violations = spark.createDataFrame([("barfoo",), ("baz",)], ["col1"]) + assert str(result) == str( + DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PYSPARK, + violations_data_frame=expected_violations, + message="Found 2 row(s) where 'col1' does not start with 'foo'.", + limit_violations=5, + ) + ), f"Expected failure message but got: {result}" + + +def test_column_missing_error(): + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationStringStartsWith", + column_name="col1", + prefix="foo", + ) + data_frame = pd.DataFrame({"col2": ["foobar", "foo123", "foobaz"]}) + result = expectation.validate(data_frame=data_frame) + expected_failure_message = DataFrameExpectationFailureMessage( + expectation_str=str(expectation), + data_frame_type=DataFrameType.PANDAS, + message="Column 'col1' does not exist in the DataFrame.", + ) + assert str(result) == str(expected_failure_message), ( + f"Expected failure message but got: {result}" + ) + + +def test_suite_pandas_success(): + expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with( + column_name="col1", prefix="foo" + ) + data_frame = pd.DataFrame({"col1": ["foobar", "foo123", "foobaz"]}) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pandas_violations(): + expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with( + column_name="col1", prefix="foo" + ) + data_frame = pd.DataFrame({"col1": ["foobar", "barfoo", "baz"]}) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_success(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with( + column_name="col1", prefix="foo" + ) + data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("foobaz",)], ["col1"]) + result = expectations_suite.run(data_frame=data_frame) + assert result is None, "Expected no exceptions to be raised" + + +def test_suite_pyspark_violations(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with( + column_name="col1", prefix="foo" + ) + data_frame = spark.createDataFrame([("foobar",), ("barfoo",), ("baz",)], ["col1"]) + with 
pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) + + +def test_suite_pyspark_column_missing_error(spark): + expectations_suite = DataFrameExpectationsSuite().expect_string_starts_with( + column_name="col1", prefix="foo" + ) + data_frame = spark.createDataFrame([("foobar",), ("foo123",), ("foobaz",)], ["col2"]) + with pytest.raises(DataFrameExpectationsSuiteFailure): + expectations_suite.run(data_frame=data_frame) diff --git a/tests/expectations_implemented/template_test_expectation.py b/tests/expectations_implemented/template_test_expectation.py new file mode 100644 index 0000000..0e0da24 --- /dev/null +++ b/tests/expectations_implemented/template_test_expectation.py @@ -0,0 +1,82 @@ +from dataframe_expectations.expectations.expectation_registry import ( + DataFrameExpectationRegistry, +) + + +def test_expectation_name(): + """ + Test that the expectation name is correctly returned. + This method should be implemented in the subclass. + """ + expectation = DataFrameExpectationRegistry.get_expectation( + expectation_name="ExpectationDoesSomeCheck", + column_name="col1", + value=5, + ) + assert expectation.get_expectation_name() == "ExpectationDoesSomeCheck", ( + f"Expected 'ExpectationDoesSomeCheck' but got: {expectation.get_expectation_name()}" + ) + + +def test_expectation_pandas_success(): + """ + Test the expectation for pandas DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + +def test_expectation_pandas_violations(): + """ + Test the expectation for pandas DataFrame with violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + +def test_expectation_pyspark_success(spark): + """ + Test the expectation for PySpark DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + +def test_expectation_pyspark_violations(spark): + """ + Test the expectation for PySpark DataFrame with violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + +def test_suite_pandas_success(): + """ + Test the expectation suite for pandas DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + +def test_suite_pandas_violations(): + """ + Test the expectation suite for pandas DataFrame with violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + +def test_suite_pyspark_success(spark): + """ + Test the expectation suite for PySpark DataFrame with no violations. + This method should be implemented in the subclass. + """ + raise NotImplementedError("Subclasses should implement this method.") + + +def test_suite_pyspark_violations(spark): + """ + Test the expectation suite for PySpark DataFrame with violations. + This method should be implemented in the subclass. 
+ """ + raise NotImplementedError("Subclasses should implement this method.") diff --git a/tests/test_expectations_suite.py b/tests/test_expectations_suite.py new file mode 100644 index 0000000..c3f571f --- /dev/null +++ b/tests/test_expectations_suite.py @@ -0,0 +1,184 @@ +import pytest +import pandas as pd + +from dataframe_expectations import DataFrameType +from dataframe_expectations.expectations_suite import ( + DataFrameExpectationsSuite, + DataFrameExpectationsSuiteFailure, +) +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, +) + + +def test_suite_success(): + """ + Test the ExpectationsSuite with a successful expectation. + """ + + # No expectations + suite = DataFrameExpectationsSuite() + result = suite.run(data_frame=pd.DataFrame()) + assert result is None, "Expected no result for empty suite" + + # All succeeding expectations + suite = ( + DataFrameExpectationsSuite() + .expect_value_greater_than(column_name="col1", value=2) + .expect_value_less_than(column_name="col1", value=10) + ) + data_Frame = pd.DataFrame({"col1": [3, 4, 5]}) + result = suite.run(data_frame=data_Frame) + assert result is None, "Expected no result for successful suite" + + +def test_suite_failure(): + """ + Test the ExpectationsSuite with a failing expectation. + """ + + # Any 1 violation causes the suite to fail + suite = ( + DataFrameExpectationsSuite() + .expect_value_greater_than(column_name="col1", value=2) + .expect_value_less_than(column_name="col1", value=3) + ) + data_Frame = pd.DataFrame({"col1": [3, 4, 5]}) + + with pytest.raises(DataFrameExpectationsSuiteFailure): + suite.run(data_frame=data_Frame) + + +def test_invalid_data_frame_type(): + """ + Test that an invalid DataFrame type raises a ValueError. + """ + + suite = ( + DataFrameExpectationsSuite() + .expect_value_greater_than(column_name="col1", value=2) + .expect_value_less_than(column_name="col1", value=10) + ) + data_Frame = None + + with pytest.raises(ValueError): + suite.run(data_frame=data_Frame) + + +def test_suite_with_supported_dataframe_types(spark): + """ + Test the ExpectationsSuite with all supported DataFrame types. + """ + + suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=1) + + # Test with pandas DataFrame + pandas_df = pd.DataFrame({"col1": [1, 2, 3]}) + result = suite.run(data_frame=pandas_df) + assert result is None, "Expected success for pandas DataFrame" + + # Test with PySpark DataFrame + spark_df = spark.createDataFrame([(1,), (2,), (3,)], ["col1"]) + result = suite.run(data_frame=spark_df) + assert result is None, "Expected success for PySpark DataFrame" + + +def test_suite_with_unsupported_dataframe_types(): + """ + Test the ExpectationsSuite with unsupported DataFrame types. + """ + suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=1) + + # Test various unsupported types + unsupported_types = [ + None, + "not_a_dataframe", + [1, 2, 3], + {"col1": [1, 2, 3]}, + 42, + True, + ] + + for unsupported_data in unsupported_types: + with pytest.raises(ValueError) as context: + suite.run(data_frame=unsupported_data) + assert "Unsupported DataFrame type" in str(context.value), ( + f"Expected unsupported type error for {type(unsupported_data)}" + ) + + +def test_suite_with_pyspark_connect_dataframe(): + """ + Test the ExpectationsSuite with PySpark Connect DataFrame (if available). 
+ """ + from unittest.mock import patch + + # Mock a Connect DataFrame + class MockConnectDataFrame: + def __init__(self): + self.is_cached = False + + def cache(self): + self.is_cached = True + return self + + def unpersist(self): + self.is_cached = False + return self + + suite = DataFrameExpectationsSuite().expect_min_rows(min_rows=0) + + with patch( + "dataframe_expectations.expectations.PySparkConnectDataFrame", + MockConnectDataFrame, + ): + # Create mock expectation that can handle Connect DataFrame + with patch.object( + suite._DataFrameExpectationsSuite__expectations[0], "validate" + ) as mock_validate: + from dataframe_expectations.result_message import ( + DataFrameExpectationSuccessMessage, + ) + + mock_validate.return_value = DataFrameExpectationSuccessMessage( + expectation_name="MockExpectation" + ) + + mock_connect_df = MockConnectDataFrame() + result = suite.run(data_frame=mock_connect_df) + assert result is None, "Expected success for mock Connect DataFrame" + + +def test_expectation_suite_failure_message(): + failed_expectation_messages = [ + DataFrameExpectationFailureMessage( + expectation_str="ExpectationValueGreaterThan", + data_frame_type=DataFrameType.PANDAS, + message="Failed expectation 1", + ), + DataFrameExpectationFailureMessage( + expectation_str="ExpectationValueGreaterThan", + data_frame_type=DataFrameType.PANDAS, + message="Failed expectation 2", + ), + ] + + suite_failure = DataFrameExpectationsSuiteFailure( + total_expectations=4, + failures=failed_expectation_messages, + ) + + expected_str = ( + "(2/4) expectations failed.\n\n" + f"{'=' * 80}\n" + "List of violations:\n" + f"{'-' * 80}" + "\n[Failed 1/2] ExpectationValueGreaterThan: Failed expectation 1\n" + f"{'-' * 80}\n" + "[Failed 2/2] ExpectationValueGreaterThan: Failed expectation 2\n" + f"{'=' * 80}" + ) + + assert str(suite_failure) == expected_str, ( + f"Expected suite failure message but got: {str(suite_failure)}" + ) diff --git a/tests/test_result_message.py b/tests/test_result_message.py new file mode 100644 index 0000000..def309f --- /dev/null +++ b/tests/test_result_message.py @@ -0,0 +1,194 @@ +import pytest +import pandas as pd +from tabulate import tabulate # type: ignore + +from dataframe_expectations import DataFrameType +from dataframe_expectations.result_message import ( + DataFrameExpectationFailureMessage, + DataFrameExpectationResultMessage, + DataFrameExpectationSuccessMessage, +) +from tests.conftest import assert_pandas_df_equal + + +def test_result_message_empty(): + """ + By default the result message should be empty. + """ + result_message = DataFrameExpectationResultMessage() + + assert str(result_message) == "", ( + f"Expected empty result message but got: {str(result_message)}" + ) + + +def test_data_frame_to_str_pandas(): + """ + Test the dataframe_to_str method with a mock DataFrame. + """ + pandas_dataframe = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + result_message = DataFrameExpectationResultMessage() + + expected_dataframe_str = tabulate( + pandas_dataframe.head(2), headers="keys", tablefmt="pretty", showindex=False + ) + + actual_str = result_message.dataframe_to_str( + data_frame_type=DataFrameType.PANDAS, + data_frame=pandas_dataframe, + rows=2, + ) + assert actual_str == expected_dataframe_str, ( + f"Expected pandas dataframe string but got: {actual_str}" + ) + + +def test_dataframe_to_str_pyspark(spark): + """ + Test the dataframe_to_str method with a mock PySpark DataFrame. 
+ """ + pyspark_dataframe = spark.createDataFrame([(1, "a"), (2, "b"), (3, "c")], ["col1", "col2"]) + + result_message = DataFrameExpectationResultMessage() + + expected_dataframe_str = tabulate( + pyspark_dataframe.limit(2).toPandas(), + headers="keys", + tablefmt="pretty", + showindex=False, + ) + + actual_str = result_message.dataframe_to_str( + data_frame_type=DataFrameType.PYSPARK, + data_frame=pyspark_dataframe, + rows=2, + ) + assert actual_str == expected_dataframe_str, ( + f"Expected pyspark dataframe string but got: {actual_str}" + ) + + +def test_dataframe_to_str_invalid_type(): + """ + Test the dataframe_to_str method with an invalid DataFrame type. + """ + result_message = DataFrameExpectationResultMessage() + + with pytest.raises(ValueError) as context: + result_message.dataframe_to_str( + data_frame_type="invalid_type", data_frame=pd.DataFrame(), rows=2 + ) + + assert str(context.value) == "Unsupported DataFrame type: invalid_type", ( + f"Expected ValueError message but got: {str(context.value)}" + ) + + +def test_success_message_no_additional_message(): + """ + Test the success message initialization and string representation. Test with no additional message + """ + expectation_name = "TestExpectation" + success_message = DataFrameExpectationSuccessMessage(expectation_name) + message_str = str(success_message) + assert expectation_name in message_str, ( + f"Expectation name should be in the message: {message_str}" + ) + + +def test_success_message_with_additional_message(): + """ + Test the success message initialization and string representation. Test with an additional message + """ + expectation_name = "TestExpectation" + additional_message = "This is a success message." + success_message_with_additional = DataFrameExpectationSuccessMessage( + expectation_name, additional_message + ) + message_str = str(success_message_with_additional) + assert expectation_name in message_str, ( + f"Expectation name should be in the message: {message_str}" + ) + assert additional_message in message_str, ( + f"Additional message should be in the success message: {message_str}" + ) + + +def test_failure_message_default_params(): + """ + Test the failure message initialization and string representation with default parameters. + """ + expectation_name = "TestExpectation" + data_frame_type = None + failure_message = DataFrameExpectationFailureMessage(expectation_name, data_frame_type) + + message_str = str(failure_message) + assert expectation_name in message_str, ( + f"Expectation name should be in the message: {message_str}" + ) + + violations_df = failure_message.get_violations_data_frame() + assert violations_df is None, ( + f"Violations DataFrame should be None when not provided but got: {violations_df}" + ) + + +def test_failure_message_custom_message(): + """ + Test the failure message initialization and string representation with a custom message. + """ + expectation_name = "TestExpectation" + data_frame_type = None + custom_message = "This is a custom failure message." 
+ failure_message = DataFrameExpectationFailureMessage( + expectation_str=expectation_name, + data_frame_type=data_frame_type, + message=custom_message, + ) + + message_str = str(failure_message) + assert expectation_name in message_str, ( + f"Expectation name should be in the message: {message_str}" + ) + assert custom_message in message_str, ( + f"Custom message should be in the failure message: {message_str}" + ) + + violations_df = failure_message.get_violations_data_frame() + assert violations_df is None, ( + f"Violations DataFrame should be None when not provided but got: {violations_df}" + ) + + +def test_failure_message_with_violations_dataframe(): + """ + Test the failure message initialization and string representation with a violations DataFrame. + """ + expectation_name = "TestExpectation" + data_frame_type = DataFrameType.PANDAS + violations_dataframe = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) + failure_message = DataFrameExpectationFailureMessage( + expectation_str=expectation_name, + data_frame_type=data_frame_type, + violations_data_frame=violations_dataframe, + limit_violations=2, + ) + + expected_dataframe = violations_dataframe + expected_dataframe_str = tabulate( + expected_dataframe.head(2), + headers="keys", + tablefmt="pretty", + showindex=False, + ) + + message_str = str(failure_message) + assert expectation_name in message_str, ( + f"Expectation name should be in the message: {message_str}" + ) + assert expected_dataframe_str in message_str, ( + f"Violations DataFrame should be included in the message: {message_str}" + ) + + actual_violations_df = failure_message.get_violations_data_frame() + assert_pandas_df_equal(actual_violations_df, expected_dataframe) diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..2a6924d --- /dev/null +++ b/uv.lock @@ -0,0 +1,1331 @@ +version = 1 +requires-python = ">=3.10" +resolution-markers = [ + "python_full_version < '3.11'", + "python_full_version == '3.11.*'", + "python_full_version >= '3.12'", +] + +[[package]] +name = "accessible-pygments" +version = "0.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/c1/bbac6a50d02774f91572938964c582fff4270eee73ab822a4aeea4d8b11b/accessible_pygments-0.0.5.tar.gz", hash = "sha256:40918d3e6a2b619ad424cb91e556bd3bd8865443d9f22f1dcdf79e33c8046872", size = 1377899 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903 }, +] + +[[package]] +name = "alabaster" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, +] + +[[package]] +name = "anyio" +version = "4.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "idna" }, + 
{ name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097 }, +] + +[[package]] +name = "babel" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537 }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392 }, +] + +[[package]] +name = "certifi" +version = "2025.10.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286 }, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 } +wheels = 
[ + { url = "https://files.pythonhosted.org/packages/1f/b8/6d51fc1d52cbd52cd4ccedd5b5b2f0f6a11bbf6765c782298b0f3e808541/charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", size = 209709 }, + { url = "https://files.pythonhosted.org/packages/5c/af/1f9d7f7faafe2ddfb6f72a2e07a548a629c61ad510fe60f9630309908fef/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", size = 148814 }, + { url = "https://files.pythonhosted.org/packages/79/3d/f2e3ac2bbc056ca0c204298ea4e3d9db9b4afe437812638759db2c976b5f/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", size = 144467 }, + { url = "https://files.pythonhosted.org/packages/ec/85/1bf997003815e60d57de7bd972c57dc6950446a3e4ccac43bc3070721856/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", size = 162280 }, + { url = "https://files.pythonhosted.org/packages/3e/8e/6aa1952f56b192f54921c436b87f2aaf7c7a7c3d0d1a765547d64fd83c13/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", size = 159454 }, + { url = "https://files.pythonhosted.org/packages/36/3b/60cbd1f8e93aa25d1c669c649b7a655b0b5fb4c571858910ea9332678558/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", size = 153609 }, + { url = "https://files.pythonhosted.org/packages/64/91/6a13396948b8fd3c4b4fd5bc74d045f5637d78c9675585e8e9fbe5636554/charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", size = 151849 }, + { url = "https://files.pythonhosted.org/packages/b7/7a/59482e28b9981d105691e968c544cc0df3b7d6133152fb3dcdc8f135da7a/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", size = 151586 }, + { url = "https://files.pythonhosted.org/packages/92/59/f64ef6a1c4bdd2baf892b04cd78792ed8684fbc48d4c2afe467d96b4df57/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", size = 145290 }, + { url = "https://files.pythonhosted.org/packages/6b/63/3bf9f279ddfa641ffa1962b0db6a57a9c294361cc2f5fcac997049a00e9c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", size = 163663 }, + { url = "https://files.pythonhosted.org/packages/ed/09/c9e38fc8fa9e0849b172b581fd9803bdf6e694041127933934184e19f8c3/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", size = 151964 }, + { url = "https://files.pythonhosted.org/packages/d2/d1/d28b747e512d0da79d8b6a1ac18b7ab2ecfd81b2944c4c710e166d8dd09c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", size = 161064 }, + { url = "https://files.pythonhosted.org/packages/bb/9a/31d62b611d901c3b9e5500c36aab0ff5eb442043fb3a1c254200d3d397d9/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", size = 155015 }, + { url = "https://files.pythonhosted.org/packages/1f/f3/107e008fa2bff0c8b9319584174418e5e5285fef32f79d8ee6a430d0039c/charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", size = 99792 }, + { url = "https://files.pythonhosted.org/packages/eb/66/e396e8a408843337d7315bab30dbf106c38966f1819f123257f5520f8a96/charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", size = 107198 }, + { url = "https://files.pythonhosted.org/packages/b5/58/01b4f815bf0312704c267f2ccb6e5d42bcc7752340cd487bc9f8c3710597/charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", size = 100262 }, + { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988 }, + { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324 }, + { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742 }, + { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863 }, + { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837 }, + { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550 }, + { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162 }, + { url = 
"https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019 }, + { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310 }, + { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022 }, + { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383 }, + { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098 }, + { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991 }, + { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456 }, + { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978 }, + { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969 }, + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 }, + { url = 
"https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 }, + { url = 
"https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = 
"sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 }, + { url = 
"https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 }, +] + +[[package]] +name = "click" +version = "8.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "coverage" +version = "7.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/38/ee22495420457259d2f3390309505ea98f98a5eed40901cf62196abad006/coverage-7.11.0.tar.gz", hash = "sha256:167bd504ac1ca2af7ff3b81d245dfea0292c5032ebef9d66cc08a7d28c1b8050", size = 811905 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/95/c49df0aceb5507a80b9fe5172d3d39bf23f05be40c23c8d77d556df96cec/coverage-7.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eb53f1e8adeeb2e78962bade0c08bfdc461853c7969706ed901821e009b35e31", size = 215800 }, + { url = "https://files.pythonhosted.org/packages/dc/c6/7bb46ce01ed634fff1d7bb53a54049f539971862cc388b304ff3c51b4f66/coverage-7.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9a03ec6cb9f40a5c360f138b88266fd8f58408d71e89f536b4f91d85721d075", size = 216198 }, + { url = "https://files.pythonhosted.org/packages/94/b2/75d9d8fbf2900268aca5de29cd0a0fe671b0f69ef88be16767cc3c828b85/coverage-7.11.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0d7f0616c557cbc3d1c2090334eddcbb70e1ae3a40b07222d62b3aa47f608fab", size = 242953 }, + { url = "https://files.pythonhosted.org/packages/65/ac/acaa984c18f440170525a8743eb4b6c960ace2dbad80dc22056a437fc3c6/coverage-7.11.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e44a86a47bbdf83b0a3ea4d7df5410d6b1a0de984fbd805fa5101f3624b9abe0", size = 244766 }, + { url = "https://files.pythonhosted.org/packages/d8/0d/938d0bff76dfa4a6b228c3fc4b3e1c0e2ad4aa6200c141fcda2bd1170227/coverage-7.11.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:596763d2f9a0ee7eec6e643e29660def2eef297e1de0d334c78c08706f1cb785", size = 246625 }, + { url = "https://files.pythonhosted.org/packages/38/54/8f5f5e84bfa268df98f46b2cb396b1009734cfb1e5d6adb663d284893b32/coverage-7.11.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ef55537ff511b5e0a43edb4c50a7bf7ba1c3eea20b4f49b1490f1e8e0e42c591", size = 243568 }, + { url = "https://files.pythonhosted.org/packages/68/30/8ba337c2877fe3f2e1af0ed7ff4be0c0c4aca44d6f4007040f3ca2255e99/coverage-7.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cbabd8f4d0d3dc571d77ae5bdbfa6afe5061e679a9d74b6797c48d143307088", size = 244665 }, + { url = "https://files.pythonhosted.org/packages/cc/fb/c6f1d6d9a665536b7dde2333346f0cc41dc6a60bd1ffc10cd5c33e7eb000/coverage-7.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e24045453384e0ae2a587d562df2a04d852672eb63051d16096d3f08aa4c7c2f", size = 242681 }, + { url = 
"https://files.pythonhosted.org/packages/be/38/1b532319af5f991fa153c20373291dc65c2bf532af7dbcffdeef745c8f79/coverage-7.11.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:7161edd3426c8d19bdccde7d49e6f27f748f3c31cc350c5de7c633fea445d866", size = 242912 }, + { url = "https://files.pythonhosted.org/packages/67/3d/f39331c60ef6050d2a861dc1b514fa78f85f792820b68e8c04196ad733d6/coverage-7.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d4ed4de17e692ba6415b0587bc7f12bc80915031fc9db46a23ce70fc88c9841", size = 243559 }, + { url = "https://files.pythonhosted.org/packages/4b/55/cb7c9df9d0495036ce582a8a2958d50c23cd73f84a23284bc23bd4711a6f/coverage-7.11.0-cp310-cp310-win32.whl", hash = "sha256:765c0bc8fe46f48e341ef737c91c715bd2a53a12792592296a095f0c237e09cf", size = 218266 }, + { url = "https://files.pythonhosted.org/packages/68/a8/b79cb275fa7bd0208767f89d57a1b5f6ba830813875738599741b97c2e04/coverage-7.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:24d6f3128f1b2d20d84b24f4074475457faedc3d4613a7e66b5e769939c7d969", size = 219169 }, + { url = "https://files.pythonhosted.org/packages/49/3a/ee1074c15c408ddddddb1db7dd904f6b81bc524e01f5a1c5920e13dbde23/coverage-7.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d58ecaa865c5b9fa56e35efc51d1014d4c0d22838815b9fce57a27dd9576847", size = 215912 }, + { url = "https://files.pythonhosted.org/packages/70/c4/9f44bebe5cb15f31608597b037d78799cc5f450044465bcd1ae8cb222fe1/coverage-7.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b679e171f1c104a5668550ada700e3c4937110dbdd153b7ef9055c4f1a1ee3cc", size = 216310 }, + { url = "https://files.pythonhosted.org/packages/42/01/5e06077cfef92d8af926bdd86b84fb28bf9bc6ad27343d68be9b501d89f2/coverage-7.11.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ca61691ba8c5b6797deb221a0d09d7470364733ea9c69425a640f1f01b7c5bf0", size = 246706 }, + { url = "https://files.pythonhosted.org/packages/40/b8/7a3f1f33b35cc4a6c37e759137533119560d06c0cc14753d1a803be0cd4a/coverage-7.11.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:aef1747ede4bd8ca9cfc04cc3011516500c6891f1b33a94add3253f6f876b7b7", size = 248634 }, + { url = "https://files.pythonhosted.org/packages/7a/41/7f987eb33de386bc4c665ab0bf98d15fcf203369d6aacae74f5dd8ec489a/coverage-7.11.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1839d08406e4cba2953dcc0ffb312252f14d7c4c96919f70167611f4dee2623", size = 250741 }, + { url = "https://files.pythonhosted.org/packages/23/c1/a4e0ca6a4e83069fb8216b49b30a7352061ca0cb38654bd2dc96b7b3b7da/coverage-7.11.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e0eb0a2dcc62478eb5b4cbb80b97bdee852d7e280b90e81f11b407d0b81c4287", size = 246837 }, + { url = "https://files.pythonhosted.org/packages/5d/03/ced062a17f7c38b4728ff76c3acb40d8465634b20b4833cdb3cc3a74e115/coverage-7.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bc1fbea96343b53f65d5351d8fd3b34fd415a2670d7c300b06d3e14a5af4f552", size = 248429 }, + { url = "https://files.pythonhosted.org/packages/97/af/a7c6f194bb8c5a2705ae019036b8fe7f49ea818d638eedb15fdb7bed227c/coverage-7.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:214b622259dd0cf435f10241f1333d32caa64dbc27f8790ab693428a141723de", size = 246490 }, + { url = "https://files.pythonhosted.org/packages/ab/c3/aab4df02b04a8fde79068c3c41ad7a622b0ef2b12e1ed154da986a727c3f/coverage-7.11.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = 
"sha256:258d9967520cca899695d4eb7ea38be03f06951d6ca2f21fb48b1235f791e601", size = 246208 }, + { url = "https://files.pythonhosted.org/packages/30/d8/e282ec19cd658238d60ed404f99ef2e45eed52e81b866ab1518c0d4163cf/coverage-7.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cf9e6ff4ca908ca15c157c409d608da77a56a09877b97c889b98fb2c32b6465e", size = 247126 }, + { url = "https://files.pythonhosted.org/packages/d1/17/a635fa07fac23adb1a5451ec756216768c2767efaed2e4331710342a3399/coverage-7.11.0-cp311-cp311-win32.whl", hash = "sha256:fcc15fc462707b0680cff6242c48625da7f9a16a28a41bb8fd7a4280920e676c", size = 218314 }, + { url = "https://files.pythonhosted.org/packages/2a/29/2ac1dfcdd4ab9a70026edc8d715ece9b4be9a1653075c658ee6f271f394d/coverage-7.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:865965bf955d92790f1facd64fe7ff73551bd2c1e7e6b26443934e9701ba30b9", size = 219203 }, + { url = "https://files.pythonhosted.org/packages/03/21/5ce8b3a0133179115af4c041abf2ee652395837cb896614beb8ce8ddcfd9/coverage-7.11.0-cp311-cp311-win_arm64.whl", hash = "sha256:5693e57a065760dcbeb292d60cc4d0231a6d4b6b6f6a3191561e1d5e8820b745", size = 217879 }, + { url = "https://files.pythonhosted.org/packages/c4/db/86f6906a7c7edc1a52b2c6682d6dd9be775d73c0dfe2b84f8923dfea5784/coverage-7.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9c49e77811cf9d024b95faf86c3f059b11c0c9be0b0d61bc598f453703bd6fd1", size = 216098 }, + { url = "https://files.pythonhosted.org/packages/21/54/e7b26157048c7ba555596aad8569ff903d6cd67867d41b75287323678ede/coverage-7.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a61e37a403a778e2cda2a6a39abcc895f1d984071942a41074b5c7ee31642007", size = 216331 }, + { url = "https://files.pythonhosted.org/packages/b9/19/1ce6bf444f858b83a733171306134a0544eaddf1ca8851ede6540a55b2ad/coverage-7.11.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c79cae102bb3b1801e2ef1511fb50e91ec83a1ce466b2c7c25010d884336de46", size = 247825 }, + { url = "https://files.pythonhosted.org/packages/71/0b/d3bcbbc259fcced5fb67c5d78f6e7ee965f49760c14afd931e9e663a83b2/coverage-7.11.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:16ce17ceb5d211f320b62df002fa7016b7442ea0fd260c11cec8ce7730954893", size = 250573 }, + { url = "https://files.pythonhosted.org/packages/58/8d/b0ff3641a320abb047258d36ed1c21d16be33beed4152628331a1baf3365/coverage-7.11.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:80027673e9d0bd6aef86134b0771845e2da85755cf686e7c7c59566cf5a89115", size = 251706 }, + { url = "https://files.pythonhosted.org/packages/59/c8/5a586fe8c7b0458053d9c687f5cff515a74b66c85931f7fe17a1c958b4ac/coverage-7.11.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4d3ffa07a08657306cd2215b0da53761c4d73cb54d9143b9303a6481ec0cd415", size = 248221 }, + { url = "https://files.pythonhosted.org/packages/d0/ff/3a25e3132804ba44cfa9a778cdf2b73dbbe63ef4b0945e39602fc896ba52/coverage-7.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a3b6a5f8b2524fd6c1066bc85bfd97e78709bb5e37b5b94911a6506b65f47186", size = 249624 }, + { url = "https://files.pythonhosted.org/packages/c5/12/ff10c8ce3895e1b17a73485ea79ebc1896a9e466a9d0f4aef63e0d17b718/coverage-7.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fcc0a4aa589de34bc56e1a80a740ee0f8c47611bdfb28cd1849de60660f3799d", size = 247744 }, + { url = 
"https://files.pythonhosted.org/packages/16/02/d500b91f5471b2975947e0629b8980e5e90786fe316b6d7299852c1d793d/coverage-7.11.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dba82204769d78c3fd31b35c3d5f46e06511936c5019c39f98320e05b08f794d", size = 247325 }, + { url = "https://files.pythonhosted.org/packages/77/11/dee0284fbbd9cd64cfce806b827452c6df3f100d9e66188e82dfe771d4af/coverage-7.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:81b335f03ba67309a95210caf3eb43bd6fe75a4e22ba653ef97b4696c56c7ec2", size = 249180 }, + { url = "https://files.pythonhosted.org/packages/59/1b/cdf1def928f0a150a057cab03286774e73e29c2395f0d30ce3d9e9f8e697/coverage-7.11.0-cp312-cp312-win32.whl", hash = "sha256:037b2d064c2f8cc8716fe4d39cb705779af3fbf1ba318dc96a1af858888c7bb5", size = 218479 }, + { url = "https://files.pythonhosted.org/packages/ff/55/e5884d55e031da9c15b94b90a23beccc9d6beee65e9835cd6da0a79e4f3a/coverage-7.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:d66c0104aec3b75e5fd897e7940188ea1892ca1d0235316bf89286d6a22568c0", size = 219290 }, + { url = "https://files.pythonhosted.org/packages/23/a8/faa930cfc71c1d16bc78f9a19bb73700464f9c331d9e547bfbc1dbd3a108/coverage-7.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:d91ebeac603812a09cf6a886ba6e464f3bbb367411904ae3790dfe28311b15ad", size = 217924 }, + { url = "https://files.pythonhosted.org/packages/60/7f/85e4dfe65e400645464b25c036a26ac226cf3a69d4a50c3934c532491cdd/coverage-7.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:cc3f49e65ea6e0d5d9bd60368684fe52a704d46f9e7fc413918f18d046ec40e1", size = 216129 }, + { url = "https://files.pythonhosted.org/packages/96/5d/dc5fa98fea3c175caf9d360649cb1aa3715e391ab00dc78c4c66fabd7356/coverage-7.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f39ae2f63f37472c17b4990f794035c9890418b1b8cca75c01193f3c8d3e01be", size = 216380 }, + { url = "https://files.pythonhosted.org/packages/b2/f5/3da9cc9596708273385189289c0e4d8197d37a386bdf17619013554b3447/coverage-7.11.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7db53b5cdd2917b6eaadd0b1251cf4e7d96f4a8d24e174bdbdf2f65b5ea7994d", size = 247375 }, + { url = "https://files.pythonhosted.org/packages/65/6c/f7f59c342359a235559d2bc76b0c73cfc4bac7d61bb0df210965cb1ecffd/coverage-7.11.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10ad04ac3a122048688387828b4537bc9cf60c0bf4869c1e9989c46e45690b82", size = 249978 }, + { url = "https://files.pythonhosted.org/packages/e7/8c/042dede2e23525e863bf1ccd2b92689692a148d8b5fd37c37899ba882645/coverage-7.11.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4036cc9c7983a2b1f2556d574d2eb2154ac6ed55114761685657e38782b23f52", size = 251253 }, + { url = "https://files.pythonhosted.org/packages/7b/a9/3c58df67bfa809a7bddd786356d9c5283e45d693edb5f3f55d0986dd905a/coverage-7.11.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7ab934dd13b1c5e94b692b1e01bd87e4488cb746e3a50f798cb9464fd128374b", size = 247591 }, + { url = "https://files.pythonhosted.org/packages/26/5b/c7f32efd862ee0477a18c41e4761305de6ddd2d49cdeda0c1116227570fd/coverage-7.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59a6e5a265f7cfc05f76e3bb53eca2e0dfe90f05e07e849930fecd6abb8f40b4", size = 249411 }, + { url = "https://files.pythonhosted.org/packages/76/b5/78cb4f1e86c1611431c990423ec0768122905b03837e1b4c6a6f388a858b/coverage-7.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = 
"sha256:df01d6c4c81e15a7c88337b795bb7595a8596e92310266b5072c7e301168efbd", size = 247303 }, + { url = "https://files.pythonhosted.org/packages/87/c9/23c753a8641a330f45f221286e707c427e46d0ffd1719b080cedc984ec40/coverage-7.11.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:8c934bd088eed6174210942761e38ee81d28c46de0132ebb1801dbe36a390dcc", size = 247157 }, + { url = "https://files.pythonhosted.org/packages/c5/42/6e0cc71dc8a464486e944a4fa0d85bdec031cc2969e98ed41532a98336b9/coverage-7.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5a03eaf7ec24078ad64a07f02e30060aaf22b91dedf31a6b24d0d98d2bba7f48", size = 248921 }, + { url = "https://files.pythonhosted.org/packages/e8/1c/743c2ef665e6858cccb0f84377dfe3a4c25add51e8c7ef19249be92465b6/coverage-7.11.0-cp313-cp313-win32.whl", hash = "sha256:695340f698a5f56f795b2836abe6fb576e7c53d48cd155ad2f80fd24bc63a040", size = 218526 }, + { url = "https://files.pythonhosted.org/packages/ff/d5/226daadfd1bf8ddbccefbd3aa3547d7b960fb48e1bdac124e2dd13a2b71a/coverage-7.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2727d47fce3ee2bac648528e41455d1b0c46395a087a229deac75e9f88ba5a05", size = 219317 }, + { url = "https://files.pythonhosted.org/packages/97/54/47db81dcbe571a48a298f206183ba8a7ba79200a37cd0d9f4788fcd2af4a/coverage-7.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:0efa742f431529699712b92ecdf22de8ff198df41e43aeaaadf69973eb93f17a", size = 217948 }, + { url = "https://files.pythonhosted.org/packages/e5/8b/cb68425420154e7e2a82fd779a8cc01549b6fa83c2ad3679cd6c088ebd07/coverage-7.11.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:587c38849b853b157706407e9ebdca8fd12f45869edb56defbef2daa5fb0812b", size = 216837 }, + { url = "https://files.pythonhosted.org/packages/33/55/9d61b5765a025685e14659c8d07037247de6383c0385757544ffe4606475/coverage-7.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b971bdefdd75096163dd4261c74be813c4508477e39ff7b92191dea19f24cd37", size = 217061 }, + { url = "https://files.pythonhosted.org/packages/52/85/292459c9186d70dcec6538f06ea251bc968046922497377bf4a1dc9a71de/coverage-7.11.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:269bfe913b7d5be12ab13a95f3a76da23cf147be7fa043933320ba5625f0a8de", size = 258398 }, + { url = "https://files.pythonhosted.org/packages/1f/e2/46edd73fb8bf51446c41148d81944c54ed224854812b6ca549be25113ee0/coverage-7.11.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:dadbcce51a10c07b7c72b0ce4a25e4b6dcb0c0372846afb8e5b6307a121eb99f", size = 260574 }, + { url = "https://files.pythonhosted.org/packages/07/5e/1df469a19007ff82e2ca8fe509822820a31e251f80ee7344c34f6cd2ec43/coverage-7.11.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ed43fa22c6436f7957df036331f8fe4efa7af132054e1844918866cd228af6c", size = 262797 }, + { url = "https://files.pythonhosted.org/packages/f9/50/de216b31a1434b94d9b34a964c09943c6be45069ec704bfc379d8d89a649/coverage-7.11.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9516add7256b6713ec08359b7b05aeff8850c98d357784c7205b2e60aa2513fa", size = 257361 }, + { url = "https://files.pythonhosted.org/packages/82/1e/3f9f8344a48111e152e0fd495b6fff13cc743e771a6050abf1627a7ba918/coverage-7.11.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb92e47c92fcbcdc692f428da67db33337fa213756f7adb6a011f7b5a7a20740", size = 260349 }, + { url = 
"https://files.pythonhosted.org/packages/65/9b/3f52741f9e7d82124272f3070bbe316006a7de1bad1093f88d59bfc6c548/coverage-7.11.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d06f4fc7acf3cabd6d74941d53329e06bab00a8fe10e4df2714f0b134bfc64ef", size = 258114 }, + { url = "https://files.pythonhosted.org/packages/0b/8b/918f0e15f0365d50d3986bbd3338ca01178717ac5678301f3f547b6619e6/coverage-7.11.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:6fbcee1a8f056af07ecd344482f711f563a9eb1c2cad192e87df00338ec3cdb0", size = 256723 }, + { url = "https://files.pythonhosted.org/packages/44/9e/7776829f82d3cf630878a7965a7d70cc6ca94f22c7d20ec4944f7148cb46/coverage-7.11.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dbbf012be5f32533a490709ad597ad8a8ff80c582a95adc8d62af664e532f9ca", size = 259238 }, + { url = "https://files.pythonhosted.org/packages/9a/b8/49cf253e1e7a3bedb85199b201862dd7ca4859f75b6cf25ffa7298aa0760/coverage-7.11.0-cp313-cp313t-win32.whl", hash = "sha256:cee6291bb4fed184f1c2b663606a115c743df98a537c969c3c64b49989da96c2", size = 219180 }, + { url = "https://files.pythonhosted.org/packages/ac/e1/1a541703826be7ae2125a0fb7f821af5729d56bb71e946e7b933cc7a89a4/coverage-7.11.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a386c1061bf98e7ea4758e4313c0ab5ecf57af341ef0f43a0bf26c2477b5c268", size = 220241 }, + { url = "https://files.pythonhosted.org/packages/d5/d1/5ee0e0a08621140fd418ec4020f595b4d52d7eb429ae6a0c6542b4ba6f14/coverage-7.11.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f9ea02ef40bb83823b2b04964459d281688fe173e20643870bb5d2edf68bc836", size = 218510 }, + { url = "https://files.pythonhosted.org/packages/f4/06/e923830c1985ce808e40a3fa3eb46c13350b3224b7da59757d37b6ce12b8/coverage-7.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c770885b28fb399aaf2a65bbd1c12bf6f307ffd112d6a76c5231a94276f0c497", size = 216110 }, + { url = "https://files.pythonhosted.org/packages/42/82/cdeed03bfead45203fb651ed756dfb5266028f5f939e7f06efac4041dad5/coverage-7.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a3d0e2087dba64c86a6b254f43e12d264b636a39e88c5cc0a01a7c71bcfdab7e", size = 216395 }, + { url = "https://files.pythonhosted.org/packages/fc/ba/e1c80caffc3199aa699813f73ff097bc2df7b31642bdbc7493600a8f1de5/coverage-7.11.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:73feb83bb41c32811973b8565f3705caf01d928d972b72042b44e97c71fd70d1", size = 247433 }, + { url = "https://files.pythonhosted.org/packages/80/c0/5b259b029694ce0a5bbc1548834c7ba3db41d3efd3474489d7efce4ceb18/coverage-7.11.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c6f31f281012235ad08f9a560976cc2fc9c95c17604ff3ab20120fe480169bca", size = 249970 }, + { url = "https://files.pythonhosted.org/packages/8c/86/171b2b5e1aac7e2fd9b43f7158b987dbeb95f06d1fbecad54ad8163ae3e8/coverage-7.11.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9570ad567f880ef675673992222746a124b9595506826b210fbe0ce3f0499cd", size = 251324 }, + { url = "https://files.pythonhosted.org/packages/1a/7e/7e10414d343385b92024af3932a27a1caf75c6e27ee88ba211221ff1a145/coverage-7.11.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8badf70446042553a773547a61fecaa734b55dc738cacf20c56ab04b77425e43", size = 247445 }, + { url = "https://files.pythonhosted.org/packages/c4/3b/e4f966b21f5be8c4bf86ad75ae94efa0de4c99c7bbb8114476323102e345/coverage-7.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:a09c1211959903a479e389685b7feb8a17f59ec5a4ef9afde7650bd5eabc2777", size = 249324 }, + { url = "https://files.pythonhosted.org/packages/00/a2/8479325576dfcd909244d0df215f077f47437ab852ab778cfa2f8bf4d954/coverage-7.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:5ef83b107f50db3f9ae40f69e34b3bd9337456c5a7fe3461c7abf8b75dd666a2", size = 247261 }, + { url = "https://files.pythonhosted.org/packages/7b/d8/3a9e2db19d94d65771d0f2e21a9ea587d11b831332a73622f901157cc24b/coverage-7.11.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:f91f927a3215b8907e214af77200250bb6aae36eca3f760f89780d13e495388d", size = 247092 }, + { url = "https://files.pythonhosted.org/packages/b3/b1/bbca3c472544f9e2ad2d5116b2379732957048be4b93a9c543fcd0207e5f/coverage-7.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cdbcd376716d6b7fbfeedd687a6c4be019c5a5671b35f804ba76a4c0a778cba4", size = 248755 }, + { url = "https://files.pythonhosted.org/packages/89/49/638d5a45a6a0f00af53d6b637c87007eb2297042186334e9923a61aa8854/coverage-7.11.0-cp314-cp314-win32.whl", hash = "sha256:bab7ec4bb501743edc63609320aaec8cd9188b396354f482f4de4d40a9d10721", size = 218793 }, + { url = "https://files.pythonhosted.org/packages/30/cc/b675a51f2d068adb3cdf3799212c662239b0ca27f4691d1fff81b92ea850/coverage-7.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d4ba9a449e9364a936a27322b20d32d8b166553bfe63059bd21527e681e2fad", size = 219587 }, + { url = "https://files.pythonhosted.org/packages/93/98/5ac886876026de04f00820e5094fe22166b98dcb8b426bf6827aaf67048c/coverage-7.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:ce37f215223af94ef0f75ac68ea096f9f8e8c8ec7d6e8c346ee45c0d363f0479", size = 218168 }, + { url = "https://files.pythonhosted.org/packages/14/d1/b4145d35b3e3ecf4d917e97fc8895bcf027d854879ba401d9ff0f533f997/coverage-7.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f413ce6e07e0d0dc9c433228727b619871532674b45165abafe201f200cc215f", size = 216850 }, + { url = "https://files.pythonhosted.org/packages/ca/d1/7f645fc2eccd318369a8a9948acc447bb7c1ade2911e31d3c5620544c22b/coverage-7.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:05791e528a18f7072bf5998ba772fe29db4da1234c45c2087866b5ba4dea710e", size = 217071 }, + { url = "https://files.pythonhosted.org/packages/54/7d/64d124649db2737ceced1dfcbdcb79898d5868d311730f622f8ecae84250/coverage-7.11.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cacb29f420cfeb9283b803263c3b9a068924474ff19ca126ba9103e1278dfa44", size = 258570 }, + { url = "https://files.pythonhosted.org/packages/6c/3f/6f5922f80dc6f2d8b2c6f974835c43f53eb4257a7797727e6ca5b7b2ec1f/coverage-7.11.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314c24e700d7027ae3ab0d95fbf8d53544fca1f20345fd30cd219b737c6e58d3", size = 260738 }, + { url = "https://files.pythonhosted.org/packages/0e/5f/9e883523c4647c860b3812b417a2017e361eca5b635ee658387dc11b13c1/coverage-7.11.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:630d0bd7a293ad2fc8b4b94e5758c8b2536fdf36c05f1681270203e463cbfa9b", size = 262994 }, + { url = "https://files.pythonhosted.org/packages/07/bb/43b5a8e94c09c8bf51743ffc65c4c841a4ca5d3ed191d0a6919c379a1b83/coverage-7.11.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e89641f5175d65e2dbb44db15fe4ea48fade5d5bbb9868fdc2b4fce22f4a469d", size = 257282 }, + { url = 
"https://files.pythonhosted.org/packages/aa/e5/0ead8af411411330b928733e1d201384b39251a5f043c1612970310e8283/coverage-7.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c9f08ea03114a637dab06cedb2e914da9dc67fa52c6015c018ff43fdde25b9c2", size = 260430 }, + { url = "https://files.pythonhosted.org/packages/ae/66/03dd8bb0ba5b971620dcaac145461950f6d8204953e535d2b20c6b65d729/coverage-7.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ce9f3bde4e9b031eaf1eb61df95c1401427029ea1bfddb8621c1161dcb0fa02e", size = 258190 }, + { url = "https://files.pythonhosted.org/packages/45/ae/28a9cce40bf3174426cb2f7e71ee172d98e7f6446dff936a7ccecee34b14/coverage-7.11.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:e4dc07e95495923d6fd4d6c27bf70769425b71c89053083843fd78f378558996", size = 256658 }, + { url = "https://files.pythonhosted.org/packages/5c/7c/3a44234a8599513684bfc8684878fd7b126c2760f79712bb78c56f19efc4/coverage-7.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:424538266794db2861db4922b05d729ade0940ee69dcf0591ce8f69784db0e11", size = 259342 }, + { url = "https://files.pythonhosted.org/packages/e1/e6/0108519cba871af0351725ebdb8660fd7a0fe2ba3850d56d32490c7d9b4b/coverage-7.11.0-cp314-cp314t-win32.whl", hash = "sha256:4c1eeb3fb8eb9e0190bebafd0462936f75717687117339f708f395fe455acc73", size = 219568 }, + { url = "https://files.pythonhosted.org/packages/c9/76/44ba876e0942b4e62fdde23ccb029ddb16d19ba1bef081edd00857ba0b16/coverage-7.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b56efee146c98dbf2cf5cffc61b9829d1e94442df4d7398b26892a53992d3547", size = 220687 }, + { url = "https://files.pythonhosted.org/packages/b9/0c/0df55ecb20d0d0ed5c322e10a441775e1a3a5d78c60f0c4e1abfe6fcf949/coverage-7.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:b5c2705afa83f49bd91962a4094b6b082f94aef7626365ab3f8f4bd159c5acf3", size = 218711 }, + { url = "https://files.pythonhosted.org/packages/5f/04/642c1d8a448ae5ea1369eac8495740a79eb4e581a9fb0cbdce56bbf56da1/coverage-7.11.0-py3-none-any.whl", hash = "sha256:4b7589765348d78fb4e5fb6ea35d07564e387da2fc5efff62e0222971f155f68", size = 207761 }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, +] + +[[package]] +name = "dataframe-expectations" +version = "0.1.0" +source = { virtual = "." 
+dependencies = [
+    { name = "pandas" },
+    { name = "pyspark" },
+    { name = "tabulate" },
+]
+
+[package.dev-dependencies]
+dev = [
+    { name = "numpy" },
+    { name = "pre-commit" },
+    { name = "pytest" },
+    { name = "pytest-cov" },
+    { name = "ruff" },
+]
+docs = [
+    { name = "pandas" },
+    { name = "pydata-sphinx-theme" },
+    { name = "pyspark" },
+    { name = "sphinx" },
+    { name = "sphinx-autobuild" },
+    { name = "tabulate" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "pandas", specifier = ">=1.5.0" },
+    { name = "pyspark", specifier = ">=3.3.0" },
+    { name = "tabulate", specifier = ">=0.8.9" },
+]
+
+[package.metadata.requires-dev]
+dev = [
+    { name = "numpy", specifier = ">=1.21.0" },
+    { name = "pre-commit", specifier = ">=2.20.0" },
+    { name = "pytest", specifier = ">=7.0.0" },
+    { name = "pytest-cov", specifier = ">=4.0.0" },
+    { name = "ruff", specifier = ">=0.1.0" },
+]
+docs = [
+    { name = "pandas", specifier = ">=1.5.0" },
+    { name = "pydata-sphinx-theme", specifier = ">=0.13.0" },
+    { name = "pyspark", specifier = ">=3.3.0" },
+    { name = "sphinx", specifier = ">=4.0.0" },
+    { name = "sphinx-autobuild", specifier = ">=2021.3.14" },
+    { name = "tabulate", specifier = ">=0.8.9" },
+]
+
+[[package]]
+name = "distlib"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047 },
+]
+
+[[package]]
+name = "docutils"
+version = "0.21.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 },
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674 },
+]
+
+[[package]]
+name = "filelock"
+version = "3.20.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054 },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
+]
+
+[[package]]
+name = "identify"
+version = "2.6.15"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ff/e7/685de97986c916a6d93b3876139e00eef26ad5bbbd61925d670ae8013449/identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf", size = 99311 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0f/1c/e5fd8f973d4f375adb21565739498e2e9a1e54c858a97b9a8ccfdc81da9b/identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757", size = 99183 },
+]
+
+[[package]]
+name = "idna"
+version = "3.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 },
+]
+
+[[package]]
+name = "imagesize"
+version = "1.4.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769 },
+]
+
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 },
+]
+
+[[package]]
+name = "jinja2"
+version = "3.1.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
+]
+
+[[package]]
+name = "markupsafe"
+version = "3.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e8/4b/3541d44f3937ba468b75da9eebcae497dcf67adb65caa16760b0a6807ebb/markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559", size = 11631 },
+    { url = "https://files.pythonhosted.org/packages/98/1b/fbd8eed11021cabd9226c37342fa6ca4e8a98d8188a8d9b66740494960e4/markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419", size = 12057 },
+    { url = "https://files.pythonhosted.org/packages/40/01/e560d658dc0bb8ab762670ece35281dec7b6c1b33f5fbc09ebb57a185519/markupsafe-3.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695", size = 22050 },
+    { url = "https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591", size = 20681 },
+    { url = "https://files.pythonhosted.org/packages/c9/2a/b5c12c809f1c3045c4d580b035a743d12fcde53cf685dbc44660826308da/markupsafe-3.0.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c", size = 20705 },
+    { url = "https://files.pythonhosted.org/packages/cf/e3/9427a68c82728d0a88c50f890d0fc072a1484de2f3ac1ad0bfc1a7214fd5/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f", size = 21524 },
+    { url = "https://files.pythonhosted.org/packages/bc/36/23578f29e9e582a4d0278e009b38081dbe363c5e7165113fad546918a232/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6", size = 20282 },
+    { url = "https://files.pythonhosted.org/packages/56/21/dca11354e756ebd03e036bd8ad58d6d7168c80ce1fe5e75218e4945cbab7/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1", size = 20745 },
+    { url = "https://files.pythonhosted.org/packages/87/99/faba9369a7ad6e4d10b6a5fbf71fa2a188fe4a593b15f0963b73859a1bbd/markupsafe-3.0.3-cp310-cp310-win32.whl", hash = "sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa", size = 14571 },
+    { url = "https://files.pythonhosted.org/packages/d6/25/55dc3ab959917602c96985cb1253efaa4ff42f71194bddeb61eb7278b8be/markupsafe-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8", size = 15056 },
+    { url = "https://files.pythonhosted.org/packages/d0/9e/0a02226640c255d1da0b8d12e24ac2aa6734da68bff14c05dd53b94a0fc3/markupsafe-3.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1", size = 13932 },
+    { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631 },
+    { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058 },
+    { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287 },
+    { url = "https://files.pythonhosted.org/packages/30/ac/0273f6fcb5f42e314c6d8cd99effae6a5354604d461b8d392b5ec9530a54/markupsafe-3.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf", size = 22940 },
+    { url = "https://files.pythonhosted.org/packages/19/ae/31c1be199ef767124c042c6c3e904da327a2f7f0cd63a0337e1eca2967a8/markupsafe-3.0.3-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f", size = 21887 },
+    { url = "https://files.pythonhosted.org/packages/b2/76/7edcab99d5349a4532a459e1fe64f0b0467a3365056ae550d3bcf3f79e1e/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a", size = 23692 },
+    { url = "https://files.pythonhosted.org/packages/a4/28/6e74cdd26d7514849143d69f0bf2399f929c37dc2b31e6829fd2045b2765/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115", size = 21471 },
+    { url = "https://files.pythonhosted.org/packages/62/7e/a145f36a5c2945673e590850a6f8014318d5577ed7e5920a4b3448e0865d/markupsafe-3.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a", size = 22923 },
+    { url = "https://files.pythonhosted.org/packages/0f/62/d9c46a7f5c9adbeeeda52f5b8d802e1094e9717705a645efc71b0913a0a8/markupsafe-3.0.3-cp311-cp311-win32.whl", hash = "sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19", size = 14572 },
+    { url = "https://files.pythonhosted.org/packages/83/8a/4414c03d3f891739326e1783338e48fb49781cc915b2e0ee052aa490d586/markupsafe-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01", size = 15077 },
+    { url = "https://files.pythonhosted.org/packages/35/73/893072b42e6862f319b5207adc9ae06070f095b358655f077f69a35601f0/markupsafe-3.0.3-cp311-cp311-win_arm64.whl", hash = "sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c", size = 13876 },
+    { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615 },
+    { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020 },
+    { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332 },
+    { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947 },
+    { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962 },
+    { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760 },
+    { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529 },
+    { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015 },
+    { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540 },
+    { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105 },
+    { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906 },
+    { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622 },
+    { url =
"https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029 }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374 }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980 }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990 }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784 }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588 }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041 }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543 }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113 }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911 }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658 }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066 }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", 
hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639 }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569 }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284 }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801 }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769 }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642 }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612 }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200 }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973 }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619 }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029 }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408 }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005 }, + { url = 
"https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048 }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821 }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606 }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043 }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747 }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341 }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073 }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661 }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069 }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670 }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598 }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261 }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835 }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733 }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672 }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819 }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426 }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146 }, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314 }, +] + +[[package]] +name = "numpy" +version = "2.2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245 }, + { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048 }, + { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542 }, + { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301 }, + { url = 
"https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320 }, + { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050 }, + { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034 }, + { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185 }, + { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149 }, + { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620 }, + { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963 }, + { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743 }, + { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616 }, + { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579 }, + { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005 }, + { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570 }, + { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548 }, 
+ { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521 }, + { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866 }, + { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455 }, + { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348 }, + { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362 }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103 }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382 }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462 }, + { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618 }, + { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511 }, + { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783 }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506 }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190 }, + { url = 
"https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828 }, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006 }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765 }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736 }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719 }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072 }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213 }, + { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632 }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532 }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885 }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467 }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144 }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217 }, + { url = 
"https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014 }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935 }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122 }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143 }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260 }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225 }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374 }, + { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391 }, + { url = "https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754 }, + { url = "https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476 }, + { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666 }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = 
"sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469 }, +] + +[[package]] +name = "pandas" +version = "2.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/f7/f425a00df4fcc22b292c6895c6831c0c8ae1d9fac1e024d16f98a9ce8749/pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", size = 11555763 }, + { url = "https://files.pythonhosted.org/packages/13/4f/66d99628ff8ce7857aca52fed8f0066ce209f96be2fede6cef9f84e8d04f/pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", size = 10801217 }, + { url = "https://files.pythonhosted.org/packages/1d/03/3fc4a529a7710f890a239cc496fc6d50ad4a0995657dccc1d64695adb9f4/pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", size = 12148791 }, + { url = "https://files.pythonhosted.org/packages/40/a8/4dac1f8f8235e5d25b9955d02ff6f29396191d4e665d71122c3722ca83c5/pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", size = 12769373 }, + { url = "https://files.pythonhosted.org/packages/df/91/82cc5169b6b25440a7fc0ef3a694582418d875c8e3ebf796a6d6470aa578/pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", size = 13200444 }, + { url = "https://files.pythonhosted.org/packages/10/ae/89b3283800ab58f7af2952704078555fa60c807fff764395bb57ea0b0dbd/pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", size = 13858459 }, + { url = "https://files.pythonhosted.org/packages/85/72/530900610650f54a35a19476eca5104f38555afccda1aa11a92ee14cb21d/pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", size = 11346086 }, + { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790 }, + { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831 }, + { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267 }, + { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281 }, + { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453 }, + { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361 }, + { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702 }, + { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846 }, + { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618 }, + { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212 }, + { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693 }, + { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002 }, + { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971 }, + { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722 }, + { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671 }, + { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807 }, + { url = 
"https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872 }, + { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371 }, + { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333 }, + { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120 }, + { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991 }, + { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227 }, + { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056 }, + { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189 }, + { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912 }, + { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160 }, + { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233 }, + { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635 }, + { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079 }, + { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049 }, + { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638 }, + { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834 }, + { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925 }, + { url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071 }, + { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504 }, + { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702 }, + { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535 }, + { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582 }, + { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963 }, + { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175 }, +] + +[[package]] +name = "platformdirs" +version = "4.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, +] + +[[package]] +name = "pre-commit" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ff/29/7cf5bbc236333876e4b41f56e06857a87937ce4bf91e117a6991a2dbb02a/pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16", size = 193792 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965 }, +] + +[[package]] +name = "py4j" +version = "0.10.9.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/31/0b210511177070c8d5d3059556194352e5753602fa64b85b7ab81ec1a009/py4j-0.10.9.9.tar.gz", hash = "sha256:f694cad19efa5bd1dee4f3e5270eb406613c974394035e5bfc4ec1aba870b879", size = 761089 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/db/ea0203e495be491c85af87b66e37acfd3bf756fd985f87e46fc5e3bf022c/py4j-0.10.9.9-py2.py3-none-any.whl", hash = "sha256:c7c26e4158defb37b0bb124933163641a2ff6e3a3913f7811b0ddbe07ed61533", size = 203008 }, +] + +[[package]] +name = "pydata-sphinx-theme" +version = "0.16.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "accessible-pygments" }, + { name = "babel" }, + { name = "beautifulsoup4" }, + { name = "docutils" }, + { name = "pygments" }, + { name = "sphinx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/20/bb50f9de3a6de69e6abd6b087b52fa2418a0418b19597601605f855ad044/pydata_sphinx_theme-0.16.1.tar.gz", hash = "sha256:a08b7f0b7f70387219dc659bff0893a7554d5eb39b59d3b8ef37b8401b7642d7", size = 2412693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264 }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "pyspark" +version = "4.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "py4j" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/40/1414582f16c1d7b051c668c2e19c62d21a18bd181d944cb24f5ddbb2423f/pyspark-4.0.1.tar.gz", hash = "sha256:9d1f22d994f60369228397e3479003ffe2dd736ba79165003246ff7bd48e2c73", size = 434204896 } + +[[package]] +name = "pytest" +version = "8.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750 }, +] + +[[package]] +name = "pytest-cov" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = 
"sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size = 184227 }, + { url = "https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size = 174019 }, + { url = "https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size = 740646 }, + { url = "https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size = 840793 }, + { url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size = 770293 }, + { url = "https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size = 732872 }, + { url = "https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size = 758828 }, + { url = "https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size = 142415 }, + { url = "https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size = 158561 }, + { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826 }, + { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577 }, + { url = 
"https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556 }, + { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114 }, + { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638 }, + { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463 }, + { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986 }, + { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543 }, + { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763 }, + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, + { url = 
"https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 
137427 }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912 }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 }, +] + +[[package]] +name = "ruff" +version = "0.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/58/6ca66896635352812de66f71cdf9ff86b3a4f79071ca5730088c0cd0fc8d/ruff-0.14.1.tar.gz", hash = "sha256:1dd86253060c4772867c61791588627320abcb6ed1577a90ef432ee319729b69", size = 5513429 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/39/9cc5ab181478d7a18adc1c1e051a84ee02bec94eb9bdfd35643d7c74ca31/ruff-0.14.1-py3-none-linux_armv6l.whl", hash = "sha256:083bfc1f30f4a391ae09c6f4f99d83074416b471775b59288956f5bc18e82f8b", size = 12445415 }, + { url = 
"https://files.pythonhosted.org/packages/ef/2e/1226961855ccd697255988f5a2474890ac7c5863b080b15bd038df820818/ruff-0.14.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:f6fa757cd717f791009f7669fefb09121cc5f7d9bd0ef211371fad68c2b8b224", size = 12784267 }, + { url = "https://files.pythonhosted.org/packages/c1/ea/fd9e95863124ed159cd0667ec98449ae461de94acda7101f1acb6066da00/ruff-0.14.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d6191903d39ac156921398e9c86b7354d15e3c93772e7dbf26c9fcae59ceccd5", size = 11781872 }, + { url = "https://files.pythonhosted.org/packages/1e/5a/e890f7338ff537dba4589a5e02c51baa63020acfb7c8cbbaea4831562c96/ruff-0.14.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed04f0e04f7a4587244e5c9d7df50e6b5bf2705d75059f409a6421c593a35896", size = 12226558 }, + { url = "https://files.pythonhosted.org/packages/a6/7a/8ab5c3377f5bf31e167b73651841217542bcc7aa1c19e83030835cc25204/ruff-0.14.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5c9e6cf6cd4acae0febbce29497accd3632fe2025c0c583c8b87e8dbdeae5f61", size = 12187898 }, + { url = "https://files.pythonhosted.org/packages/48/8d/ba7c33aa55406955fc124e62c8259791c3d42e3075a71710fdff9375134f/ruff-0.14.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6fa2458527794ecdfbe45f654e42c61f2503a230545a91af839653a0a93dbc6", size = 12939168 }, + { url = "https://files.pythonhosted.org/packages/b4/c2/70783f612b50f66d083380e68cbd1696739d88e9b4f6164230375532c637/ruff-0.14.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:39f1c392244e338b21d42ab29b8a6392a722c5090032eb49bb4d6defcdb34345", size = 14386942 }, + { url = "https://files.pythonhosted.org/packages/48/44/cd7abb9c776b66d332119d67f96acf15830d120f5b884598a36d9d3f4d83/ruff-0.14.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7382fa12a26cce1f95070ce450946bec357727aaa428983036362579eadcc5cf", size = 13990622 }, + { url = "https://files.pythonhosted.org/packages/eb/56/4259b696db12ac152fe472764b4f78bbdd9b477afd9bc3a6d53c01300b37/ruff-0.14.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd0bf2be3ae8521e1093a487c4aa3b455882f139787770698530d28ed3fbb37c", size = 13431143 }, + { url = "https://files.pythonhosted.org/packages/e0/35/266a80d0eb97bd224b3265b9437bd89dde0dcf4faf299db1212e81824e7e/ruff-0.14.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cabcaa9ccf8089fb4fdb78d17cc0e28241520f50f4c2e88cb6261ed083d85151", size = 13132844 }, + { url = "https://files.pythonhosted.org/packages/65/6e/d31ce218acc11a8d91ef208e002a31acf315061a85132f94f3df7a252b18/ruff-0.14.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:747d583400f6125ec11a4c14d1c8474bf75d8b419ad22a111a537ec1a952d192", size = 13401241 }, + { url = "https://files.pythonhosted.org/packages/9f/b5/dbc4221bf0b03774b3b2f0d47f39e848d30664157c15b965a14d890637d2/ruff-0.14.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5a6e74c0efd78515a1d13acbfe6c90f0f5bd822aa56b4a6d43a9ffb2ae6e56cd", size = 12132476 }, + { url = "https://files.pythonhosted.org/packages/98/4b/ac99194e790ccd092d6a8b5f341f34b6e597d698e3077c032c502d75ea84/ruff-0.14.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0ea6a864d2fb41a4b6d5b456ed164302a0d96f4daac630aeba829abfb059d020", size = 12139749 }, + { url = "https://files.pythonhosted.org/packages/47/26/7df917462c3bb5004e6fdfcc505a49e90bcd8a34c54a051953118c00b53a/ruff-0.14.1-py3-none-musllinux_1_2_i686.whl", hash = 
"sha256:0826b8764f94229604fa255918d1cc45e583e38c21c203248b0bfc9a0e930be5", size = 12544758 }, + { url = "https://files.pythonhosted.org/packages/64/d0/81e7f0648e9764ad9b51dd4be5e5dac3fcfff9602428ccbae288a39c2c22/ruff-0.14.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cbc52160465913a1a3f424c81c62ac8096b6a491468e7d872cb9444a860bc33d", size = 13221811 }, + { url = "https://files.pythonhosted.org/packages/c3/07/3c45562c67933cc35f6d5df4ca77dabbcd88fddaca0d6b8371693d29fd56/ruff-0.14.1-py3-none-win32.whl", hash = "sha256:e037ea374aaaff4103240ae79168c0945ae3d5ae8db190603de3b4012bd1def6", size = 12319467 }, + { url = "https://files.pythonhosted.org/packages/02/88/0ee4ca507d4aa05f67e292d2e5eb0b3e358fbcfe527554a2eda9ac422d6b/ruff-0.14.1-py3-none-win_amd64.whl", hash = "sha256:59d599cdff9c7f925a017f6f2c256c908b094e55967f93f2821b1439928746a1", size = 13401123 }, + { url = "https://files.pythonhosted.org/packages/b8/81/4b6387be7014858d924b843530e1b2a8e531846807516e9bea2ee0936bf7/ruff-0.14.1-py3-none-win_arm64.whl", hash = "sha256:e3b443c4c9f16ae850906b8d0a707b2a4c16f8d2f0a7fe65c475c5886665ce44", size = 12436636 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "snowballstemmer" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/75/a7/9810d872919697c9d01295633f5d574fb416d47e535f258272ca1f01f447/snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895", size = 105575 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274 }, +] + +[[package]] +name = "soupsieve" +version = "2.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = 
"sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679 }, +] + +[[package]] +name = "sphinx" +version = "8.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "alabaster" }, + { name = "babel" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "docutils" }, + { name = "imagesize" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pygments" }, + { name = "requests" }, + { name = "snowballstemmer" }, + { name = "sphinxcontrib-applehelp" }, + { name = "sphinxcontrib-devhelp" }, + { name = "sphinxcontrib-htmlhelp" }, + { name = "sphinxcontrib-jsmath" }, + { name = "sphinxcontrib-qthelp" }, + { name = "sphinxcontrib-serializinghtml" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2", size = 3487125 }, +] + +[[package]] +name = "sphinx-autobuild" +version = "2024.10.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama" }, + { name = "sphinx" }, + { name = "starlette" }, + { name = "uvicorn" }, + { name = "watchfiles" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/2c/155e1de2c1ba96a72e5dba152c509a8b41e047ee5c2def9e9f0d812f8be7/sphinx_autobuild-2024.10.3.tar.gz", hash = "sha256:248150f8f333e825107b6d4b86113ab28fa51750e5f9ae63b59dc339be951fb1", size = 14023 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/c0/eba125db38c84d3c74717008fd3cb5000b68cd7e2cbafd1349c6a38c3d3b/sphinx_autobuild-2024.10.3-py3-none-any.whl", hash = "sha256:158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa", size = 11908 }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, +] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 }, +] + +[[package]] +name = "starlette" +version = "0.48.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/a5/d6f429d43394057b67a6b5bbe6eae2f77a6bf7459d961fdb224bf206eee6/starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46", size = 2652949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736 }, +] + +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = 
"sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 }, +] + +[[package]] +name = "tomli" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236 }, + { url = "https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084 }, + { url = "https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832 }, + { url = "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052 }, + { url = "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555 }, + { url = "https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128 }, + { url = "https://files.pythonhosted.org/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445 }, + { url = "https://files.pythonhosted.org/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165 }, + { url = "https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891 }, + { url = "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796 }, + { url = 
"https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121 }, + { url = "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070 }, + { url = "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859 }, + { url = "https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296 }, + { url = "https://files.pythonhosted.org/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124 }, + { url = "https://files.pythonhosted.org/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698 }, + { url = "https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", size = 154819 }, + { url = "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", size = 148766 }, + { url = "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", size = 240771 }, + { url = "https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", size = 248586 }, + { url = "https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", size = 244792 }, + { url = "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", size = 248909 }, + { url = "https://files.pythonhosted.org/packages/f8/84/ef50c51b5a9472e7265ce1ffc7f24cd4023d289e109f669bdb1553f6a7c2/tomli-2.3.0-cp313-cp313-win32.whl", hash = 
"sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606", size = 96946 }, + { url = "https://files.pythonhosted.org/packages/b2/b7/718cd1da0884f281f95ccfa3a6cc572d30053cba64603f79d431d3c9b61b/tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999", size = 107705 }, + { url = "https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", size = 154244 }, + { url = "https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", size = 148637 }, + { url = "https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", size = 241925 }, + { url = "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", size = 249045 }, + { url = "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", size = 245835 }, + { url = "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", size = 253109 }, + { url = "https://files.pythonhosted.org/packages/22/0c/b4da635000a71b5f80130937eeac12e686eefb376b8dee113b4a582bba42/tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463", size = 97930 }, + { url = "https://files.pythonhosted.org/packages/b9/74/cb1abc870a418ae99cd5c9547d6bce30701a954e0e721821df483ef7223c/tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8", size = 107964 }, + { url = "https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", size = 163065 }, + { url = "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", size = 159088 }, + { url = "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", size = 268193 }, + { url = 
"https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", size = 275488 }, + { url = "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", size = 272669 }, + { url = "https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", size = 279709 }, + { url = "https://files.pythonhosted.org/packages/7e/46/cc36c679f09f27ded940281c38607716c86cf8ba4a518d524e349c8b4874/tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0", size = 107563 }, + { url = "https://files.pythonhosted.org/packages/84/ff/426ca8683cf7b753614480484f6437f568fd2fda2edbdf57a2d3d8b27a0b/tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba", size = 119756 }, + { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408 }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795 }, +] + +[[package]] +name = "uvicorn" +version = "0.38.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109 }, +] + +[[package]] +name = "virtualenv" +version = "20.35.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/d5/b0ccd381d55c8f45d46f77df6ae59fbc23d19e901e2d523395598e5f4c93/virtualenv-20.35.3.tar.gz", hash = "sha256:4f1a845d131133bdff10590489610c98c168ff99dc75d6c96853801f7f67af44", size = 6002907 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/73/d9a94da0e9d470a543c1b9d3ccbceb0f59455983088e727b8a1824ed90fb/virtualenv-20.35.3-py3-none-any.whl", hash = "sha256:63d106565078d8c8d0b206d48080f938a8b25361e19432d2c9db40d2899c810a", size = 5981061 }, +] + +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/1a/206e8cf2dd86fddf939165a57b4df61607a1e0add2785f170a3f616b7d9f/watchfiles-1.1.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:eef58232d32daf2ac67f42dea51a2c80f0d03379075d44a587051e63cc2e368c", size = 407318 }, + { url = "https://files.pythonhosted.org/packages/b3/0f/abaf5262b9c496b5dad4ed3c0e799cbecb1f8ea512ecb6ddd46646a9fca3/watchfiles-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03fa0f5237118a0c5e496185cafa92878568b652a2e9a9382a5151b1a0380a43", size = 394478 }, + { url = "https://files.pythonhosted.org/packages/b1/04/9cc0ba88697b34b755371f5ace8d3a4d9a15719c07bdc7bd13d7d8c6a341/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca65483439f9c791897f7db49202301deb6e15fe9f8fe2fed555bf986d10c31", size = 449894 }, + { url = "https://files.pythonhosted.org/packages/d2/9c/eda4615863cd8621e89aed4df680d8c3ec3da6a4cf1da113c17decd87c7f/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f0ab1c1af0cb38e3f598244c17919fb1a84d1629cc08355b0074b6d7f53138ac", size = 459065 }, + { url = "https://files.pythonhosted.org/packages/84/13/f28b3f340157d03cbc8197629bc109d1098764abe1e60874622a0be5c112/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bc570d6c01c206c46deb6e935a260be44f186a2f05179f52f7fcd2be086a94d", size = 488377 }, + { url = "https://files.pythonhosted.org/packages/86/93/cfa597fa9389e122488f7ffdbd6db505b3b915ca7435ecd7542e855898c2/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:e84087b432b6ac94778de547e08611266f1f8ffad28c0ee4c82e028b0fc5966d", size = 595837 }, + { url = "https://files.pythonhosted.org/packages/57/1e/68c1ed5652b48d89fc24d6af905d88ee4f82fa8bc491e2666004e307ded1/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:620bae625f4cb18427b1bb1a2d9426dc0dd5a5ba74c7c2cdb9de405f7b129863", size = 473456 }, + { url = "https://files.pythonhosted.org/packages/d5/dc/1a680b7458ffa3b14bb64878112aefc8f2e4f73c5af763cbf0bd43100658/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab", size = 455614 }, + { url = "https://files.pythonhosted.org/packages/61/a5/3d782a666512e01eaa6541a72ebac1d3aae191ff4a31274a66b8dd85760c/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bbe1ef33d45bc71cf21364df962af171f96ecaeca06bd9e3d0b583efb12aec82", size = 630690 }, + { url = "https://files.pythonhosted.org/packages/9b/73/bb5f38590e34687b2a9c47a244aa4dd50c56a825969c92c9c5fc7387cea1/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a0bb430adb19ef49389e1ad368450193a90038b5b752f4ac089ec6942c4dff4", size = 622459 }, + { url = "https://files.pythonhosted.org/packages/f1/ac/c9bb0ec696e07a20bd58af5399aeadaef195fb2c73d26baf55180fe4a942/watchfiles-1.1.1-cp310-cp310-win32.whl", hash = "sha256:3f6d37644155fb5beca5378feb8c1708d5783145f2a0f1c4d5a061a210254844", size = 272663 }, + { url = "https://files.pythonhosted.org/packages/11/a0/a60c5a7c2ec59fa062d9a9c61d02e3b6abd94d32aac2d8344c4bdd033326/watchfiles-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:a36d8efe0f290835fd0f33da35042a1bb5dc0e83cbc092dcf69bce442579e88e", size = 287453 }, + { url = "https://files.pythonhosted.org/packages/1f/f8/2c5f479fb531ce2f0564eda479faecf253d886b1ab3630a39b7bf7362d46/watchfiles-1.1.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5", size = 406529 }, + { url = "https://files.pythonhosted.org/packages/fe/cd/f515660b1f32f65df671ddf6f85bfaca621aee177712874dc30a97397977/watchfiles-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741", size = 394384 }, + { url = "https://files.pythonhosted.org/packages/7b/c3/28b7dc99733eab43fca2d10f55c86e03bd6ab11ca31b802abac26b23d161/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6", size = 448789 }, + { url = "https://files.pythonhosted.org/packages/4a/24/33e71113b320030011c8e4316ccca04194bf0cbbaeee207f00cbc7d6b9f5/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b", size = 460521 }, + { url = "https://files.pythonhosted.org/packages/f4/c3/3c9a55f255aa57b91579ae9e98c88704955fa9dac3e5614fb378291155df/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2cd9e04277e756a2e2d2543d65d1e2166d6fd4c9b183f8808634fda23f17b14", size = 488722 }, + { url = "https://files.pythonhosted.org/packages/49/36/506447b73eb46c120169dc1717fe2eff07c234bb3232a7200b5f5bd816e9/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d", size = 596088 }, + { url = 
"https://files.pythonhosted.org/packages/82/ab/5f39e752a9838ec4d52e9b87c1e80f1ee3ccdbe92e183c15b6577ab9de16/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff", size = 472923 }, + { url = "https://files.pythonhosted.org/packages/af/b9/a419292f05e302dea372fa7e6fda5178a92998411f8581b9830d28fb9edb/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606", size = 456080 }, + { url = "https://files.pythonhosted.org/packages/b0/c3/d5932fd62bde1a30c36e10c409dc5d54506726f08cb3e1d8d0ba5e2bc8db/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701", size = 629432 }, + { url = "https://files.pythonhosted.org/packages/f7/77/16bddd9779fafb795f1a94319dc965209c5641db5bf1edbbccace6d1b3c0/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10", size = 623046 }, + { url = "https://files.pythonhosted.org/packages/46/ef/f2ecb9a0f342b4bfad13a2787155c6ee7ce792140eac63a34676a2feeef2/watchfiles-1.1.1-cp311-cp311-win32.whl", hash = "sha256:de6da501c883f58ad50db3a32ad397b09ad29865b5f26f64c24d3e3281685849", size = 271473 }, + { url = "https://files.pythonhosted.org/packages/94/bc/f42d71125f19731ea435c3948cad148d31a64fccde3867e5ba4edee901f9/watchfiles-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:35c53bd62a0b885bf653ebf6b700d1bf05debb78ad9292cf2a942b23513dc4c4", size = 287598 }, + { url = "https://files.pythonhosted.org/packages/57/c9/a30f897351f95bbbfb6abcadafbaca711ce1162f4db95fc908c98a9165f3/watchfiles-1.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:57ca5281a8b5e27593cb7d82c2ac927ad88a96ed406aa446f6344e4328208e9e", size = 277210 }, + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745 }, + { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769 }, + { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374 }, + { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485 }, + { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813 }, + { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816 }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186 }, + { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812 }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196 }, + { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657 }, + { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042 }, + { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410 }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209 }, + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321 }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783 }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279 }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405 }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976 }, + { url = 
"https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506 }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936 }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147 }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007 }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280 }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056 }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162 }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909 }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389 }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964 }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114 }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264 }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877 }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176 }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577 }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425 }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826 }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208 }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315 }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869 }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919 }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845 }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027 }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615 }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836 }, + { url = 
"https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099 }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626 }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519 }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078 }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664 }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154 }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820 }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510 }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408 }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968 }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096 }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040 }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847 }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072 }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104 }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112 }, + { url = "https://files.pythonhosted.org/packages/ba/4c/a888c91e2e326872fa4705095d64acd8aa2fb9c1f7b9bd0588f33850516c/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:17ef139237dfced9da49fb7f2232c86ca9421f666d78c264c7ffca6601d154c3", size = 409611 }, + { url = "https://files.pythonhosted.org/packages/1e/c7/5420d1943c8e3ce1a21c0a9330bcf7edafb6aa65d26b21dbb3267c9e8112/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:672b8adf25b1a0d35c96b5888b7b18699d27d4194bac8beeae75be4b7a3fc9b2", size = 396889 }, + { url = "https://files.pythonhosted.org/packages/0c/e5/0072cef3804ce8d3aaddbfe7788aadff6b3d3f98a286fdbee9fd74ca59a7/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a13aea58bc2b90173bc69f2a90de8e282648939a00a602e1dc4ee23e26b66d", size = 451616 }, + { url = "https://files.pythonhosted.org/packages/83/4e/b87b71cbdfad81ad7e83358b3e447fedd281b880a03d64a760fe0a11fc2e/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b495de0bb386df6a12b18335a0285dda90260f51bdb505503c02bcd1ce27a8b", size = 458413 }, + { url = "https://files.pythonhosted.org/packages/d3/8e/e500f8b0b77be4ff753ac94dc06b33d8f0d839377fee1b78e8c8d8f031bf/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88", size = 408250 }, + { url = "https://files.pythonhosted.org/packages/bd/95/615e72cd27b85b61eec764a5ca51bd94d40b5adea5ff47567d9ebc4d275a/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336", size = 396117 }, + { url = "https://files.pythonhosted.org/packages/c9/81/e7fe958ce8a7fb5c73cc9fb07f5aeaf755e6aa72498c57d760af760c91f8/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24", size = 450493 }, + { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546 }, +] + +[[package]] +name = "websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = 
"sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423 }, + { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080 }, + { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329 }, + { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312 }, + { url = "https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319 }, + { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631 }, + { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016 }, + { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426 }, + { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360 }, + { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388 }, + { url = "https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830 }, + { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423 }, + { url = 
"https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082 }, + { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330 }, + { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878 }, + { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883 }, + { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252 }, + { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521 }, + { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958 }, + { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918 }, + { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388 }, + { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828 }, + { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437 }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096 }, + { url = 
"https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332 }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152 }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096 }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523 }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790 }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165 }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160 }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395 }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841 }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440 }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098 }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329 }, + { url = 
"https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111 }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054 }, + { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496 }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829 }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217 }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195 }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393 }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837 }, + { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109 }, + { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343 }, + { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599 }, + { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207 }, + { url = 
"https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155 }, + { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884 }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 }, +]