Skip to content

Commit 2b0cf25

Browse files
committed
docs: add Spark session initialization to PySpark examples and update author info
- Add SparkSession initialization to PySpark and decorator examples in README.md and getting_started.rst
- Update author name from "Data Products GYG" to "Data Products GetYourGuide" in pyproject.toml and conf.py
- Add expectation_count property and list_expectations() method to DataFrameExpectationsSuiteRunner
- Improve test_builder_pattern_immutability test to verify expectation counts and names
- Fix type checker error by making allow_none a keyword-only parameter in validate decorator
1 parent 66cf5a4 commit 2b0cf25

File tree

6 files changed

+47
-5
lines changed

6 files changed

+47
-5
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ runner.run(df)
8383
**PySpark example:**
8484
```python
8585
from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
86+
from pyspark.sql import SparkSession
87+
88+
# Initialize Spark session
89+
spark = SparkSession.builder.appName("example").getOrCreate()
8690

8791
# Build a validation suite (same API as Pandas!)
8892
suite = (
@@ -113,7 +117,10 @@ runner.run(df)
113117
**Decorator pattern for automatic validation:**
114118
```python
115119
from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
116-
import pandas as pd
120+
from pyspark.sql import SparkSession
121+
122+
# Initialize Spark session
123+
spark = SparkSession.builder.appName("example").getOrCreate()
117124

118125
suite = (
119126
DataFrameExpectationsSuite()

dataframe_expectations/expectations_suite.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,20 @@ def __init__(self, expectations: List):
6161
"""
6262
self.__expectations = tuple(expectations) # Immutable tuple
6363

64+
@property
65+
def expectation_count(self) -> int:
66+
"""Return the number of expectations in this runner."""
67+
return len(self.__expectations)
68+
69+
def list_expectations(self) -> List[str]:
70+
"""
71+
Return a list of expectation descriptions in this runner.
72+
73+
:return: List of expectation descriptions as strings in the format:
74+
"ExpectationName (description)"
75+
"""
76+
return [f"{exp}" for exp in self.__expectations]
77+
6478
def run(
6579
self,
6680
data_frame: DataFrameLike,

docs/source/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
# Project information
1919
project = 'DataFrame Expectations'
20-
copyright = '2025, Data Products GYG'
21-
author = 'Data Products GYG'
20+
copyright = '2025, GetYourGuide'
21+
author = 'Data Products GetYourGuide'
2222

2323
# Extensions
2424
extensions = [

docs/source/getting_started.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ PySpark Example
6262
.. code-block:: python
6363
6464
from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
65+
from pyspark.sql import SparkSession
66+
67+
# Initialize Spark session
68+
spark = SparkSession.builder.appName("example").getOrCreate()
6569
6670
# Build a validation suite (same API as Pandas!)
6771
suite = (
@@ -93,8 +97,11 @@ Decorator Pattern for Automatic Validation
9397

9498
.. code-block:: python
9599
96-
import pandas as pd
97100
from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
101+
from pyspark.sql import SparkSession
102+
103+
# Initialize Spark session
104+
spark = SparkSession.builder.appName("example").getOrCreate()
98105
99106
suite = (
100107
DataFrameExpectationsSuite()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description = "Python library designed to validate Pandas and PySpark DataFrames
55
readme = "README.md"
66
requires-python = ">=3.10"
77
authors = [
8-
{name = "Data Products GYG", email = "[email protected]"}
8+
{name = "Data Products GetYourGuide", email = "[email protected]"}
99
]
1010

1111
dependencies = [

tests/test_expectations_suite.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,12 +207,26 @@ def test_builder_pattern_immutability():
207207
# Build first runner
208208
runner1 = suite.build()
209209

210+
# Verify runner1 has exactly 1 expectation
211+
assert runner1.expectation_count == 1, "Runner1 should have 1 expectation"
212+
expectations_list = runner1.list_expectations()
213+
assert len(expectations_list) == 1
214+
assert expectations_list[0] == "ExpectationValueGreaterThan ('col1' is greater than 5)"
215+
210216
# Add more expectations to suite
211217
suite.expect_value_less_than(column_name="col1", value=20)
212218

213219
# Build second runner
214220
runner2 = suite.build()
215221

222+
# Verify runner2 has 2 expectations but runner1 is unchanged
223+
assert runner1.expectation_count == 1, "Runner1 should still have 1 expectation (immutable)"
224+
assert runner2.expectation_count == 2, "Runner2 should have 2 expectations"
225+
expectations_list2 = runner2.list_expectations()
226+
assert len(expectations_list2) == 2
227+
assert expectations_list2[0] == "ExpectationValueGreaterThan ('col1' is greater than 5)"
228+
assert expectations_list2[1] == "ExpectationValueLessThan ('col1' is less than 20)"
229+
216230
# Test data
217231
df = pd.DataFrame({"col1": [10, 15]})
218232

0 commit comments

Comments
 (0)