@@ -21,6 +21,14 @@ standardized solution for this use case. As a result, any contributions made her
2121pip install dataframe-expectations
2222```
2323
24+ ### Requirements
25+
26+ * Python 3.10+
27+ * pandas >= 1.5.0
28+ * pydantic >= 2.12.4
29+ * pyspark >= 3.3.0
30+ * tabulate >= 0.8.9
31+
2432### Development setup
2533
2634To set up the development environment:
@@ -45,67 +53,127 @@ uv run pytest tests/ --cov=dataframe_expectations
4553
4654### Using the library
4755
48- ** Pandas example :**
56+ ** Basic usage with Pandas :**
4957``` python
50- from dataframe_expectations.expectations_suite import DataFameExpectationsSuite
58+ from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
59+ import pandas as pd
5160
61+ # Build a suite with expectations
5262suite = (
5363 DataFrameExpectationsSuite()
54- .expect_value_greater_than(" age" , 18 )
55- .expect_value_less_than(" age" , 10 )
64+ .expect_min_rows(min_rows = 3 )
65+ .expect_max_rows(max_rows = 10 )
66+ .expect_value_greater_than(column_name = " age" , value = 18 )
67+ .expect_value_less_than(column_name = " salary" , value = 100000 )
68+ .expect_value_not_null(column_name = " name" )
5669)
5770
58- # Create a Pandas DataFrame
59- import pandas as pd
60- test_pandas_df = pd.DataFrame({" age" : [20 , 15 , 30 ], " name" : [" Alice" , " Bob" , " Charlie" ]})
61-
62- suite.run(test_pandas_df)
71+ # Create a runner
72+ runner = suite.build()
6373
74+ # Validate a DataFrame
75+ df = pd.DataFrame({
76+ " age" : [25 , 15 , 45 , 22 ],
77+ " name" : [" Alice" , " Bob" , " Charlie" , " Diana" ],
78+ " salary" : [50000 , 60000 , 80000 , 45000 ]
79+ })
80+ runner.run(df)
6481```
6582
66-
6783** Basic usage with PySpark:**
6884``` python
6985from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
7086
87+ # Build a validation suite (same API as Pandas!)
7188suite = (
7289 DataFrameExpectationsSuite()
73- .expect_value_greater_than(" age" , 18 )
74- .expect_value_less_than(" age" , 40 )
90+ .expect_min_rows(min_rows = 3 )
91+ .expect_max_rows(max_rows = 10 )
92+ .expect_value_greater_than(column_name = " age" , value = 18 )
93+ .expect_value_less_than(column_name = " salary" , value = 100000 )
94+ .expect_value_not_null(column_name = " name" )
7595)
7696
97+ # Build the runner
98+ runner = suite.build()
99+
77100# Create a PySpark DataFrame (assumes an active SparkSession bound to `spark`)
78- test_spark_df = spark.createDataFrame(
79- [
80- {" name" : " Alice" , " age" : 20 },
81- {" name" : " Bob" , " age" : 15 },
82- {" name" : " Charlie" , " age" : 30 },
83- ]
84- )
101+ data = [
102+ {" age" : 25 , " name" : " Alice" , " salary" : 50000 },
103+ {" age" : 15 , " name" : " Bob" , " salary" : 60000 },
104+ {" age" : 45 , " name" : " Charlie" , " salary" : 80000 },
105+ {" age" : 22 , " name" : " Diana" , " salary" : 45000 }
106+ ]
107+ df = spark.createDataFrame(data)
108+
109+ # Validate
110+ runner.run(df)
111+ ```
112+
113+ ** Decorator pattern for automatic validation:**
114+ ``` python
115+ from dataframe_expectations.expectations_suite import DataFrameExpectationsSuite
116+ import pandas as pd
85117
86- suite.run(test_spark_df)
118+ suite = (
119+ DataFrameExpectationsSuite()
120+ .expect_min_rows(min_rows = 3 )
121+ .expect_max_rows(max_rows = 10 )
122+ .expect_value_greater_than(column_name = " age" , value = 18 )
123+ .expect_value_less_than(column_name = " salary" , value = 100000 )
124+ .expect_value_not_null(column_name = " name" )
125+ )
87126
127+ # Build the runner
128+ runner = suite.build()
129+
130+ # Apply decorator to automatically validate function output (this example assumes an active SparkSession bound to `spark`)
131+ @runner.validate
132+ def load_employee_data ():
133+ """ Load and return employee data - automatically validated."""
134+ return spark.createDataFrame(
135+ [
136+ {" age" : 25 , " name" : " Alice" , " salary" : 50000 },
137+ {" age" : 15 , " name" : " Bob" , " salary" : 60000 },
138+ {" age" : 45 , " name" : " Charlie" , " salary" : 80000 },
139+ {" age" : 22 , " name" : " Diana" , " salary" : 45000 }
140+ ]
141+ )
142+
143+ # Function execution automatically validates the returned DataFrame
144+ df = load_employee_data() # Raises DataFrameExpectationsSuiteFailure if validation fails
145+
146+ # Allow functions that may return None
147+ @runner.validate (allow_none = True )
148+ def conditional_load (should_load : bool ):
149+ """ Conditionally load data - validation only runs when a DataFrame is returned."""
150+ if should_load:
151+ return spark.createDataFrame([{" age" : 25 , " name" : " Alice" , " salary" : 50000 }])
152+ return None # No validation when None is returned
88153```
89154
90155** Output:**
91156``` text
92157========================== Running expectations suite ==========================
93- ExpectationValueGreaterThan (' age' greater than 18 ) ... FAIL
94- ExpectationValueLessThan (' age' less than 40 ) ... OK
95- ============================ 1 success, 1 failures ============================ =
158+ ExpectationMinRows (DataFrame contains at least 3 rows) ... OK
159+ ExpectationMaxRows (DataFrame contains at most 10 rows) ... OK
160+ ExpectationValueGreaterThan (' age' is greater than 18 ) ... FAIL
161+ ExpectationValueLessThan (' salary' is less than 100000 ) ... OK
162+ ExpectationValueNotNull (' name' is not null) ... OK
163+ ============================ 4 success, 1 failures ============================ =
96164
97- ExpectationSuiteFailure: (1 / 2 ) expectations failed.
165+ ExpectationSuiteFailure: (1 / 5 ) expectations failed.
98166
99167================================================================================
100168List of violations:
101169--------------------------------------------------------------------------------
102- [Failed 1 / 1 ] ExpectationValueGreaterThan (' age' greater than 18 ): Found 1 row(s) where ' age' is not greater than 18 .
170+ [Failed 1 / 1 ] ExpectationValueGreaterThan (' age' is greater than 18 ): Found 1 row(s) where ' age' is not greater than 18 .
103171Some examples of violations:
104- + ---- -+ ------ +
105- | age | name |
106- + ---- -+ ------ +
107- | 15 | Bob |
108- + ---- -+ ------ +
172+ + ---- -+ ------ + -------- +
173+ | age | name | salary |
174+ + ---- -+ ------ + -------- +
175+ | 15 | Bob | 60000 |
176+ + ---- -+ ------ + -------- +
109177================================================================================
110178
111179```
0 commit comments