|
9 | 9 | DataFrameAggregationExpectation, |
10 | 10 | ) |
11 | 11 | from dataframe_expectations.expectations.expectation_registry import ( |
| 12 | + ExpectationCategory, |
| 13 | + ExpectationSubcategory, |
12 | 14 | register_expectation, |
13 | 15 | ) |
14 | 16 | from dataframe_expectations.expectations.utils import requires_params |
@@ -36,8 +38,7 @@ def __init__(self, min_rows: int): |
36 | 38 | """ |
37 | 39 | Initialize the minimum rows expectation. |
38 | 40 |
|
39 | | - Args: |
40 | | - min_rows (int): Minimum number of rows required (inclusive). |
| 41 | + :param min_rows: Minimum number of rows required (inclusive). |
41 | 42 | """ |
42 | 43 | if min_rows < 0: |
43 | 44 | raise ValueError(f"min_rows must be non-negative, got {min_rows}") |
@@ -122,8 +123,7 @@ def __init__(self, max_rows: int): |
122 | 123 | """ |
123 | 124 | Initialize the maximum rows expectation. |
124 | 125 |
|
125 | | - Args: |
126 | | - max_rows (int): Maximum number of rows allowed (inclusive). |
| 126 | + :param max_rows: Maximum number of rows allowed (inclusive). |
127 | 127 | """ |
128 | 128 | if max_rows < 0: |
129 | 129 | raise ValueError(f"max_rows must be non-negative, got {max_rows}") |
@@ -212,9 +212,8 @@ def __init__(self, column_name: str, max_percentage: float): |
212 | 212 | """ |
213 | 213 | Initialize the maximum null percentage expectation. |
214 | 214 |
|
215 | | - Args: |
216 | | - column_name (str): Name of the column to check for null percentage. |
217 | | - max_percentage (float): Maximum percentage of null values allowed (0.0-100.0). |
| 215 | + :param column_name: Name of the column to check for null percentage. |
| 216 | + :param max_percentage: Maximum percentage of null values allowed (0.0-100.0). |
218 | 217 | """ |
219 | 218 | if not 0 <= max_percentage <= 100: |
220 | 219 | raise ValueError(f"max_percentage must be between 0.0 and 100.0, got {max_percentage}") |
@@ -330,9 +329,8 @@ def __init__(self, column_name: str, max_count: int): |
330 | 329 | """ |
331 | 330 | Initialize the maximum null count expectation. |
332 | 331 |
|
333 | | - Args: |
334 | | - column_name (str): Name of the column to check for null count. |
335 | | - max_count (int): Maximum number of null values allowed. |
| 332 | + :param column_name: Name of the column to check for null count. |
| 333 | + :param max_count: Maximum number of null values allowed. |
336 | 334 | """ |
337 | 335 | if max_count < 0: |
338 | 336 | raise ValueError(f"max_count must be non-negative, got {max_count}") |
@@ -414,77 +412,101 @@ def aggregate_and_validate_pyspark( |
414 | 412 |
|
415 | 413 |
|
416 | 414 | # Factory functions for the registry |
417 | | -@register_expectation("ExpectationMinRows") |
| 415 | +@register_expectation( |
| 416 | + "ExpectationMinRows", |
| 417 | + pydoc="Check if the DataFrame has at least a minimum number of rows", |
| 418 | + category=ExpectationCategory.DATAFRAME_AGGREGATION_EXPECTATIONS, |
| 419 | + subcategory=ExpectationSubcategory.ANY_VALUE, |
| 420 | + params_doc={ |
| 421 | + "min_rows": "The minimum number of rows expected", |
| 422 | + }, |
| 423 | +) |
418 | 424 | @requires_params("min_rows", types={"min_rows": int}) |
419 | | -def create_expectation_min_rows(**kwargs) -> ExpectationMinRows: |
| 425 | +def create_expectation_min_rows(min_rows: int) -> ExpectationMinRows: |
420 | 426 | """ |
421 | 427 | Create an ExpectationMinRows instance. |
422 | 428 |
|
423 | | - Args: |
424 | | - min_rows (int): Minimum number of rows required. |
425 | | -
|
426 | | - Returns: |
427 | | - ExpectationMinRows: A configured expectation instance. |
| 429 | + :param min_rows: Minimum number of rows required. |
| 430 | + :return: A configured expectation instance. |
428 | 431 | """ |
429 | | - return ExpectationMinRows(min_rows=kwargs["min_rows"]) |
| 432 | + return ExpectationMinRows(min_rows=min_rows) |
430 | 433 |
|
431 | 434 |
|
432 | | -@register_expectation("ExpectationMaxRows") |
| 435 | +@register_expectation( |
| 436 | + "ExpectationMaxRows", |
| 437 | + pydoc="Check if the DataFrame has at most a maximum number of rows", |
| 438 | + category=ExpectationCategory.DATAFRAME_AGGREGATION_EXPECTATIONS, |
| 439 | + subcategory=ExpectationSubcategory.ANY_VALUE, |
| 440 | + params_doc={ |
| 441 | + "max_rows": "The maximum number of rows expected", |
| 442 | + }, |
| 443 | +) |
433 | 444 | @requires_params("max_rows", types={"max_rows": int}) |
434 | | -def create_expectation_max_rows(**kwargs) -> ExpectationMaxRows: |
| 445 | +def create_expectation_max_rows(max_rows: int) -> ExpectationMaxRows: |
435 | 446 | """ |
436 | 447 | Create an ExpectationMaxRows instance. |
437 | 448 |
|
438 | | - Args: |
439 | | - max_rows (int): Maximum number of rows allowed. |
440 | | -
|
441 | | - Returns: |
442 | | - ExpectationMaxRows: A configured expectation instance. |
| 449 | + :param max_rows: Maximum number of rows allowed. |
| 450 | + :return: A configured expectation instance. |
443 | 451 | """ |
444 | | - return ExpectationMaxRows(max_rows=kwargs["max_rows"]) |
445 | | - |
446 | | - |
447 | | -@register_expectation("ExpectationMaxNullPercentage") |
| 452 | + return ExpectationMaxRows(max_rows=max_rows) |
| 453 | + |
| 454 | + |
| 455 | +@register_expectation( |
| 456 | + "ExpectationMaxNullPercentage", |
| 457 | + pydoc="Check if the percentage of null/NaN values in a specific column is below a threshold", |
| 458 | + category=ExpectationCategory.COLUMN_AGGREGATION_EXPECTATIONS, |
| 459 | + subcategory=ExpectationSubcategory.ANY_VALUE, |
| 460 | + params_doc={ |
| 461 | + "column_name": "The name of the column to check for null percentage", |
| 462 | + "max_percentage": "The maximum allowed percentage of null/NaN values (0.0 to 100.0)", |
| 463 | + }, |
| 464 | +) |
448 | 465 | @requires_params( |
449 | 466 | "column_name", |
450 | 467 | "max_percentage", |
451 | 468 | types={"column_name": str, "max_percentage": (int, float)}, |
452 | 469 | ) |
453 | | -def create_expectation_max_null_percentage(**kwargs) -> ExpectationMaxNullPercentage: |
| 470 | +def create_expectation_max_null_percentage( |
| 471 | + column_name: str, max_percentage: float |
| 472 | +) -> ExpectationMaxNullPercentage: |
454 | 473 | """ |
455 | 474 | Create an ExpectationMaxNullPercentage instance. |
456 | 475 |
|
457 | | - Args: |
458 | | - column_name (str): Name of the column to check for null percentage. |
459 | | - max_percentage (float): Maximum percentage of null values allowed (0.0-100.0). |
460 | | -
|
461 | | - Returns: |
462 | | - ExpectationMaxNullPercentage: A configured expectation instance. |
| 476 | + :param column_name: Name of the column to check for null percentage. |
| 477 | + :param max_percentage: Maximum percentage of null values allowed (0.0-100.0). |
| 478 | + :return: A configured expectation instance. |
463 | 479 | """ |
464 | 480 | return ExpectationMaxNullPercentage( |
465 | | - column_name=kwargs["column_name"], |
466 | | - max_percentage=kwargs["max_percentage"], |
| 481 | + column_name=column_name, |
| 482 | + max_percentage=max_percentage, |
467 | 483 | ) |
468 | 484 |
|
469 | 485 |
|
470 | | -@register_expectation("ExpectationMaxNullCount") |
| 486 | +@register_expectation( |
| 487 | + "ExpectationMaxNullCount", |
| 488 | + pydoc="Check if the count of null/NaN values in a specific column is below a threshold", |
| 489 | + category=ExpectationCategory.COLUMN_AGGREGATION_EXPECTATIONS, |
| 490 | + subcategory=ExpectationSubcategory.ANY_VALUE, |
| 491 | + params_doc={ |
| 492 | + "column_name": "The name of the column to check for null count", |
| 493 | + "max_count": "The maximum allowed count of null/NaN values", |
| 494 | + }, |
| 495 | +) |
471 | 496 | @requires_params( |
472 | 497 | "column_name", |
473 | 498 | "max_count", |
474 | 499 | types={"column_name": str, "max_count": int}, |
475 | 500 | ) |
476 | | -def create_expectation_max_null_count(**kwargs) -> ExpectationMaxNullCount: |
| 501 | +def create_expectation_max_null_count(column_name: str, max_count: int) -> ExpectationMaxNullCount: |
477 | 502 | """ |
478 | 503 | Create an ExpectationMaxNullCount instance. |
479 | 504 |
|
480 | | - Args: |
481 | | - column_name (str): Name of the column to check for null count. |
482 | | - max_count (int): Maximum number of null values allowed. |
483 | | -
|
484 | | - Returns: |
485 | | - ExpectationMaxNullCount: A configured expectation instance. |
| 505 | + :param column_name: Name of the column to check for null count. |
| 506 | + :param max_count: Maximum number of null values allowed. |
| 507 | + :return: A configured expectation instance. |
486 | 508 | """ |
487 | 509 | return ExpectationMaxNullCount( |
488 | | - column_name=kwargs["column_name"], |
489 | | - max_count=kwargs["max_count"], |
| 510 | + column_name=column_name, |
| 511 | + max_count=max_count, |
490 | 512 | ) |
0 commit comments