Skip to content

Commit abfb447

Browse files
committed
Refactor
1 parent 627c3be commit abfb447

File tree

8 files changed

+148
-148
lines changed

8 files changed

+148
-148
lines changed

docs/dqx/docs/reference/benchmarks.mdx

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -53,15 +53,15 @@ sidebar_position: 13
5353
| test_benchmark_foreach_sql_expression[n_rows_100000000_n_columns_5] | 0.895489 | 0.888982 | 0.853895 | 0.950998 | 0.041479 | 0.071722 | 0.858589 | 0.930311 | 5 | 0 | 2 | 1.12 |
5454
| test_benchmark_foreach_sql_query[n_rows_100000000_n_columns_5] | 4.578799 | 4.602143 | 4.442396 | 4.644892 | 0.083901 | 0.113694 | 4.530776 | 4.644470 | 5 | 0 | 1 | 0.22 |
5555
| test_benchmark_foreign_key | 31.784272 | 31.787610 | 31.414708 | 32.123221 | 0.269713 | 0.386951 | 31.597198 | 31.984149 | 5 | 0 | 2 | 0.03 |
56-
| test_benchmark_has_area_equal_to | 0.209381 | 0.207647 | 0.205255 | 0.216179 | 0.004471 | 0.006593 | 0.206066 | 0.212659 | 5 | 0 | 1 | 4.78 |
57-
| test_benchmark_has_area_greater_than | 0.171868 | 0.166867 | 0.161877 | 0.204155 | 0.015957 | 0.003194 | 0.164123 | 0.167316 | 6 | 1 | 1 | 5.82 |
58-
| test_benchmark_has_area_less_than | 0.177230 | 0.179352 | 0.161536 | 0.190875 | 0.010356 | 0.013261 | 0.169503 | 0.182763 | 6 | 0 | 2 | 5.64 |
59-
| test_benchmark_has_area_not_equal_to | 0.208875 | 0.207436 | 0.203626 | 0.217694 | 0.005257 | 0.004513 | 0.206265 | 0.210778 | 5 | 1 | 1 | 4.79 |
56+
| test_benchmark_is_area_equal_to | 0.209381 | 0.207647 | 0.205255 | 0.216179 | 0.004471 | 0.006593 | 0.206066 | 0.212659 | 5 | 0 | 1 | 4.78 |
57+
| test_benchmark_is_area_not_less_than | 0.171868 | 0.166867 | 0.161877 | 0.204155 | 0.015957 | 0.003194 | 0.164123 | 0.167316 | 6 | 1 | 1 | 5.82 |
58+
| test_benchmark_is_area_not_greater_than | 0.177230 | 0.179352 | 0.161536 | 0.190875 | 0.010356 | 0.013261 | 0.169503 | 0.182763 | 6 | 0 | 2 | 5.64 |
59+
| test_benchmark_is_area_not_equal_to | 0.208875 | 0.207436 | 0.203626 | 0.217694 | 0.005257 | 0.004513 | 0.206265 | 0.210778 | 5 | 1 | 1 | 4.79 |
6060
| test_benchmark_has_dimension | 0.215338 | 0.213285 | 0.210530 | 0.223131 | 0.005056 | 0.007086 | 0.211819 | 0.218905 | 5 | 0 | 1 | 4.64 |
61-
| test_benchmark_has_num_points_equal_to | 0.213472 | 0.208326 | 0.200840 | 0.228556 | 0.011595 | 0.018574 | 0.205502 | 0.224076 | 5 | 0 | 2 | 4.68 |
62-
| test_benchmark_has_num_points_greater_than | 0.159204 | 0.157405 | 0.151457 | 0.175503 | 0.008775 | 0.008935 | 0.152260 | 0.161195 | 6 | 1 | 1 | 6.28 |
63-
| test_benchmark_has_num_points_less_than | 0.162069 | 0.161908 | 0.149400 | 0.178192 | 0.010833 | 0.014197 | 0.154168 | 0.168365 | 5 | 0 | 2 | 6.17 |
64-
| test_benchmark_has_num_points_not_equal_to | 0.211439 | 0.212084 | 0.200625 | 0.223375 | 0.008900 | 0.013585 | 0.204124 | 0.217709 | 5 | 0 | 2 | 4.73 |
61+
| test_benchmark_is_num_points_equal_to | 0.213472 | 0.208326 | 0.200840 | 0.228556 | 0.011595 | 0.018574 | 0.205502 | 0.224076 | 5 | 0 | 2 | 4.68 |
62+
| test_benchmark_is_num_points_not_less_than | 0.159204 | 0.157405 | 0.151457 | 0.175503 | 0.008775 | 0.008935 | 0.152260 | 0.161195 | 6 | 1 | 1 | 6.28 |
63+
| test_benchmark_is_num_points_not_greater_than | 0.162069 | 0.161908 | 0.149400 | 0.178192 | 0.010833 | 0.014197 | 0.154168 | 0.168365 | 5 | 0 | 2 | 6.17 |
64+
| test_benchmark_is_num_points_not_equal_to | 0.211439 | 0.212084 | 0.200625 | 0.223375 | 0.008900 | 0.013585 | 0.204124 | 0.217709 | 5 | 0 | 2 | 4.73 |
6565
| test_benchmark_has_no_outliers | 0.234952 | 0.228169 | 0.224165 | 0.257274 | 0.013649 | 0.017354 | 0.225936 | 0.243290 | 5 | 0 | 1 | 4.26 |
6666
| test_benchmark_has_valid_schema | 0.172078 | 0.172141 | 0.163793 | 0.181081 | 0.006715 | 0.009295 | 0.167010 | 0.176305 | 6 | 0 | 2 | 5.81 |
6767
| test_benchmark_has_x_coordinate_between | 0.217192 | 0.213656 | 0.209310 | 0.236233 | 0.011150 | 0.012638 | 0.209410 | 0.222048 | 5 | 0 | 1 | 4.60 |

docs/dqx/docs/reference/quality_checks.mdx

Lines changed: 44 additions & 44 deletions
Large diffs are not rendered by default.

src/databricks/labs/dqx/geo/check_funcs.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -456,7 +456,7 @@ def has_y_coordinate_between(column: str | Column, min_value: float, max_value:
456456

457457

458458
@register_rule("row")
459-
def has_area_equal_to(
459+
def is_area_equal_to(
460460
column: str | Column, value: int | float | str | Column, srid: int | None = 3857, geodesic: bool = False
461461
) -> Column:
462462
"""
@@ -493,7 +493,7 @@ def has_area_equal_to(
493493

494494

495495
@register_rule("row")
496-
def has_area_not_equal_to(
496+
def is_area_not_equal_to(
497497
column: str | Column, value: int | float | str | Column, srid: int | None = 3857, geodesic: bool = False
498498
) -> Column:
499499
"""
@@ -530,7 +530,7 @@ def has_area_not_equal_to(
530530

531531

532532
@register_rule("row")
533-
def has_area_less_than(
533+
def is_area_not_greater_than(
534534
column: str | Column, value: int | float | str | Column, srid: int | None = 3857, geodesic: bool = False
535535
) -> Column:
536536
"""
@@ -567,7 +567,7 @@ def has_area_less_than(
567567

568568

569569
@register_rule("row")
570-
def has_area_greater_than(
570+
def is_area_not_less_than(
571571
column: str | Column, value: int | float | str | Column, srid: int | None = 3857, geodesic: bool = False
572572
) -> Column:
573573
"""
@@ -604,7 +604,7 @@ def has_area_greater_than(
604604

605605

606606
@register_rule("row")
607-
def has_num_points_equal_to(column: str | Column, value: int | float | str | Column) -> Column:
607+
def is_num_points_equal_to(column: str | Column, value: int | float | str | Column) -> Column:
608608
"""
609609
Checks if the number of coordinate pairs in values of a geometry column is equal to a specified value.
610610
@@ -632,7 +632,7 @@ def has_num_points_equal_to(column: str | Column, value: int | float | str | Col
632632

633633

634634
@register_rule("row")
635-
def has_num_points_not_equal_to(column: str | Column, value: int | float | str | Column) -> Column:
635+
def is_num_points_not_equal_to(column: str | Column, value: int | float | str | Column) -> Column:
636636
"""
637637
Checks if the number of coordinate pairs in values of a geometry column is not equal to a specified value.
638638
@@ -660,7 +660,7 @@ def has_num_points_not_equal_to(column: str | Column, value: int | float | str |
660660

661661

662662
@register_rule("row")
663-
def has_num_points_less_than(column: str | Column, value: int | float | str | Column) -> Column:
663+
def is_num_points_not_greater_than(column: str | Column, value: int | float | str | Column) -> Column:
664664
"""
665665
Checks if the number of coordinate pairs in the values of a geometry column is not greater than a specified limit.
666666
@@ -688,7 +688,7 @@ def has_num_points_less_than(column: str | Column, value: int | float | str | Co
688688

689689

690690
@register_rule("row")
691-
def has_num_points_greater_than(column: str | Column, value: int | float | str | Column) -> Column:
691+
def is_num_points_not_less_than(column: str | Column, value: int | float | str | Column) -> Column:
692692
"""
693693
Checks if the number of coordinate pairs in values of a geometry column is not less than a specified limit.
694694

tests/integration/test_apply_checks.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6625,49 +6625,49 @@ def test_apply_checks_all_geo_checks_using_classes(skip_if_runtime_not_geo_compa
66256625
),
66266626
DQRowRule(
66276627
criticality="error",
6628-
check_func=geo_check_funcs.has_area_greater_than,
6628+
check_func=geo_check_funcs.is_area_not_less_than,
66296629
column=F.col("polygon_geom"),
66306630
check_func_kwargs={"value": 0.0},
66316631
),
66326632
DQRowRule(
66336633
criticality="error",
6634-
check_func=geo_check_funcs.has_area_less_than,
6634+
check_func=geo_check_funcs.is_area_not_greater_than,
66356635
column=F.col("point_geom"),
66366636
check_func_kwargs={"value": 1.0},
66376637
),
66386638
DQRowRule(
66396639
criticality="error",
6640-
check_func=geo_check_funcs.has_area_equal_to,
6640+
check_func=geo_check_funcs.is_area_equal_to,
66416641
column=F.col("point_geom"),
66426642
check_func_kwargs={"value": 0.0},
66436643
),
66446644
DQRowRule(
66456645
criticality="error",
6646-
check_func=geo_check_funcs.has_area_not_equal_to,
6646+
check_func=geo_check_funcs.is_area_not_equal_to,
66476647
column=F.col("polygon_geom"),
66486648
check_func_kwargs={"value": 0.0},
66496649
),
66506650
DQRowRule(
66516651
criticality="error",
6652-
check_func=geo_check_funcs.has_num_points_greater_than,
6652+
check_func=geo_check_funcs.is_num_points_not_less_than,
66536653
column=F.col("polygon_geom"),
66546654
check_func_kwargs={"value": 2},
66556655
),
66566656
DQRowRule(
66576657
criticality="error",
6658-
check_func=geo_check_funcs.has_num_points_less_than,
6658+
check_func=geo_check_funcs.is_num_points_not_greater_than,
66596659
column=F.col("point_geom"),
66606660
check_func_kwargs={"value": 2},
66616661
),
66626662
DQRowRule(
66636663
criticality="error",
6664-
check_func=geo_check_funcs.has_num_points_equal_to,
6664+
check_func=geo_check_funcs.is_num_points_equal_to,
66656665
column=F.col("point_geom"),
66666666
check_func_kwargs={"value": 1},
66676667
),
66686668
DQRowRule(
66696669
criticality="error",
6670-
check_func=geo_check_funcs.has_num_points_not_equal_to,
6670+
check_func=geo_check_funcs.is_num_points_not_equal_to,
66716671
column=F.col("polygon_geom"),
66726672
check_func_kwargs={"value": 2},
66736673
),

tests/integration/test_row_checks_geo.py

Lines changed: 32 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
from chispa.dataframe_comparer import assert_df_equality # type: ignore
22
from databricks.labs.dqx.geo.check_funcs import (
3-
has_area_equal_to,
4-
has_area_not_equal_to,
5-
has_area_greater_than,
6-
has_area_less_than,
7-
has_num_points_equal_to,
8-
has_num_points_not_equal_to,
9-
has_num_points_greater_than,
10-
has_num_points_less_than,
3+
is_area_equal_to,
4+
is_area_not_equal_to,
5+
is_area_not_less_than,
6+
is_area_not_greater_than,
7+
is_num_points_equal_to,
8+
is_num_points_not_equal_to,
9+
is_num_points_not_less_than,
10+
is_num_points_not_greater_than,
1111
has_dimension,
1212
has_x_coordinate_between,
1313
has_y_coordinate_between,
@@ -408,7 +408,7 @@ def test_has_y_coordinate_between(skip_if_runtime_not_geo_compatible, spark):
408408
assert_df_equality(actual, expected, ignore_nullable=True)
409409

410410

411-
def test_has_area_equal_to(skip_if_runtime_not_geo_compatible, spark):
411+
def test_is_area_equal_to(skip_if_runtime_not_geo_compatible, spark):
412412
test_df = spark.sql(
413413
"""
414414
SELECT geom, geog FROM VALUES
@@ -422,9 +422,9 @@ def test_has_area_equal_to(skip_if_runtime_not_geo_compatible, spark):
422422
)
423423

424424
actual = test_df.select(
425-
has_area_equal_to("geom", 0.0).alias("basic_geometry"),
426-
has_area_equal_to("geom", 1.0, srid=4326).alias("geometry_srid"),
427-
has_area_equal_to("geog", 0.0, geodesic=True).alias("geography_geodesic"),
425+
is_area_equal_to("geom", 0.0).alias("basic_geometry"),
426+
is_area_equal_to("geom", 1.0, srid=4326).alias("geometry_srid"),
427+
is_area_equal_to("geog", 0.0, geodesic=True).alias("geography_geodesic"),
428428
)
429429

430430
checked_schema = "basic_geometry: string, geometry_srid: string, geography_geodesic: string"
@@ -454,7 +454,7 @@ def test_has_area_equal_to(skip_if_runtime_not_geo_compatible, spark):
454454
assert_df_equality(actual, expected, ignore_nullable=True)
455455

456456

457-
def test_has_area_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
457+
def test_is_area_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
458458
test_df = spark.sql(
459459
"""
460460
SELECT geom, geog FROM VALUES
@@ -468,9 +468,9 @@ def test_has_area_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
468468
)
469469

470470
actual = test_df.select(
471-
has_area_not_equal_to("geom", 0.0).alias("basic_geometry"),
472-
has_area_not_equal_to("geom", 1.0, srid=4326).alias("geometry_srid"),
473-
has_area_not_equal_to("geog", 0.0, geodesic=True).alias("geography_geodesic"),
471+
is_area_not_equal_to("geom", 0.0).alias("basic_geometry"),
472+
is_area_not_equal_to("geom", 1.0, srid=4326).alias("geometry_srid"),
473+
is_area_not_equal_to("geog", 0.0, geodesic=True).alias("geography_geodesic"),
474474
)
475475

476476
checked_schema = "basic_geometry: string, geometry_srid: string, geography_geodesic: string"
@@ -504,7 +504,7 @@ def test_has_area_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
504504
assert_df_equality(actual, expected, ignore_nullable=True)
505505

506506

507-
def test_has_area_less_than(skip_if_runtime_not_geo_compatible, spark):
507+
def test_is_area_not_greater_than(skip_if_runtime_not_geo_compatible, spark):
508508
test_df = spark.sql(
509509
"""
510510
SELECT geom, geog FROM VALUES
@@ -518,9 +518,9 @@ def test_has_area_less_than(skip_if_runtime_not_geo_compatible, spark):
518518
)
519519

520520
actual = test_df.select(
521-
has_area_less_than("geom", 20000.0).alias("basic_geometry"),
522-
has_area_less_than("geom", 1.0, srid=4326).alias("geometry_srid"),
523-
has_area_less_than("geog", 1000.0, geodesic=True).alias("geography_geodesic"),
521+
is_area_not_greater_than("geom", 20000.0).alias("basic_geometry"),
522+
is_area_not_greater_than("geom", 1.0, srid=4326).alias("geometry_srid"),
523+
is_area_not_greater_than("geog", 1000.0, geodesic=True).alias("geography_geodesic"),
524524
)
525525

526526
checked_schema = "basic_geometry: string, geometry_srid: string, geography_geodesic: string"
@@ -550,7 +550,7 @@ def test_has_area_less_than(skip_if_runtime_not_geo_compatible, spark):
550550
assert_df_equality(actual, expected, ignore_nullable=True)
551551

552552

553-
def test_has_area_greater_than(skip_if_runtime_not_geo_compatible, spark):
553+
def test_is_area_not_less_than(skip_if_runtime_not_geo_compatible, spark):
554554
test_df = spark.sql(
555555
"""
556556
SELECT geom, geog FROM VALUES
@@ -564,9 +564,9 @@ def test_has_area_greater_than(skip_if_runtime_not_geo_compatible, spark):
564564
)
565565

566566
actual = test_df.select(
567-
has_area_greater_than("geom", 20000.0).alias("basic_geometry"),
568-
has_area_greater_than("geom", 1.0, srid=4326).alias("geometry_srid"),
569-
has_area_greater_than("geog", 20000.0, geodesic=True).alias("geography_geodesic"),
567+
is_area_not_less_than("geom", 20000.0).alias("basic_geometry"),
568+
is_area_not_less_than("geom", 1.0, srid=4326).alias("geometry_srid"),
569+
is_area_not_less_than("geog", 20000.0, geodesic=True).alias("geography_geodesic"),
570570
)
571571

572572
checked_schema = "basic_geometry: string, geometry_srid: string, geography_geodesic: string"
@@ -600,7 +600,7 @@ def test_has_area_greater_than(skip_if_runtime_not_geo_compatible, spark):
600600
assert_df_equality(actual, expected, ignore_nullable=True)
601601

602602

603-
def test_has_num_points_equal_to(skip_if_runtime_not_geo_compatible, spark):
603+
def test_is_num_points_equal_to(skip_if_runtime_not_geo_compatible, spark):
604604
test_df = spark.sql(
605605
"""
606606
SELECT geom FROM VALUES
@@ -614,7 +614,7 @@ def test_has_num_points_equal_to(skip_if_runtime_not_geo_compatible, spark):
614614
"""
615615
)
616616

617-
actual = test_df.select(has_num_points_equal_to("geom", 5))
617+
actual = test_df.select(is_num_points_equal_to("geom", 5))
618618

619619
checked_schema = "geom_num_points_not_equal_to_limit: string"
620620
expected = spark.createDataFrame(
@@ -632,7 +632,7 @@ def test_has_num_points_equal_to(skip_if_runtime_not_geo_compatible, spark):
632632
assert_df_equality(actual, expected, ignore_nullable=True)
633633

634634

635-
def test_has_num_points_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
635+
def test_is_num_points_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
636636
test_df = spark.sql(
637637
"""
638638
SELECT geom FROM VALUES
@@ -646,7 +646,7 @@ def test_has_num_points_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
646646
"""
647647
)
648648

649-
actual = test_df.select(has_num_points_not_equal_to("geom", 1))
649+
actual = test_df.select(is_num_points_not_equal_to("geom", 1))
650650

651651
checked_schema = "geom_num_points_equal_to_limit: string"
652652
expected = spark.createDataFrame(
@@ -664,7 +664,7 @@ def test_has_num_points_not_equal_to(skip_if_runtime_not_geo_compatible, spark):
664664
assert_df_equality(actual, expected, ignore_nullable=True)
665665

666666

667-
def test_has_num_points_less_than(skip_if_runtime_not_geo_compatible, spark):
667+
def test_is_num_points_not_greater_than(skip_if_runtime_not_geo_compatible, spark):
668668
test_df = spark.sql(
669669
"""
670670
SELECT geom FROM VALUES
@@ -678,7 +678,7 @@ def test_has_num_points_less_than(skip_if_runtime_not_geo_compatible, spark):
678678
"""
679679
)
680680

681-
actual = test_df.select(has_num_points_less_than("geom", 3))
681+
actual = test_df.select(is_num_points_not_greater_than("geom", 3))
682682

683683
checked_schema = "geom_num_points_greater_than_limit: string"
684684
expected = spark.createDataFrame(
@@ -698,7 +698,7 @@ def test_has_num_points_less_than(skip_if_runtime_not_geo_compatible, spark):
698698
assert_df_equality(actual, expected, ignore_nullable=True)
699699

700700

701-
def test_has_num_points_greater_than(skip_if_runtime_not_geo_compatible, spark):
701+
def test_is_num_points_not_less_than(skip_if_runtime_not_geo_compatible, spark):
702702
test_df = spark.sql(
703703
"""
704704
SELECT geom FROM VALUES
@@ -712,7 +712,7 @@ def test_has_num_points_greater_than(skip_if_runtime_not_geo_compatible, spark):
712712
"""
713713
)
714714

715-
actual = test_df.select(has_num_points_greater_than("geom", 3))
715+
actual = test_df.select(is_num_points_not_less_than("geom", 3))
716716

717717
checked_schema = "geom_num_points_less_than_limit: string"
718718
expected = spark.createDataFrame(

0 commit comments

Comments
 (0)