Skip to content

Commit af94616

Browse files
authored
Merge pull request #1271 from gabecano4308/input-validation
Adding rest of external input validators
2 parents 7ac25ed + 0e0f424 commit af94616

27 files changed

+215
-54
lines changed

data/src/data_utils/community_gardens.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def community_gardens(
4949
name="Community Gardens",
5050
esri_urls=COMMUNITY_GARDENS_TO_LOAD,
5151
cols=["site_name"],
52+
validator=CommunityGardensInputValidator(),
5253
)
5354

5455
community_gardens, input_validation = loader.load_or_fetch()

data/src/data_utils/imm_dang_buildings.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55

66
from src.metadata.metadata_utils import current_metadata, provide_metadata
77
from src.validation.base import ValidationResult, validate_output
8-
from src.validation.imm_dang_buildings import ImmDangerOutputValidator
8+
from src.validation.imm_dang_buildings import (
9+
ImmDangerInputValidator,
10+
ImmDangerOutputValidator,
11+
)
912

1013
from ..classes.loaders import CartoLoader
1114
from ..constants.services import IMMINENT_DANGER_BUILDINGS_QUERY
@@ -47,6 +50,7 @@ def imm_dang_buildings(
4750
name="Imminently Dangerous Buildings",
4851
carto_queries=IMMINENT_DANGER_BUILDINGS_QUERY,
4952
opa_col="opa_account_num",
53+
validator=ImmDangerInputValidator(),
5054
)
5155

5256
imm_dang_buildings, input_validation = loader.load_or_fetch()

data/src/data_utils/li_violations.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55

66
from src.metadata.metadata_utils import current_metadata, provide_metadata
77
from src.validation.base import ValidationResult, validate_output
8-
from src.validation.li_violations import LIViolationsOutputValidator
8+
from src.validation.li_violations import (
9+
LIViolationsOutputValidator,
10+
LIViolationsInputValidator,
11+
)
912

1013
from ..classes.loaders import CartoLoader
1114
from ..constants.services import VIOLATIONS_SQL_QUERY
@@ -61,6 +64,7 @@ def li_violations(
6164
name="LI Violations",
6265
carto_queries=VIOLATIONS_SQL_QUERY,
6366
opa_col="opa_account_num",
67+
validator=LIViolationsInputValidator(),
6468
)
6569

6670
l_and_i_violations, input_validation = loader.load_or_fetch()

data/src/data_utils/phs_properties.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
from src.metadata.metadata_utils import current_metadata, provide_metadata
66
from src.validation.base import ValidationResult, validate_output
7-
from src.validation.phs_properties import PHSPropertiesOutputValidator
7+
from src.validation.phs_properties import (
8+
PHSPropertiesInputValidator,
9+
PHSPropertiesOutputValidator,
10+
)
811

912
from ..classes.loaders import EsriLoader
1013
from ..constants.services import PHS_LAYERS_TO_LOAD
@@ -44,7 +47,10 @@ def phs_properties(
4447
print(input_gdf.head())
4548

4649
loader = EsriLoader(
47-
name="PHS Properties", esri_urls=PHS_LAYERS_TO_LOAD, cols=["program"]
50+
name="PHS Properties",
51+
esri_urls=PHS_LAYERS_TO_LOAD,
52+
cols=["program"],
53+
validator=PHSPropertiesInputValidator(),
4854
)
4955

5056
phs_properties, input_validation = loader.load_or_fetch()

data/src/data_utils/ppr_properties.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66

77
from src.metadata.metadata_utils import current_metadata, provide_metadata
88
from src.validation.base import ValidationResult, validate_output
9-
from src.validation.ppr_properties import PPRPropertiesOutputValidator
9+
from src.validation.ppr_properties import (
10+
PPRPropertiesInputValidator,
11+
PPRPropertiesOutputValidator,
12+
)
1013

1114
from ..classes.loaders import EsriLoader, GdfLoader
1215
from ..constants.services import PPR_PROPERTIES_TO_LOAD
@@ -75,6 +78,7 @@ def ppr_properties(
7578
input=io.BytesIO(response.content),
7679
name="PPR Properties",
7780
cols=["public_name"],
81+
validator=PPRPropertiesInputValidator(),
7882
)
7983
ppr_properties, input_validation = loader.load_or_fetch()
8084

data/src/data_utils/pwd_parcels.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
from src.metadata.metadata_utils import current_metadata, provide_metadata
66
from src.validation.base import ValidationResult, validate_output
7-
from src.validation.pwd_parcels import PWDParcelsOutputValidator
7+
from src.validation.pwd_parcels import (
8+
PWDParcelsInputValidator,
9+
PWDParcelsOutputValidator,
10+
)
811

912
from ..classes.loaders import CartoLoader
1013
from ..constants.services import PWD_PARCELS_QUERY
@@ -124,6 +127,7 @@ def pwd_parcels(
124127
name="PWD Parcels",
125128
carto_queries=PWD_PARCELS_QUERY,
126129
opa_col="brt_id",
130+
validator=PWDParcelsInputValidator(),
127131
)
128132

129133
pwd_parcels, input_validation = loader.load_or_fetch()

data/src/data_utils/rco_geoms.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from src.metadata.metadata_utils import current_metadata, provide_metadata
88
from src.validation.base import ValidationResult, validate_output
9-
from src.validation.rco_geoms import RCOGeomsOutputValidator
9+
from src.validation.rco_geoms import RCOGeomsOutputValidator, RCOGeomsInputValidator
1010

1111
from ..classes.loaders import EsriLoader
1212
from ..constants.services import RCOS_LAYERS_TO_LOAD
@@ -47,7 +47,9 @@ def rco_geoms(input_gdf: gpd.GeoDataFrame) -> Tuple[gpd.GeoDataFrame, Validation
4747
Columns referenced:
4848
opa_id, geometry
4949
"""
50-
loader = EsriLoader(name="RCOs", esri_urls=RCOS_LAYERS_TO_LOAD)
50+
loader = EsriLoader(
51+
name="RCOs", esri_urls=RCOS_LAYERS_TO_LOAD, validator=RCOGeomsInputValidator()
52+
)
5153
rco_geoms, input_validation = loader.load_or_fetch()
5254

5355
logger.debug(f"RCO data loaded: {len(rco_geoms)} RCO records")

data/src/data_utils/unsafe_buildings.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55

66
from src.metadata.metadata_utils import current_metadata, provide_metadata
77
from src.validation.base import ValidationResult, validate_output
8-
from src.validation.unsafe_buildings import UnsafeBuildingsOutputValidator
8+
from src.validation.unsafe_buildings import (
9+
UnsafeBuildingsOutputValidator,
10+
UnsafeBuildingsInputValidator,
11+
)
912

1013
from ..classes.loaders import CartoLoader
1114
from ..constants.services import UNSAFE_BUILDINGS_QUERY
@@ -46,6 +49,7 @@ def unsafe_buildings(
4649
name="Unsafe Buildings",
4750
carto_queries=UNSAFE_BUILDINGS_QUERY,
4851
opa_col="opa_account_num",
52+
validator=UnsafeBuildingsInputValidator(),
4953
)
5054

5155
unsafe_buildings, input_validation = loader.load_or_fetch()

data/src/data_utils/vacant_properties.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
from src.config.config import ROOT_DIRECTORY
99
from src.metadata.metadata_utils import current_metadata, provide_metadata
1010
from src.validation.base import ValidationResult, validate_output
11-
from src.validation.vacant_properties import VacantPropertiesOutputValidator
11+
from src.validation.vacant_properties import (
12+
VacantPropertiesInputValidator,
13+
VacantPropertiesOutputValidator,
14+
)
1215

1316
from ..classes.loaders import EsriLoader, google_cloud_bucket
1417
from ..constants.services import VACANT_PROPS_LAYERS_TO_LOAD
@@ -128,6 +131,7 @@ def vacant_properties(
128131
name="Vacant Properties",
129132
esri_urls=VACANT_PROPS_LAYERS_TO_LOAD,
130133
cols=["opa_id", "parcel_type"],
134+
validator=VacantPropertiesInputValidator(),
131135
)
132136

133137
vacant_properties, input_validation = loader.load_or_fetch()

data/src/validation/base.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,26 @@ def null_percentage_check(null_percent: float) -> Check:
754754
)
755755

756756

757+
def row_count_check(reference_count: int, tolerance: float = 0.1) -> Check:
    """
    Create a check that validates that the DataFrame's row count lies within a
    tolerance band around a reference count.

    Args:
        reference_count: The expected number of rows.
        tolerance: The allowed relative deviation, expressed as a fraction of
            reference_count (default 0.1, i.e. ±10%).

    Returns:
        Check: A pandera Check object that validates row count
    """
    import math  # imported locally so this function is self-contained

    lower_bound = reference_count * (1 - tolerance)
    upper_bound = reference_count * (1 + tolerance)

    # Row counts are integers, so the smallest/largest counts that satisfy the
    # (possibly fractional) bounds are ceil(lower) and floor(upper). Using them
    # keeps the accept/reject decision unchanged while making the error message
    # report bounds that actually pass the check (truncating the lower bound
    # with int() would advertise a value that is rejected).
    min_rows = math.ceil(lower_bound)
    max_rows = math.floor(upper_bound)

    return Check(
        lambda df: min_rows <= df.shape[0] <= max_rows,
        error=f"DataFrame size must be between {min_rows} and {max_rows} rows (±{tolerance * 100}% from {reference_count}).",
    )
775+
776+
757777
@dataclass
758778
class DistributionParams:
759779
min_value: Optional[int | float] = None

0 commit comments

Comments
 (0)