|
40 | 40 | vacant_properties, |
41 | 41 | ) |
42 | 42 | from new_etl.database import to_postgis_with_schema |
43 | | - |
| 43 | +from new_etl.validation import ( |
| 44 | + CommunityGardensValidator, |
| 45 | + KDEValidator, |
| 46 | + LIViolationsValidator, |
| 47 | + OwnerTypeValidator, |
| 48 | + TreeCanopyValidator, |
| 49 | + VacantValidator, |
| 50 | +) |
| 51 | +from new_etl.validation.access_process import AccessProcessValidator |
| 52 | +from new_etl.validation.city_owned_properties import CityOwnedPropertiesValidator |
| 53 | +from new_etl.validation.council_dists import CouncilDistrictsValidator |
| 54 | +from new_etl.validation.nbhoods import NeighborhoodsValidator |
| 55 | +from new_etl.validation.phs_properties import PHSPropertiesValidator |
| 56 | +from new_etl.validation.ppr_properties import PPRPropertiesValidator |
| 57 | +from new_etl.validation.rco_geoms import RCOGeomsValidator |
| 58 | + |
| 59 | +# Map each service function's __name__ to the validator run against that service's output GeoDataFrame |
| 60 | +SERVICE_VALIDATORS = { |
| 61 | + "community_gardens": CommunityGardensValidator(), |
| 62 | + "drug_crime": KDEValidator().configure( |
| 63 | + density_column="drug_crimes_density", |
| 64 | + zscore_column="drug_crimes_density_zscore", |
| 65 | + label_column="drug_crimes_density_label", |
| 66 | + percentile_column="drug_crimes_density_percentile", |
| 67 | + ), |
| 68 | + "gun_crime": KDEValidator().configure( |
| 69 | + density_column="gun_crimes_density", |
| 70 | + zscore_column="gun_crimes_density_zscore", |
| 71 | + label_column="gun_crimes_density_label", |
| 72 | + percentile_column="gun_crimes_density_percentile", |
| 73 | + ), |
| 74 | + "li_complaints": KDEValidator().configure( |
| 75 | + density_column="l_and_i_complaints_density", |
| 76 | + zscore_column="l_and_i_complaints_density_zscore", |
| 77 | + label_column="l_and_i_complaints_density_label", |
| 78 | + percentile_column="l_and_i_complaints_density_percentile", |
| 79 | + ), |
| 80 | + "li_violations": LIViolationsValidator(), |
| 81 | + "owner_type": OwnerTypeValidator(), |
| 82 | + "vacant": VacantValidator(), |
| 83 | + "council_dists": CouncilDistrictsValidator(), |
| 84 | + "nbhoods": NeighborhoodsValidator(), |
| 85 | + "rco_geoms": RCOGeomsValidator(), |
| 86 | + "city_owned_properties": CityOwnedPropertiesValidator(), |
| 87 | + "phs_properties": PHSPropertiesValidator(), |
| 88 | + "ppr_properties": PPRPropertiesValidator(), |
| 89 | + "tree_canopy": TreeCanopyValidator(), |
| 90 | + "access_process": AccessProcessValidator(), |
| 91 | + # Add further entries as validators are written; a service with no entry here skips validation |
| 92 | +} |
44 | 93 |
|
45 | 94 | try: |
46 | 95 | print("Starting ETL process.") |
|
79 | 128 | print(f"Running service: {service.__name__}") |
80 | 129 | dataset = service(dataset) |
81 | 130 |
|
| 131 | + # Validate this service's output if a validator is registered; on failure, report to Slack and raise ValueError |
| 132 | + if service.__name__ in SERVICE_VALIDATORS: |
| 133 | + validator = SERVICE_VALIDATORS[service.__name__] |
| 134 | + is_valid, errors = validator.validate(dataset.gdf) |
| 135 | + |
| 136 | + if not is_valid: |
| 137 | + error_message = ( |
| 138 | + f"Data validation failed for {service.__name__}:\n" |
| 139 | + + "\n".join(errors) |
| 140 | + ) |
| 141 | + send_error_to_slack(error_message) |
| 142 | + raise ValueError(error_message) |
| 143 | + |
| 144 | + print(f"Validation passed for {service.__name__}") |
| 145 | + |
82 | 146 | print("Applying final dataset transformations.") |
83 | 147 | dataset = priority_level(dataset) |
84 | 148 | dataset = access_process(dataset) |
|
0 commit comments