Skip to content

Commit 07a15c2

Browse files
committed
Add validator for access process
1 parent c823217 commit 07a15c2

File tree

3 files changed

+83
-3
lines changed

3 files changed

+83
-3
lines changed

data/src/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
TreeCanopyValidator,
4949
VacantValidator,
5050
)
51+
from new_etl.validation.access_process import AccessProcessValidator
5152
from new_etl.validation.city_owned_properties import CityOwnedPropertiesValidator
5253
from new_etl.validation.council_dists import CouncilDistrictsValidator
5354
from new_etl.validation.nbhoods import NeighborhoodsValidator
@@ -86,6 +87,7 @@
8687
"phs_properties": PHSPropertiesValidator(),
8788
"ppr_properties": PPRPropertiesValidator(),
8889
"tree_canopy": TreeCanopyValidator(),
90+
"access_process": AccessProcessValidator(),
8991
# Add other service validators as they are created
9092
}
9193

data/src/new_etl/validation/__init__.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from .base import BaseValidator
1+
from .access_process import AccessProcessValidator
2+
from .base import ServiceValidator
23
from .city_owned_properties import CityOwnedPropertiesValidator
34
from .community_gardens import CommunityGardensValidator
45
from .council_dists import CouncilDistrictsValidator
@@ -10,10 +11,11 @@
1011
from .ppr_properties import PPRPropertiesValidator
1112
from .rco_geoms import RCOGeomsValidator
1213
from .tree_canopy import TreeCanopyValidator
13-
from .vacant import VacantValidator
14+
from .vacant_properties import VacantValidator
1415

1516
__all__ = [
16-
"BaseValidator",
17+
"AccessProcessValidator",
18+
"ServiceValidator",
1719
"CityOwnedPropertiesValidator",
1820
"CommunityGardensValidator",
1921
"CouncilDistrictsValidator",
data/src/new_etl/validation/access_process.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
from typing import List, Tuple
2+
3+
import geopandas as gpd
4+
5+
from .base import ServiceValidator
6+
7+
8+
class AccessProcessValidator(ServiceValidator):
    """Validator for access process service."""

    # Closed set of values the ``access_process`` column is allowed to hold.
    VALID_PROCESSES = frozenset(
        {
            "Go through Land Bank",
            "Do Nothing",
            "Private Land Use Agreement",
            "Buy Property",
        }
    )

    def validate(self, data: gpd.GeoDataFrame) -> Tuple[bool, List[str]]:
        """
        Validate access process data.

        Critical checks:
        - Required fields present (opa_id, access_process)
        - No duplicate opa_ids
        - opa_id and access_process are object (string) dtype
        - No nulls in opa_id or access_process
        - Valid geometries
        - Valid access process values

        Every problem is reported through the returned error list; a missing
        ``access_process`` column or an empty frame does not raise.

        Args:
            data: GeoDataFrame to validate.

        Returns:
            Tuple of (is_valid, list of error messages)
        """
        errors: List[str] = []
        has_opa_id = "opa_id" in data.columns
        has_access_process = "access_process" in data.columns

        # Check required columns
        errors.extend(self.check_required_columns(data, ["opa_id", "access_process"]))

        # Check for duplicate opa_ids
        errors.extend(self.check_duplicates(data, "opa_id"))

        # Check data types
        # NOTE(review): only the "object" dtype is accepted, so columns using
        # a dedicated pandas string dtype are reported as errors -- confirm
        # this is intended.
        if has_opa_id and data["opa_id"].dtype != "object":
            errors.append("opa_id must be string type")
        if has_access_process and data["access_process"].dtype != "object":
            errors.append("access_process must be string type")

        # Check null values in critical fields (threshold 0.0: no nulls allowed)
        errors.extend(self.check_null_percentage(data, "opa_id", threshold=0.0))
        errors.extend(
            self.check_null_percentage(data, "access_process", threshold=0.0)
        )

        # Check geometry validity
        if not data.geometry.is_valid.all():
            errors.append("Found invalid geometries")

        total_count = len(data)

        # Check for valid access process values. Skipped when the column is
        # absent: that case is already reported by the required-column check,
        # and indexing the column here would raise KeyError.
        if has_access_process:
            invalid_processes = (
                set(data["access_process"].unique()) - self.VALID_PROCESSES
            )
            if invalid_processes:
                # Stringify before joining so null / non-string values cannot
                # raise TypeError, and sort so the message is deterministic.
                listed = ", ".join(sorted(map(str, invalid_processes)))
                errors.append(f"Found invalid access processes: {listed}")

        # Log statistics about access processes
        print("\nAccess Process Statistics:")
        print(f"- Total properties: {total_count}")

        if has_access_process:
            # One pass over the column instead of one boolean filter per value.
            counts = data["access_process"].value_counts()
            for process in sorted(self.VALID_PROCESSES):
                count = int(counts.get(process, 0))
                # Guard the percentage against an empty frame.
                share = count / total_count if total_count else 0.0
                print(f"- {process}: {count} ({share:.1%})")

        return len(errors) == 0, errors

0 commit comments

Comments
 (0)