|
| 1 | +from typing import List, Tuple |
| 2 | + |
| 3 | +import geopandas as gpd |
| 4 | + |
| 5 | +from .base import ServiceValidator |
| 6 | + |
| 7 | + |
class AccessProcessValidator(ServiceValidator):
    """Validator for access process service."""

    def validate(self, data: gpd.GeoDataFrame) -> Tuple[bool, List[str]]:
        """
        Validate access process data.

        Critical checks:
        - Required fields present (opa_id, access_process)
        - No duplicate opa_ids
        - opa_id and access_process stored with object (string) dtype
        - No nulls in opa_id or access_process
        - Valid geometries
        - Valid access process values

        Every problem is collected into the returned error list rather than
        raised, so a missing ``access_process`` column, an empty frame, or
        non-string values in ``access_process`` are reported (or skipped)
        instead of aborting validation part-way through.

        As a side effect, summary statistics about the access processes are
        printed to stdout.

        Args:
            data: GeoDataFrame holding the access process service output.

        Returns:
            Tuple of (is_valid, list of error messages)
        """
        errors: List[str] = []
        has_opa_id = "opa_id" in data.columns
        has_access_process = "access_process" in data.columns

        # Check required columns
        errors.extend(self.check_required_columns(data, ["opa_id", "access_process"]))

        # Check for duplicate opa_ids
        errors.extend(self.check_duplicates(data, "opa_id"))

        # Check data types
        if has_opa_id and data["opa_id"].dtype != "object":
            errors.append("opa_id must be string type")
        if has_access_process and data["access_process"].dtype != "object":
            errors.append("access_process must be string type")

        # Check null values in critical fields (no nulls allowed)
        errors.extend(self.check_null_percentage(data, "opa_id", threshold=0.0))
        errors.extend(
            self.check_null_percentage(data, "access_process", threshold=0.0)
        )

        # Check geometry validity
        if not data.geometry.is_valid.all():
            errors.append("Found invalid geometries")

        total_count = len(data)

        valid_processes = {
            "Go through Land Bank",
            "Do Nothing",
            "Private Land Use Agreement",
            "Buy Property",
        }

        # Check for valid access process values. Skipped when the column is
        # absent: that case is left to the required-columns check above, and
        # indexing the column here would raise KeyError.
        if has_access_process:
            invalid_processes = set(data["access_process"].unique()) - valid_processes
            if invalid_processes:
                # Stringify before joining so nulls or numeric values cannot
                # raise TypeError; sort so the message is deterministic.
                invalid_labels = sorted({str(value) for value in invalid_processes})
                errors.append(
                    f"Found invalid access processes: {', '.join(invalid_labels)}"
                )

        # Log statistics about access processes
        print("\nAccess Process Statistics:")
        print(f"- Total properties: {total_count}")

        if has_access_process:
            for process in sorted(valid_processes):
                count = int((data["access_process"] == process).sum())
                # An empty frame has no meaningful share; report 0.0% rather
                # than dividing by zero.
                share = count / total_count if total_count else 0.0
                print(f"- {process}: {count} ({share:.1%})")

        return len(errors) == 0, errors
0 commit comments