Skip to content

Commit a366112

Browse files
committed
feat(FilterView): switched standardized_address for street_address; corrected some data types in the schema that were off.
1 parent 941a062 commit a366112

File tree

5 files changed

+24
-35
lines changed

5 files changed

+24
-35
lines changed

data/src/data_utils/city_owned_properties.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,6 @@ def city_owned_properties(
5454
)
5555

5656
city_owned_properties, input_validation = loader.load_or_fetch()
57-
print("City-Owned Properties Input:")
58-
print(city_owned_properties.columns)
59-
print(city_owned_properties.head())
6057

6158
merged_gdf = opa_join(input_gdf, city_owned_properties)
6259

@@ -159,8 +156,4 @@ def city_owned_properties(
159156
"Land Bank (PHDC)"
160157
)
161158

162-
print("City-Owned Properties Output:")
163-
print(merged_gdf.columns)
164-
print(merged_gdf.head())
165-
166159
return merged_gdf, input_validation

data/src/data_utils/council_dists.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,6 @@ def council_dists(
4949
)
5050

5151
council_dists, input_validation = loader.load_or_fetch()
52-
print("Council Districts Input:")
53-
print(council_dists.columns)
54-
print(council_dists.head())
5552

5653
# Check that the required columns exist in the DataFrame
5754
required_columns = ["district", "geometry"]
@@ -72,7 +69,4 @@ def council_dists(
7269
# Drop duplicates in the primary feature layer
7370
merged_gdf.drop_duplicates(inplace=True)
7471

75-
print("Council Districts Output:")
76-
print(merged_gdf.columns)
77-
print(merged_gdf.head())
7872
return merged_gdf, input_validation

data/src/validation/base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -197,7 +197,7 @@ def validate(self, gdf: gpd.GeoDataFrame) -> ValidationResult:
197197
try:
198198
self.schema.validate(gdf, lazy=True)
199199
except pa.errors.SchemaErrors as err:
200-
self.errors.append(err.failure_case)
200+
self.errors.append(err.failure_cases)
201201
schema_time = time.time() - schema_start
202202

203203
# Custom validation

data/src/validation/city_owned_properties.py

Lines changed: 19 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import geopandas as gpd
22
import pandera.pandas as pa
3+
import pandas as pd
34
from .base import BaseValidator
45

56
# Expecting ~7,796 records returned (within ±20% tolerance).
@@ -10,8 +11,8 @@
1011

1112
CityOwnedPropertiesInputSchema = pa.DataFrameSchema(
1213
columns={
13-
"opa_id": pa.Column(int, checks=pa.Check(lambda s: s.dropna() != "")),
14-
"agency": pa.Column(str, nullable=True),
14+
"opa_id": pa.Column(pa.Int, checks=pa.Check(lambda s: s.dropna() != "")),
15+
"agency": pa.Column(pa.String, nullable=True),
1516
"sideyardeligible": pa.Column(
1617
pa.Category, nullable=True, checks=pa.Check.isin(["Yes", "No"])
1718
),
@@ -23,23 +24,23 @@
2324

2425
CityOwnedPropertiesOutputSchema = pa.DataFrameSchema(
2526
columns={
26-
"opa_id": pa.Column(int, checks=pa.Check(lambda s: s.dropna() != "")),
27-
"market_value": pa.Column(int, nullable=True),
28-
"sale_date": pa.Column(pa.DateTime, nullable=True),
29-
"sale_price": pa.Column(int, nullable=True),
30-
"owner_1": pa.Column(str, nullable=True),
31-
"owner_2": pa.Column(str, nullable=True),
32-
"building_code_description": pa.Column(str, nullable=True),
33-
"zip_code": pa.Column(str, nullable=True),
34-
"zoning": pa.Column(str, nullable=True),
35-
"parcel_type": pa.Column(str, nullable=True),
36-
"standardized_address": pa.Column(str, nullable=True),
27+
"opa_id": pa.Column(pa.Int, checks=pa.Check(lambda s: s.dropna() != "")),
28+
"market_value": pa.Column(pa.Int, nullable=True),
29+
"sale_date": pa.Column(pd.DatetimeTZDtype(tz="UTC"), nullable=True),
30+
"sale_price": pa.Column(pa.Float, nullable=True),
31+
"owner_1": pa.Column(pa.String, nullable=True),
32+
"owner_2": pa.Column(pa.String, nullable=True),
33+
"building_code_description": pa.Column(pa.String, nullable=True),
34+
"zip_code": pa.Column(pa.String, nullable=True),
35+
"zoning": pa.Column(pa.String, nullable=True),
36+
"parcel_type": pa.Column(pa.String, nullable=True),
37+
"standardized_address": pa.Column(pa.String, nullable=True),
3738
"vacant": pa.Column(pa.Bool, nullable=True),
38-
"district": pa.Column(str, nullable=True),
39-
"neighborhood": pa.Column(str, nullable=True),
40-
"rco_info": pa.Column(str, nullable=True),
41-
"rco_names": pa.Column(str, nullable=True),
42-
"city_owner_agency": pa.Column(str, nullable=True),
39+
"district": pa.Column(pa.String, nullable=True),
40+
"neighborhood": pa.Column(pa.String, nullable=True),
41+
"rco_info": pa.Column(pa.String, nullable=True),
42+
"rco_names": pa.Column(pa.String, nullable=True),
43+
"city_owner_agency": pa.Column(pa.String, nullable=True),
4344
"side_yard_eligible": pa.Column(
4445
pa.Category, nullable=True, checks=pa.Check.isin(["Yes", "No"])
4546
),

data/src/validation/council_dists.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import geopandas as gpd
22
import pandera.pandas as pa
3+
import pandas as pd
34
from .base import BaseValidator
45

56
CouncilDistrictsInputSchema = pa.DataFrameSchema(
@@ -21,16 +22,16 @@
2122
CouncilDistrictsOutputSchema = pa.DataFrameSchema(
2223
columns={
2324
"opa_id": pa.Column(pa.String),
25+
"street_address": pa.Column(pa.String, nullable=True),
2426
"market_value": pa.Column(pa.Int, nullable=True),
25-
"sale_date": pa.Column(pa.DateTime, nullable=True),
26-
"sale_price": pa.Column(pa.Int, nullable=True),
27+
"sale_date": pa.Column(pd.DatetimeTZDtype(tz="UTC"), nullable=True),
28+
"sale_price": pa.Column(pa.Float, nullable=True),
2729
"owner_1": pa.Column(pa.String, nullable=True),
2830
"owner_2": pa.Column(pa.String, nullable=True),
2931
"building_code_description": pa.Column(pa.String, nullable=True),
3032
"zip_code": pa.Column(pa.String, nullable=True),
3133
"zoning": pa.Column(pa.String, nullable=True),
3234
"parcel_type": pa.Column(pa.String, nullable=True),
33-
"standardized_address": pa.Column(pa.String, nullable=True),
3435
"vacant": pa.Column(pa.Bool, nullable=True),
3536
"district": pa.Column(
3637
str, nullable=True, checks=pa.Check.isin([str(i) for i in range(1, 11)])

0 commit comments

Comments
 (0)