Skip to content

Commit f313207

Browse files
Merge pull request #975 from CodeForPhilly/staging
Weekly PR from Staging to Main
2 parents 0da3b21 + ad413d4 commit f313207

File tree

3 files changed

+54
-9
lines changed

3 files changed

+54
-9
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ data/src/tmp
99
.DS_Store
1010
/data/src/local_outputs/
1111
/data/notebooks/
12+
/data/reports/
1213

1314
## App
1415

data/src/data_utils/kde.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -147,11 +147,13 @@ def apply_kde_to_primary(primary_featurelayer, name, query, resolution=resolutio
147147

148148

149149
def label_percentile(value):
150-
if value == 1:
151-
return "1st Percentile"
152-
elif value == 2:
153-
return "2nd Percentile"
154-
elif value == 3:
155-
return "3rd Percentile"
150+
if 10 <= value % 100 <= 13:
151+
return f"{value}th Percentile"
152+
elif value % 10 == 1:
153+
return f"{value}st Percentile"
154+
elif value % 10 == 2:
155+
return f"{value}nd Percentile"
156+
elif value % 10 == 3:
157+
return f"{value}rd Percentile"
156158
else:
157159
return f"{value}th Percentile"

data/src/data_utils/negligent_devs.py

Lines changed: 45 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,18 +58,38 @@ def create_standardized_address(row):
5858

5959
def negligent_devs(primary_featurelayer):
6060
devs = primary_featurelayer.gdf
61-
city_owners = devs.loc[~devs["city_owner_agency"].isna()].copy()
62-
non_city_owners = devs.loc[devs["city_owner_agency"].isna()].copy()
61+
62+
print("Columns in 'devs' DataFrame:", devs.columns)
63+
64+
print("Initial properties data:")
65+
print(devs[['opa_id', 'city_owner_agency', 'mailing_street']].head(10))
66+
67+
city_owners = devs.loc[~devs["city_owner_agency"].isna() & (devs["city_owner_agency"] != "")].copy()
68+
non_city_owners = devs.loc[devs["city_owner_agency"].isna() | (devs["city_owner_agency"] == "")].copy()
69+
70+
print(f"City owners shape: {city_owners.shape}, Non-city owners shape: {non_city_owners.shape}")
71+
72+
# Log before standardizing addresses
73+
print("Non-city owners mailing streets before standardization:")
74+
print(non_city_owners[['opa_id', 'mailing_street']].head(10))
6375

6476
non_city_owners.loc[:, "mailing_street"] = (
6577
non_city_owners["mailing_street"].astype(str).apply(standardize_street)
6678
)
6779

80+
print("Non-city owners mailing streets after standardization:")
81+
print(non_city_owners[['opa_id', 'mailing_street']].head(10))
82+
6883
for term in ["ST", "AVE", "RD", "BLVD"]:
6984
non_city_owners.loc[:, "mailing_street"] = non_city_owners[
7085
"mailing_street"
7186
].replace(regex={f"{term}.*": term})
7287

88+
# Log after applying term replacement
89+
print("Non-city owners mailing streets after term replacement:")
90+
print(non_city_owners[['opa_id', 'mailing_street']].head(10))
91+
92+
# Fill missing address components
7393
non_city_owners.loc[:, "mailing_address_1"] = non_city_owners[
7494
"mailing_address_1"
7595
].fillna("")
@@ -84,33 +104,52 @@ def negligent_devs(primary_featurelayer):
84104
].fillna("")
85105
non_city_owners.loc[:, "mailing_zip"] = non_city_owners["mailing_zip"].fillna("")
86106

107+
# Log addresses before creating standardized address
108+
print("Non-city owners mailing details before creating standardized address:")
109+
print(non_city_owners[['opa_id', 'mailing_street', 'mailing_city_state', 'mailing_zip']].head(10))
110+
87111
non_city_owners.loc[:, "standardized_address"] = non_city_owners.apply(
88112
create_standardized_address, axis=1
89113
)
90114

115+
# Log standardized addresses and counts
116+
print("Standardized addresses with counts:")
91117
address_counts = (
92118
non_city_owners.groupby("standardized_address")
93119
.size()
94120
.reset_index(name="property_count")
95121
)
122+
print(address_counts.head(10))
123+
96124
sorted_address_counts = address_counts.sort_values(
97125
by="property_count", ascending=False
98126
)
127+
print("Top standardized addresses by property count:")
128+
print(sorted_address_counts.head(10))
99129

100130
non_city_owners = non_city_owners.merge(
101131
sorted_address_counts, on="standardized_address", how="left"
102132
)
103133

134+
# Log merged data for city owners
104135
city_owner_counts = (
105136
city_owners.groupby("city_owner_agency")
106137
.size()
107138
.reset_index(name="property_count")
108139
)
140+
print("City owner counts:")
141+
print(city_owner_counts.head(10))
142+
109143
city_owners = city_owners.merge(
110144
city_owner_counts, on="city_owner_agency", how="left"
111145
)
112146

113147
devs_combined = pd.concat([city_owners, non_city_owners], axis=0)
148+
149+
# Final check on the merged data before updating primary_featurelayer
150+
print("Combined data with property counts:")
151+
print(devs_combined[['opa_id', 'property_count']].head(10))
152+
114153
primary_featurelayer.gdf = primary_featurelayer.gdf.merge(
115154
devs_combined[["opa_id", "property_count"]], on="opa_id", how="left"
116155
)
@@ -119,6 +158,9 @@ def negligent_devs(primary_featurelayer):
119158
)
120159
primary_featurelayer.gdf.loc[:, "negligent_dev"] = (
121160
primary_featurelayer.gdf["n_properties_owned"] > 5
122-
) & (primary_featurelayer.gdf["city_owner_agency"].isna())
161+
) & (primary_featurelayer.gdf["city_owner_agency"].isna() | (primary_featurelayer.gdf["city_owner_agency"] == ""))
162+
163+
print("Final feature layer data with negligent_dev flag:")
164+
print(primary_featurelayer.gdf[['opa_id', 'n_properties_owned', 'negligent_dev']].head(10))
123165

124166
return primary_featurelayer

0 commit comments

Comments
 (0)