@@ -58,18 +58,38 @@ def create_standardized_address(row):
5858
5959def negligent_devs (primary_featurelayer ):
6060 devs = primary_featurelayer .gdf
61- city_owners = devs .loc [~ devs ["city_owner_agency" ].isna ()].copy ()
62- non_city_owners = devs .loc [devs ["city_owner_agency" ].isna ()].copy ()
61+
62+ print ("Columns in 'devs' DataFrame:" , devs .columns )
63+
64+ print ("Initial properties data:" )
65+ print (devs [['opa_id' , 'city_owner_agency' , 'mailing_street' ]].head (10 ))
66+
67+ city_owners = devs .loc [~ devs ["city_owner_agency" ].isna () & (devs ["city_owner_agency" ] != "" )].copy ()
68+ non_city_owners = devs .loc [devs ["city_owner_agency" ].isna () | (devs ["city_owner_agency" ] == "" )].copy ()
69+
70+ print (f"City owners shape: { city_owners .shape } , Non-city owners shape: { non_city_owners .shape } " )
71+
72+ # Log before standardizing addresses
73+ print ("Non-city owners mailing streets before standardization:" )
74+ print (non_city_owners [['opa_id' , 'mailing_street' ]].head (10 ))
6375
6476 non_city_owners .loc [:, "mailing_street" ] = (
6577 non_city_owners ["mailing_street" ].astype (str ).apply (standardize_street )
6678 )
6779
80+ print ("Non-city owners mailing streets after standardization:" )
81+ print (non_city_owners [['opa_id' , 'mailing_street' ]].head (10 ))
82+
6883 for term in ["ST" , "AVE" , "RD" , "BLVD" ]:
6984 non_city_owners .loc [:, "mailing_street" ] = non_city_owners [
7085 "mailing_street"
7186 ].replace (regex = {f"{ term } .*" : term })
7287
88+ # Log after applying term replacement
89+ print ("Non-city owners mailing streets after term replacement:" )
90+ print (non_city_owners [['opa_id' , 'mailing_street' ]].head (10 ))
91+
92+ # Fill missing address components
7393 non_city_owners .loc [:, "mailing_address_1" ] = non_city_owners [
7494 "mailing_address_1"
7595 ].fillna ("" )
@@ -84,33 +104,52 @@ def negligent_devs(primary_featurelayer):
84104 ].fillna ("" )
85105 non_city_owners .loc [:, "mailing_zip" ] = non_city_owners ["mailing_zip" ].fillna ("" )
86106
107+ # Log addresses before creating standardized address
108+ print ("Non-city owners mailing details before creating standardized address:" )
109+ print (non_city_owners [['opa_id' , 'mailing_street' , 'mailing_city_state' , 'mailing_zip' ]].head (10 ))
110+
87111 non_city_owners .loc [:, "standardized_address" ] = non_city_owners .apply (
88112 create_standardized_address , axis = 1
89113 )
90114
115+ # Log standardized addresses and counts
116+ print ("Standardized addresses with counts:" )
91117 address_counts = (
92118 non_city_owners .groupby ("standardized_address" )
93119 .size ()
94120 .reset_index (name = "property_count" )
95121 )
122+ print (address_counts .head (10 ))
123+
96124 sorted_address_counts = address_counts .sort_values (
97125 by = "property_count" , ascending = False
98126 )
127+ print ("Top standardized addresses by property count:" )
128+ print (sorted_address_counts .head (10 ))
99129
100130 non_city_owners = non_city_owners .merge (
101131 sorted_address_counts , on = "standardized_address" , how = "left"
102132 )
103133
134+ # Count properties per city owner agency and log the counts
104135 city_owner_counts = (
105136 city_owners .groupby ("city_owner_agency" )
106137 .size ()
107138 .reset_index (name = "property_count" )
108139 )
140+ print ("City owner counts:" )
141+ print (city_owner_counts .head (10 ))
142+
109143 city_owners = city_owners .merge (
110144 city_owner_counts , on = "city_owner_agency" , how = "left"
111145 )
112146
113147 devs_combined = pd .concat ([city_owners , non_city_owners ], axis = 0 )
148+
149+ # Final check on the merged data before updating primary_featurelayer
150+ print ("Combined data with property counts:" )
151+ print (devs_combined [['opa_id' , 'property_count' ]].head (10 ))
152+
114153 primary_featurelayer .gdf = primary_featurelayer .gdf .merge (
115154 devs_combined [["opa_id" , "property_count" ]], on = "opa_id" , how = "left"
116155 )
@@ -119,6 +158,9 @@ def negligent_devs(primary_featurelayer):
119158 )
120159 primary_featurelayer .gdf .loc [:, "negligent_dev" ] = (
121160 primary_featurelayer .gdf ["n_properties_owned" ] > 5
122- ) & (primary_featurelayer .gdf ["city_owner_agency" ].isna ())
161+ ) & (primary_featurelayer .gdf ["city_owner_agency" ].isna () | (primary_featurelayer .gdf ["city_owner_agency" ] == "" ))
162+
163+ print ("Final feature layer data with negligent_dev flag:" )
164+ print (primary_featurelayer .gdf [['opa_id' , 'n_properties_owned' , 'negligent_dev' ]].head (10 ))
123165
124166 return primary_featurelayer
0 commit comments