7171 "access_process" : access_process ,
7272}
7373
# Prerequisites shared by every entry in SERVICE_DEPENDENCIES below.
_COMMON_PREREQUISITES = ("opa_properties", "vacant_properties")

# Maps a target service name to the services that must be run before it.
# Each list is ordered: run_dependencies() walks it front to back, so the
# position of a name is the position at which that service is executed.
SERVICE_DEPENDENCIES = {
    "conservatorship": [
        *_COMMON_PREREQUISITES,
        "city_owned_properties",
        "delinquencies",
        "li_violations",
    ],
    "contig_neighbors": [*_COMMON_PREREQUISITES],
    "negligent_devs": [*_COMMON_PREREQUISITES, "li_violations"],
    "priority_level": [
        *_COMMON_PREREQUISITES,
        "phs_properties",
        "li_violations",
        "li_complaints",
        "tree_canopy",
    ],
    "tactical_urbanism": [
        *_COMMON_PREREQUISITES,
        "unsafe_buildings",
        "imm_dang_buildings",
    ],
    # These two services previously had hard-coded dependency handling in
    # test_service(); they are kept here so they go through the same path.
    "community_gardens": [*_COMMON_PREREQUISITES],
    "access_process": [*_COMMON_PREREQUISITES, "city_owned_properties"],
}
103+
74104
75105def disable_caching ():
76106 """Temporarily disable caching for testing"""
@@ -89,6 +119,41 @@ def restore_caching(original_method):
89119 BaseLoader .cache_data = original_method
90120
91121
def run_dependencies(dataset, dependencies):
    """Run each prerequisite service against ``dataset``, in the order given.

    Args:
        dataset: The dataset produced so far; it is passed to every
            dependency and replaced by whatever that dependency returns.
        dependencies: Ordered names of services to run. Each name other than
            "opa_properties" is looked up in ``SERVICES``.

    Returns:
        The dataset after the last dependency has run, or ``dataset``
        unchanged when ``dependencies`` is empty or ``None``.

    Raises:
        KeyError: If a dependency name is not a key of ``SERVICES``.
    """
    if not dependencies:
        return dataset

    print(f"\n Running dependencies: { ', ' .join (dependencies )} ")
    print("-" * 30)

    for step_name in dependencies:
        # opa_properties is the base dataset the caller has already loaded,
        # so only the other names are actually executed.
        if step_name != "opa_properties":
            dep = step_name
            print(f"Running { dep } ...")
            run_step = SERVICES[dep]

            # These services are run with statistical summaries enabled.
            if dep in ("phs_properties", "delinquencies"):
                with enable_statistical_summaries():
                    dataset, validation = run_step(dataset)
            else:
                dataset, validation = run_step(dataset)

            print(f" Dataset shape after { dep } : { dataset .shape } ")
            print(f" Validation: { validation } ")

            # A failed validation is only reported; the run continues.
            if not (validation["input"] and validation["output"]):
                print(f" Warning: { dep } validation failed: { validation } ")

    return dataset
155+
156+
92157def test_service (service_name : str ):
93158 """Test a specific service with the base OPA properties dataset."""
94159
@@ -114,48 +179,14 @@ def test_service(service_name: str):
114179 if not opa_validation ["input" ] or not opa_validation ["output" ]:
115180 print (f"Warning: OPA properties validation failed: { opa_validation } " )
116181
117- # Run vacant_properties first if the service depends on the vacant column
118- services_that_need_vacant = ["community_gardens" ]
119- if service_name in services_that_need_vacant :
120- print ("\n Running vacant_properties first (dependency)..." )
121- dataset , vacant_validation = vacant_properties (dataset )
122- print (f"Dataset after vacant_properties shape: { dataset .shape } " )
123- print (f"Dataset after vacant_properties columns: { list (dataset .columns )} " )
124-
125- if not vacant_validation ["input" ] or not vacant_validation ["output" ]:
126- print (
127- f"Warning: Vacant properties validation failed: { vacant_validation } "
128- )
129-
130- # Run city_owned_properties first if the service depends on city ownership data
131- services_that_need_city_owned = ["access_process" ]
132- if service_name in services_that_need_city_owned :
133- print ("\n Running city_owned_properties first (dependency)..." )
134- dataset , city_owned_validation = city_owned_properties (dataset )
135- print (f"Dataset after city_owned_properties shape: { dataset .shape } " )
182+ # Run dependencies if any
183+ dependencies = SERVICE_DEPENDENCIES .get (service_name , [])
184+ if dependencies :
185+ dataset = run_dependencies (dataset , dependencies )
136186 print (
137- f"Dataset after city_owned_properties columns : { list ( dataset .columns ) } "
187+ f"\n Final dataset shape before running { service_name } : { dataset .shape } "
138188 )
139-
140- if (
141- not city_owned_validation ["input" ]
142- or not city_owned_validation ["output" ]
143- ):
144- print (
145- f"Warning: City owned properties validation failed: { city_owned_validation } "
146- )
147-
148- # Run vacant_properties for access_process (it also needs the vacant column)
149- if service_name == "access_process" :
150- print ("\n Running vacant_properties (dependency for access_process)..." )
151- dataset , vacant_validation = vacant_properties (dataset )
152- print (f"Dataset after vacant_properties shape: { dataset .shape } " )
153- print (f"Dataset after vacant_properties columns: { list (dataset .columns )} " )
154-
155- if not vacant_validation ["input" ] or not vacant_validation ["output" ]:
156- print (
157- f"Warning: Vacant properties validation failed: { vacant_validation } "
158- )
189+ print (f"Final dataset columns: { list (dataset .columns )} " )
159190
160191 # Run the service
161192 service_func = SERVICES [service_name ]
0 commit comments