ATX-Fire-Department-Analysis/analysis.py at main · kialburg/ATX-Fire-Department-Analysis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
"""
Austin Fire Department Resource Allocation Analysis
=====================================================
Research Question: Do suburban areas utilize disproportionate fire resources
per capita compared to urban areas?

Author: Research Hub - Austin Housing/Land Use Working Group
"""

import pandas as pd
import geopandas as gpd
import requests
import json
from shapely.geometry import Point
import numpy as np

# =============================================================================
# DATA SOURCE CONFIGURATION
# =============================================================================

# Registry of the external datasets this analysis draws on, keyed by a short
# internal name.  Each entry is a plain dict of endpoint URLs plus a
# human-readable description.  In the code visible in this file only two
# values are read: fire_incidents_2022_2024["url"] and
# afd_response_areas["query_url"] (both in run_full_analysis); the remaining
# entries and keys are reference metadata.
SOURCES = {
    # Austin Open Data Portal (Socrata API)
    "fire_incidents_2022_2024": {
        "url": "https://data.austintexas.gov/resource/v5hh-nyr8.json",
        "csv_url": "https://data.austintexas.gov/api/views/v5hh-nyr8/rows.csv?accessType=DOWNLOAD",
        "description": "AFD Fire Incidents 2022-2024",
        # Column names expected in this dataset; "location" is the default
        # column that parse_incident_locations() reads.
        "fields": ["incident_number", "calendaryear", "month", "incdate",
                   "call_type", "problem", "responsearea", "jurisdiction",
                   "prioritydescription", "council_district", "location"]
    },
    # Earlier incident years; not fetched by run_full_analysis().
    "fire_incidents_2018_2021": {
        "url": "https://data.austintexas.gov/resource/j9w8-x2vu.json",
        "csv_url": "https://data.austintexas.gov/api/views/j9w8-x2vu/rows.csv?accessType=DOWNLOAD",
        "description": "AFD Fire Incidents 2018-2021"
    },
    # Station locations; not fetched by run_full_analysis().
    "fire_stations": {
        "url": "https://data.austintexas.gov/resource/szku-46rx.json",
        "description": "Austin Fire Station Locations"
    },

    # ArcGIS FeatureServer (City of Austin GIS)
    "afd_response_areas": {
        # "url" is the layer itself; "query_url" is the same layer with the
        # /query suffix, which is what fetch_arcgis_features() is given.
        "url": "https://services.arcgis.com/0L95CJ0VTaxqcmED/arcgis/rest/services/BOUNDARIES_afd_response_areas/FeatureServer/0",
        "query_url": "https://services.arcgis.com/0L95CJ0VTaxqcmED/arcgis/rest/services/BOUNDARIES_afd_response_areas/FeatureServer/0/query",
        "description": "AFD Response Area Boundaries"
    },

    # Census API
    # NOTE: fetch_census_data() builds its own request URL and does not read
    # "base_url" or "table" from these two entries.
    "census_population": {
        "base_url": "https://api.census.gov/data/2022/acs/acs5",
        "table": "B01003",  # Total Population
        "description": "ACS 5-Year Population Estimates by Tract"
    },
    "census_housing_units": {
        "base_url": "https://api.census.gov/data/2022/acs/acs5",
        "table": "B25024",  # Units in Structure
        "description": "Housing Units by Type (SF, MF, etc.) by Tract"
    }
}


# =============================================================================
# DATA FETCHING FUNCTIONS
# =============================================================================

def fetch_socrata_data(resource_url, limit=50000, timeout=60):
    """Fetch rows from an Austin Open Data Portal (Socrata) JSON endpoint.

    Args:
        resource_url: Full URL of the Socrata ``.json`` resource.
        limit: Maximum number of rows to request (sent as ``$limit``).
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        pandas.DataFrame with one row per record in the JSON response.

    Raises:
        Exception: If the server answers with a non-200 status code.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    import warnings

    params = {"$limit": limit}
    response = requests.get(resource_url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code}")

    records = response.json()

    # A response that fills the requested limit has very likely been cut off;
    # any counts or rates computed from it would be understated, so make the
    # truncation visible instead of silent.
    if limit is not None and len(records) >= limit:
        warnings.warn(
            f"Received {len(records)} rows, which reaches the requested "
            f"limit of {limit}; the dataset may be truncated.",
            stacklevel=2,
        )

    return pd.DataFrame(records)


def fetch_arcgis_features(query_url, where="1=1", out_fields="*", out_sr=4326,
                          timeout=60):
    """Fetch features from an ArcGIS REST ``/query`` endpoint as GeoJSON.

    Args:
        query_url: URL of the layer's ``/query`` operation.
        where: SQL-style filter sent as the ``where`` parameter.
        out_fields: Attribute fields to return (``"*"`` for all).
        out_sr: Spatial reference requested from the server (``outSR``).  It
            is also recorded as the CRS of the result, interpreted as an
            EPSG code.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        geopandas.GeoDataFrame built from the returned features, tagged with
        ``EPSG:<out_sr>`` so it can be spatially joined with other layers
        that declare a CRS.

    Raises:
        Exception: On a non-200 status code, or when the response body has
            no ``features`` member.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    params = {
        "where": where,
        "outFields": out_fields,
        "outSR": out_sr,
        "f": "geojson"
    }
    response = requests.get(query_url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch ArcGIS data: {response.status_code}")

    payload = response.json()
    if "features" not in payload:
        # Surface whatever the server sent back (e.g. an error object)
        # instead of failing with a bare KeyError.
        raise Exception(f"Failed to fetch ArcGIS data: {payload}")

    # from_features() leaves the CRS unset unless told otherwise; the
    # coordinates were requested in out_sr, so label them accordingly.
    crs = f"EPSG:{out_sr}" if out_sr is not None else None
    return gpd.GeoDataFrame.from_features(payload["features"], crs=crs)


def fetch_census_data(table, variables, state="48", county="453", year=2022,
                      timeout=60):
    """Fetch tract-level Census ACS 5-year data for one county.

    The defaults select Travis County, TX (state FIPS 48, county FIPS 453)
    from the 2022 ACS 5-year release.

    Args:
        table: ACS table ID (e.g. ``"B01003"``).  Not used to build the
            request -- ``variables`` fully determines what is fetched -- and
            kept so existing callers continue to work.
        variables: Iterable of variable names to request (e.g.
            ``["B01003_001E", "NAME"]``).
        state: State FIPS code.
        county: County FIPS code.
        year: ACS 5-year release to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        pandas.DataFrame whose columns are the first row of the API response
        and whose rows are the remaining rows.  Values are left exactly as
        the API returned them; no numeric conversion is done here.

    Raises:
        Exception: If the API answers with a non-200 status code.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    base_url = f"https://api.census.gov/data/{year}/acs/acs5"

    # Build variable list
    var_string = ",".join(variables)

    # Query every tract in the requested county
    url = f"{base_url}?get={var_string}&for=tract:*&in=state:{state}&in=county:{county}"

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Census API error: {response.status_code}")

    # The response is a list of rows; the first row holds the column names.
    data = response.json()
    return pd.DataFrame(data[1:], columns=data[0])


# =============================================================================
# DATA PROCESSING FUNCTIONS
# =============================================================================

def parse_incident_locations(df, location_col="location"):
    """Convert a location column into Point geometries.

    Adds a ``geometry`` column to ``df`` (the input frame is modified in
    place) and returns it wrapped as a GeoDataFrame in EPSG:4326.

    Accepted value shapes:
      * a string such as ``"(-97.xxx, 30.xxx)"``, read as ``(lon, lat)``;
      * a dict with a ``"coordinates"`` list, read as ``[lon, lat, ...]``;
      * a dict with ``"longitude"`` and ``"latitude"`` members.

    The dict shapes are handled because the incidents are loaded from a JSON
    endpoint, where a location may arrive as a nested object rather than a
    string -- TODO confirm the actual shape against the live dataset.

    Anything else (missing values, malformed strings, unexpected types)
    becomes a ``None`` geometry rather than raising.

    Args:
        df: DataFrame holding the raw incident records.
        location_col: Name of the column containing the location values.

    Returns:
        geopandas.GeoDataFrame with a ``geometry`` column (EPSG:4326).
    """
    def parse_loc(loc):
        try:
            if isinstance(loc, dict):
                if "coordinates" in loc:
                    lon, lat = loc["coordinates"][:2]
                else:
                    lon, lat = loc["longitude"], loc["latitude"]
            elif isinstance(loc, str):
                # Format: "(-97.xxx, 30.xxx)"
                coords = loc.strip("()").split(",")
                lon, lat = coords[0], coords[1]
            else:
                # NaN / None / any other type: no usable location.
                return None
            return Point(float(lon), float(lat))
        except (KeyError, IndexError, TypeError, ValueError):
            # Malformed value -- treat as "no location" like a missing one.
            return None

    df["geometry"] = df[location_col].apply(parse_loc)
    return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")


def classify_urban_suburban(gdf, density_col="pop_density",
                            urban_threshold=10000, suburban_threshold=3000):
    """Label each row as urban core, inner suburban, or outer suburban.

    Classification is by population density (people per square mile):

      * ``density >= urban_threshold``      -> ``"urban_core"``
      * ``density >= suburban_threshold``   -> ``"inner_suburban"``
      * anything lower                      -> ``"outer_suburban"``
      * missing density (NaN / None)        -> left unclassified (missing)

    Rows without a density are deliberately not assigned a class: a NaN
    fails both comparisons, so it would otherwise fall through to
    ``"outer_suburban"`` and distort that group's totals.

    Args:
        gdf: DataFrame / GeoDataFrame to classify.  An ``urban_class``
            column is added to it in place.
        density_col: Name of the column holding population density.
        urban_threshold: Minimum density for ``"urban_core"``.
        suburban_threshold: Minimum density for ``"inner_suburban"``.

    Returns:
        The same ``gdf`` object, with the ``urban_class`` column added.
    """
    def classify(density):
        if pd.isna(density):
            return None
        if density >= urban_threshold:
            return "urban_core"
        if density >= suburban_threshold:
            return "inner_suburban"
        return "outer_suburban"

    gdf["urban_class"] = gdf[density_col].apply(classify)
    return gdf


def calculate_housing_typology_mix(census_df):
    """
    Summarise Census table B25024 (Units in Structure) into housing types.

    Works on a copy of ``census_df``; the input is not modified.  Every
    column whose name starts with ``B25024`` is coerced to numeric
    (unparseable values become NaN), then these columns are added:

      single_family       B25024_002E (1, detached) + B25024_003E (1, attached)
      small_multifamily   B25024_004E (2 units)     + B25024_005E (3 or 4 units)
      medium_multifamily  B25024_006E (5 to 9)      + B25024_007E (10 to 19)
      large_multifamily   B25024_008E (20 to 49)    + B25024_009E (50 or more)
      other               B25024_010E (mobile home) + B25024_011E (boat, RV, van, etc.)
      total_units         B25024_001E
      pct_single_family   single_family / total_units * 100
      pct_multifamily     (small + medium + large multifamily) / total_units * 100
    """
    result = census_df.copy()

    # The estimate columns may hold strings; make them numeric first.
    estimate_cols = [name for name in result.columns if name.startswith("B25024")]
    for name in estimate_cols:
        result[name] = pd.to_numeric(result[name], errors="coerce")

    # (new column, first source column, second source column)
    groupings = (
        ("single_family", "B25024_002E", "B25024_003E"),
        ("small_multifamily", "B25024_004E", "B25024_005E"),    # 2-4 units
        ("medium_multifamily", "B25024_006E", "B25024_007E"),   # 5-19 units
        ("large_multifamily", "B25024_008E", "B25024_009E"),    # 20+ units
        ("other", "B25024_010E", "B25024_011E"),
    )
    for label, first, second in groupings:
        result[label] = result[first] + result[second]

    result["total_units"] = result["B25024_001E"]
    units = result["total_units"]

    # Shares of the total housing stock, in percent.
    result["pct_single_family"] = result["single_family"] / units * 100
    multifamily = (result["small_multifamily"] + result["medium_multifamily"] +
                   result["large_multifamily"])
    result["pct_multifamily"] = multifamily / units * 100

    return result


# =============================================================================
# ANALYSIS FUNCTIONS
# =============================================================================

def calculate_incidents_per_capita(incidents_gdf, response_areas_gdf, population_df,
                                   area_id_col="response_area_id"):
    """
    Count incidents per response area and express them per 1,000 residents.

    Steps:
      1. Spatially join each incident to the response area it lies within.
      2. Count incidents per response area.
      3. Attach the counts and the population figures to the response areas.
      4. Compute ``incidents_per_1000 = incident_count / population * 1000``.

    Args:
        incidents_gdf: GeoDataFrame of incident points.
        response_areas_gdf: GeoDataFrame of response-area polygons; must
            contain ``area_id_col``.
        population_df: DataFrame with ``area_id_col`` and a ``population``
            column.
        area_id_col: Name of the response-area identifier column shared by
            ``response_areas_gdf`` and ``population_df``.

    Returns:
        ``response_areas_gdf`` merged with ``incident_count``, the columns
        of ``population_df``, and ``incidents_per_1000``.  Areas with no
        incidents get a count (and rate) of 0; areas whose population is
        missing or not positive get a NaN rate.
    """
    # Spatial join: which response area does each incident fall in?
    incidents_with_area = gpd.sjoin(
        incidents_gdf,
        response_areas_gdf,
        how="left",
        predicate="within"
    )

    # Aggregate incidents by response area.  Incidents that matched no area
    # have a missing id and are dropped by groupby.
    incident_counts = (
        incidents_with_area.groupby(area_id_col)
        .size()
        .reset_index(name="incident_count")
    )

    # Merge with population
    merged = response_areas_gdf.merge(incident_counts, on=area_id_col, how="left")
    merged = merged.merge(population_df, on=area_id_col, how="left")

    # The left merge leaves NaN for areas with no incidents; that is a real
    # zero, not missing data.
    merged["incident_count"] = merged["incident_count"].fillna(0).astype(int)

    # Mask non-positive populations so the division yields NaN rather than
    # inf for unpopulated areas.
    population = pd.to_numeric(merged["population"], errors="coerce")
    population = population.where(population > 0)

    # Calculate rate per 1,000 population
    merged["incidents_per_1000"] = (merged["incident_count"] / population) * 1000

    return merged


def analyze_by_urban_class(merged_gdf):
    """Roll response-area metrics up to one row per ``urban_class``.

    Population, incident count, housing units, single-family units and area
    are summed within each class; rates and shares are then derived from
    those totals (not averaged across areas).

    Returns a DataFrame with the class label, the five totals, and
    ``incidents_per_1000_pop``, ``incidents_per_1000_units``,
    ``pop_density`` and ``pct_single_family``.
    """
    summed_columns = (
        "population",
        "incident_count",
        "total_units",
        "single_family",
        "area_sq_miles",
    )
    totals = merged_gdf.groupby("urban_class").agg(dict.fromkeys(summed_columns, "sum"))
    summary = totals.reset_index()

    incidents = summary["incident_count"]
    units = summary["total_units"]

    # Rates and shares computed from the class totals
    summary["incidents_per_1000_pop"] = incidents.div(summary["population"]).mul(1000)
    summary["incidents_per_1000_units"] = incidents.div(units).mul(1000)
    summary["pop_density"] = summary["population"].div(summary["area_sq_miles"])
    summary["pct_single_family"] = summary["single_family"].div(units).mul(100)

    return summary


def analyze_by_housing_type(merged_gdf):
    """
    Summarise fire incident rates by share of single-family housing.

    Each row of ``merged_gdf`` is placed in one of four bands according to
    its ``pct_single_family`` value: [0, 25], (25, 50], (50, 75], (75, 100].
    The lowest band includes 0 so that areas with no single-family housing
    at all are counted instead of being silently dropped.

    Args:
        merged_gdf: DataFrame / GeoDataFrame with ``pct_single_family``,
            ``incidents_per_1000``, ``population`` and ``incident_count``
            columns.  An ``sf_category`` column is added to it in place.

    Returns:
        DataFrame with one row per band (all four bands are always present,
        even when empty) containing the mean ``incidents_per_1000`` -- an
        unweighted mean across areas -- and the summed ``population`` and
        ``incident_count``.
    """
    # Create bins for single-family percentage
    merged_gdf["sf_category"] = pd.cut(
        merged_gdf["pct_single_family"],
        bins=[0, 25, 50, 75, 100],
        labels=["<25% SF", "25-50% SF", "50-75% SF", ">75% SF"],
        include_lowest=True,  # keep areas at exactly 0% single-family
    )

    # observed=False is spelled out so every band is reported regardless of
    # the pandas version's default for categorical groupers.
    summary = merged_gdf.groupby("sf_category", observed=False).agg({
        "incidents_per_1000": "mean",
        "population": "sum",
        "incident_count": "sum"
    }).reset_index()

    return summary


# =============================================================================
# MAIN ANALYSIS PIPELINE
# =============================================================================

def run_full_analysis():
    """
    Fetch and prepare every dataset the study needs, reporting progress.

    Downloads the 2022-2024 incidents, the AFD response-area boundaries and
    two Census tables (total population, units in structure), derives the
    housing-type mix, and converts incident locations to point geometries.
    The final spatial-analysis step is still a placeholder.

    Returns a dict with the keys "incidents", "response_areas",
    "population" and "housing".
    """
    banner = "=" * 60
    print(banner)
    print("AUSTIN FIRE RESOURCE ALLOCATION ANALYSIS")
    print(banner)

    # Fire incidents (Socrata)
    print("\n[1/6] Fetching fire incident data...")
    incident_source = SOURCES["fire_incidents_2022_2024"]
    raw_incidents = fetch_socrata_data(incident_source["url"])
    print(f"      Retrieved {len(raw_incidents)} incidents (2022-2024)")

    # Response area polygons (ArcGIS)
    print("\n[2/6] Fetching AFD response area boundaries...")
    boundaries_url = SOURCES["afd_response_areas"]["query_url"]
    response_areas = fetch_arcgis_features(boundaries_url)
    print(f"      Retrieved {len(response_areas)} response areas")

    # Census: total population per tract
    print("\n[3/6] Fetching Census population data...")
    population = fetch_census_data("B01003", ["B01003_001E", "NAME"])
    print(f"      Retrieved population for {len(population)} tracts")

    # Census: units in structure per tract (B25024_001E .. B25024_011E)
    print("\n[4/6] Fetching Census housing typology data...")
    structure_codes = [f"B25024_{index:03d}E" for index in range(1, 12)]
    raw_housing = fetch_census_data("B25024", [*structure_codes, "NAME"])
    housing = calculate_housing_typology_mix(raw_housing)
    print(f"      Retrieved housing data for {len(housing)} tracts")

    # Turn incident locations into geometries
    print("\n[5/6] Processing incident locations...")
    incidents_gdf = parse_incident_locations(raw_incidents)
    located = incidents_gdf.geometry.notna().sum()
    print(f"      {located} incidents with valid coordinates")

    # Spatial analysis
    print("\n[6/6] Performing spatial analysis...")
    # (Not implemented yet: census tracts still need to be spatially joined
    # to response areas.)

    print("\n" + banner)
    print("DATA RETRIEVAL COMPLETE")
    print(banner)

    results = {}
    results["incidents"] = incidents_gdf
    results["response_areas"] = response_areas
    results["population"] = population
    results["housing"] = housing
    return results


# =============================================================================
# VISUALIZATION HELPERS
# =============================================================================

def create_choropleth_map(gdf, column, title, key_col="response_area_id"):
    """Create a folium choropleth map of ``column`` (requires folium).

    Args:
        gdf: GeoDataFrame holding the polygons plus the ``key_col`` and
            ``column`` attributes.
        column: Name of the numeric column that drives the fill colour.
        title: Text used as the legend name.
        key_col: Column that identifies each polygon; it links the data
            rows to the GeoJSON features.

    Returns:
        The ``folium.Map`` with the choropleth layer added, or ``None`` (after
        printing an install hint) when folium is not installed.
    """
    # Only the import is guarded: an ImportError raised while building the
    # map is a different problem and should not be reported as "folium is
    # missing".
    try:
        import folium
    except ImportError:
        print("Install folium for map visualization: pip install folium")
        return None

    # Fixed initial view; presumably centred on Austin -- confirm if the
    # study area changes.
    fire_map = folium.Map(location=[30.2672, -97.7431], zoom_start=11)

    folium.Choropleth(
        geo_data=gdf.__geo_interface__,
        data=gdf,
        columns=[key_col, column],
        key_on=f"feature.properties.{key_col}",
        fill_color="YlOrRd",
        legend_name=title
    ).add_to(fire_map)

    return fire_map


# =============================================================================
# ENTRY POINT
# =============================================================================

if __name__ == "__main__":
    # Script entry: fetch everything, then print two quick summaries.
    data = run_full_analysis()

    # Most frequent values of the incidents' "problem" column
    if "incidents" in data:
        print("\n--- Fire Incident Types (sample) ---")
        problem_counts = data["incidents"]["problem"].value_counts()
        print(problem_counts.head(15))

    # County-wide housing unit totals by type
    if "housing" in data:
        print("\n--- Housing Typology Summary ---")
        housing = data["housing"]
        total_units = housing['total_units'].sum()
        single_family_units = housing['single_family'].sum()
        multifamily_units = sum(
            housing[size].sum()
            for size in ("small_multifamily", "medium_multifamily", "large_multifamily")
        )
        print(f"Total units in Travis County: {total_units:,.0f}")
        print(f"Single-family: {single_family_units:,.0f}")
        print(f"Multifamily: {multifamily_units:,.0f}")