|
| 1 | +""" |
| 2 | +map_parcel_block_geo.py |
| 3 | +
|
| 4 | +Interactive folium map for QA of parcel-to-block geography classification. |
| 5 | +Parcels are colored by their relationship to a requested geography (TRA, GG, PPA, or TOC) |
| 6 | +and the area share of that geography within their primary Census block. |
| 7 | +
|
| 8 | +Usage: |
| 9 | + from map_parcel_block_geo import list_superdistricts, create_parcel_block_map |
| 10 | +
|
| 11 | + list_superdistricts() |
| 12 | + create_parcel_block_map(["San Francisco"], geo="TRA") |
| 13 | + create_parcel_block_map(["Oakland", "Berkeley"], geo="GG", output_path="map_gg.html") |
| 14 | +
|
| 15 | +Prerequisites: |
| 16 | + parcel_block20_xwalk.csv must already exist in COMBO_OUT (built by metrics_cloud_bespoke.py). |
| 17 | +""" |
| 18 | + |
| 19 | +import sys |
| 20 | +import argparse |
| 21 | +from pathlib import Path |
| 22 | +from functools import lru_cache |
| 23 | + |
| 24 | +import numpy as np |
| 25 | +import pandas as pd |
| 26 | +import geopandas as gpd |
| 27 | +import folium |
| 28 | + |
| 29 | +sys.path.insert(0, str(Path(__file__).parent)) |
| 30 | +import metrics_utils |
| 31 | + |
# ---------------------------------------------------------------------------
# Paths (mirror metrics_cloud_bespoke.py)
# ---------------------------------------------------------------------------
# Metrics working directory on Box; passed to metrics_utils.load_taz_crosswalks().
METRICS_DIR = metrics_utils.BOX_DIR / "Plan Bay Area 2050+/Performance and Equity/Plan Performance/Equity_Performance_Metrics/Final_Blueprint"
# Combo-run output directory; holds the cached parcel→block crosswalk CSV.
COMBO_OUT = Path(r"M:\urban_modeling\urbansim_cloud\projects\combo")
# Parcel polygon geometries (shapefile) used for rendering.
PARCELS_FILE = metrics_utils.BOX_DIR / "Modeling and Surveys/Urban Modeling/Spatial/Parcels/parcel_geoms_fgdb_out.shp"
# 2020 Census block polygons for the Bay Area (GeoPackage).
BLOCKS_2020_FILE = Path(r"E:\Box\Modeling and Surveys\Census\2020\geo\bay_area_blocks_2020.gpkg")
# Projected CRS (EPSG:26910, NAD83 / UTM zone 10N) used before reprojecting to WGS84.
ANALYSIS_CRS = 26910
# Pre-built parcel→2020-block crosswalk (produced by metrics_cloud_bespoke.py).
XWALK_CACHE = COMBO_OUT / "parcel_block20_xwalk.csv"

# ---------------------------------------------------------------------------
# Classification thresholds and display config
# ---------------------------------------------------------------------------
HALF_AREA = 0.50  # block geo share >= this → "in geo block"

# Class colors built dynamically per geo argument.
# Only in-geo parcels are shown; two classes based on whether the block clears the half-area threshold.
BLOCK_BOUNDARY_COLOR = "#00C853"  # bright green

# Per-geography column names: "is_col" is the per-parcel membership flag in
# parcel_flags; "share_col" is the block-level qualifying-area share in block_geo.
GEO_CONFIG = {
    "TRA": {"is_col": "is_tra", "share_col": "tra_area_share"},
    "GG": {"is_col": "is_gg", "share_col": "gg_area_share"},
    "PPA": {"is_col": "is_ppa", "share_col": "ppa_area_share"},
    "TOC": {"is_col": "is_toc", "share_col": "toc_area_share"},
}
| 57 | + |
| 58 | +# --------------------------------------------------------------------------- |
| 59 | +# Cached data loaders |
| 60 | +# --------------------------------------------------------------------------- |
| 61 | + |
@lru_cache(maxsize=1)
def _load_taz_xwalk():
    """Load and memoise the TAZ crosswalk.

    Calling this also initialises metrics_utils module-level globals as a
    side-effect, which the other loaders in this file rely on.
    """
    xwalk = metrics_utils.load_taz_crosswalks(METRICS_DIR)
    return xwalk
| 66 | + |
| 67 | + |
@lru_cache(maxsize=1)
def _load_sd_names():
    """Return DataFrame mapping superdistrict int → sd_name string."""
    names_csv = (
        metrics_utils.M_DRIVE / "urban_modeling" / "baus" / "BAUS Inputs"
        / "basis_inputs" / "crosswalks" / "superdistrict_names.csv"
    )
    # File is UTF-16 and tab-delimited despite the .csv extension.
    raw = pd.read_csv(names_csv, encoding="utf-16", sep="\t")
    return raw[["Superdistrict", "SD Name Old"]].rename(
        columns={"Superdistrict": "superdistrict", "SD Name Old": "sd_name"}
    )
| 78 | + |
| 79 | + |
@lru_cache(maxsize=1)
def _load_parcel_geo_data():
    """Return (parcel_block_xwalk, parcel_flags, block_geo) DataFrames.

    parcel_block_xwalk : parcel_id, block_id, intersection_area_sqm, parcel_block_share
    parcel_flags : parcel_id, block_id, parcel_block_share, is_tra, is_gg, is_ppa, is_toc
    block_geo : block_id, tra_area_share, gg_area_share, ppa_area_share, toc_area_share

    Raises
    ------
    FileNotFoundError
        If the cached parcel→block crosswalk has not been built yet.
    """
    if not XWALK_CACHE.exists():
        raise FileNotFoundError(
            f"parcel_block20_xwalk.csv not found at {XWALK_CACHE}.\n"
            "Run build_parcel_block20_xwalk() from metrics_cloud_bespoke.py first."
        )

    print("  Loading parcel→block crosswalk...")
    # block_id is a GEOID string; keep it as str so leading zeros survive.
    parcel_block_xwalk = pd.read_csv(XWALK_CACHE, dtype={"block_id": str})

    _load_taz_xwalk()  # ensure module globals are populated

    geo_xwalk = (
        metrics_utils.rtp2025_geography_crosswalk_df[["PARCEL_ID", "tra_id", "gg_id", "ppa_id"]]
        .rename(columns={"PARCEL_ID": "parcel_id"})
        .copy()
    )
    toc_xwalk = metrics_utils.rtp2025_parcel_toc_crosswalk_df[["parcel_id", "toc_id"]].copy()

    flags = parcel_block_xwalk.merge(geo_xwalk, on="parcel_id", how="left")
    flags = flags.merge(toc_xwalk, on="parcel_id", how="left")

    # Per-parcel geography membership flags.
    tra_ids = {f"tra_{i}" for i in range(1, 6)}
    flags["is_tra"] = flags["tra_id"].isin(tra_ids)
    flags["is_gg"] = flags["gg_id"] == "GG"
    flags["is_ppa"] = flags["ppa_id"] == "PPA"
    flags["is_toc"] = flags["toc_id"] == "toc"

    # Qualifying intersection area per geography (0 where the parcel is out of geo).
    geos = ("tra", "gg", "ppa", "toc")
    for g in geos:
        flags[f"{g}_area"] = flags[f"is_{g}"].astype(float) * flags["intersection_area_sqm"]

    print("  Computing block-level geo area shares...")
    area_cols = ["intersection_area_sqm"] + [f"{g}_area" for g in geos]
    block_geo = flags.groupby("block_id")[area_cols].sum().reset_index()
    for g in geos:
        block_geo[f"{g}_area_share"] = block_geo[f"{g}_area"] / block_geo["intersection_area_sqm"]

    keep_cols = ["parcel_id", "block_id", "parcel_block_share", "is_tra", "is_gg", "is_ppa", "is_toc"]
    return parcel_block_xwalk, flags[keep_cols].copy(), block_geo
| 133 | + |
| 134 | + |
| 135 | +# --------------------------------------------------------------------------- |
| 136 | +# Public API |
| 137 | +# --------------------------------------------------------------------------- |
| 138 | + |
def list_superdistricts():
    """Print all available superdistrict names for use in create_parcel_block_map()."""
    _load_taz_xwalk()
    names = sorted(_load_sd_names()["sd_name"].dropna().unique())
    print(f"{len(names)} superdistricts:")
    for name in names:
        print(f"  {name!r}")
| 147 | + |
| 148 | + |
def create_parcel_block_map(
    sd_names,
    geo: str = "TRA",
    output_path=None,
) -> folium.Map:
    """Create an interactive folium map of parcels classified by geo membership and block area share.

    Parameters
    ----------
    sd_names : str or list of str
        One or more superdistrict names (use list_superdistricts() to see valid values).
    geo : {"TRA", "GG", "PPA", "TOC"}
        The geography type to visualise.
    output_path : str or Path, optional
        If provided, saves the HTML map to this path.

    Returns
    -------
    folium.Map

    Raises
    ------
    ValueError
        If ``geo`` is not a known geography type, or no parcels match ``sd_names``.
    FileNotFoundError
        Propagated from _load_parcel_geo_data() when the crosswalk cache is missing.

    Classification categories
    -------------------------
    "{geo} parcel → {geo} block": parcel is in geo AND block ≥50% qualifying (blue)
    "{geo} parcel → non-{geo} block": parcel is in geo AND block <50% qualifying (red)
    Out-of-geo parcels are not shown.
    """
    if isinstance(sd_names, str):
        sd_names = [sd_names]

    geo = geo.upper()
    if geo not in GEO_CONFIG:
        raise ValueError(f"geo must be one of {list(GEO_CONFIG)}, got {geo!r}")

    cfg = GEO_CONFIG[geo]
    is_col = cfg["is_col"]
    share_col = cfg["share_col"]

    # ---- resolve parcel set for requested superdistrict(s) ------------------
    _load_taz_xwalk()
    sd_df = _load_sd_names()

    parcel_sd = metrics_utils.parcel_taz_sd_crosswalk_df[["parcel_id", "superdistrict"]].copy()
    parcel_sd = parcel_sd.merge(sd_df, on="superdistrict", how="left")

    mask = parcel_sd["sd_name"].isin(sd_names)
    if not mask.any():
        available = sorted(sd_df["sd_name"].dropna().unique())
        raise ValueError(
            f"No parcels found for sd_names={sd_names!r}.\nAvailable: {available}"
        )
    target_parcels = set(parcel_sd.loc[mask, "parcel_id"])
    print(f"  {len(target_parcels):,} parcels in {sd_names}")

    # ---- classify parcels ---------------------------------------------------
    parcel_block_xwalk, parcel_flags, block_geo = _load_parcel_geo_data()

    # Dominant block per parcel (largest parcel_block_share)
    pb = parcel_block_xwalk[parcel_block_xwalk["parcel_id"].isin(target_parcels)].copy()
    dominant = pb.loc[
        pb.groupby("parcel_id")["parcel_block_share"].idxmax(),
        ["parcel_id", "block_id"],
    ].copy()

    # Join is_geo flag (one row per parcel)
    pf = (
        parcel_flags[parcel_flags["parcel_id"].isin(target_parcels)][["parcel_id", is_col]]
        .drop_duplicates("parcel_id")
    )
    dominant = dominant.merge(pf, on="parcel_id", how="left")

    # Join block area share; blocks absent from block_geo count as 0% qualifying.
    dominant = dominant.merge(
        block_geo[["block_id", share_col]].rename(columns={share_col: "block_area_share"}),
        on="block_id", how="left",
    )
    dominant["block_area_share"] = dominant["block_area_share"].fillna(0.0)
    # Left merge can leave NaN (object dtype) in the flag column; normalise to a
    # true boolean Series so downstream logic is dtype-stable.
    dominant["is_geo"] = dominant[is_col].fillna(False).astype(bool)

    # Build geo-specific class labels and color map
    label_in = f"{geo} parcel \u2192 {geo} block"
    label_out = f"{geo} parcel \u2192 non-{geo} block"
    class_colors = {
        label_in: "#1565C0",  # blue
        label_out: "#B71C1C",  # red
    }

    # Vectorised classification (replaces a row-wise apply): out-of-geo parcels
    # get None and are dropped; in-geo parcels split on the half-area threshold.
    dominant["class"] = np.where(
        dominant["is_geo"],
        np.where(dominant["block_area_share"] >= HALF_AREA, label_in, label_out),
        None,
    )
    dominant = dominant[dominant["class"].notna()].copy()
    print(f"  {len(dominant):,} in-{geo} parcels to render")

    # ---- load parcel geometries ---------------------------------------------
    print("  Loading parcel geometries (may take a moment)...")
    parcels_gdf = gpd.read_file(PARCELS_FILE).to_crs(ANALYSIS_CRS)
    parcels_gdf = parcels_gdf[["parcel_id", "geometry"]].copy()
    parcels_gdf = parcels_gdf[parcels_gdf["parcel_id"].isin(dominant["parcel_id"])]

    parcels_gdf = parcels_gdf.merge(
        dominant[["parcel_id", "block_id", "is_geo", "block_area_share", "class"]],
        on="parcel_id", how="inner",
    )
    parcels_gdf["block_area_pct"] = (parcels_gdf["block_area_share"] * 100).round(1)
    parcels_gdf["color"] = parcels_gdf["class"].map(class_colors)

    parcels_wgs84 = parcels_gdf.to_crs(4326)

    # ---- load block boundaries for the SD area ------------------------------
    print("  Loading block boundaries...")
    bbox = tuple(parcels_wgs84.total_bounds)  # (minx, miny, maxx, maxy) in WGS84
    blocks_gdf = gpd.read_file(BLOCKS_2020_FILE, bbox=bbox).to_crs(4326)
    blocks_gdf = blocks_gdf.rename(columns={"GEOID20": "block_id"})[["block_id", "geometry"]]
    blocks_gdf = blocks_gdf[blocks_gdf["block_id"].isin(set(dominant["block_id"]))].copy()
    blocks_gdf = blocks_gdf.merge(
        block_geo[["block_id", share_col]].rename(columns={share_col: "block_area_share"}),
        on="block_id", how="left",
    )
    blocks_gdf["block_area_share"] = blocks_gdf["block_area_share"].fillna(0.0)
    blocks_gdf["block_area_pct"] = (blocks_gdf["block_area_share"] * 100).round(1)

    blocks_gdf["stroke_color"] = BLOCK_BOUNDARY_COLOR

    # ---- build folium map ---------------------------------------------------
    # NOTE(review): union_all() over many parcels is relatively expensive; kept
    # for fidelity since it determines only the initial map center.
    centroid = parcels_wgs84.geometry.union_all().centroid
    m = folium.Map(location=[centroid.y, centroid.x], zoom_start=13, tiles="CartoDB positron")

    # Legend (dynamic labels based on geo argument)
    legend_items = "".join(
        f"<i style='background:{c};width:12px;height:12px;display:inline-block;margin-right:6px;'></i>{label}<br>"
        for label, c in class_colors.items()
    )
    legend_items += (
        f"<i style='border:2px solid {BLOCK_BOUNDARY_COLOR};width:12px;height:12px;"
        f"display:inline-block;margin-right:6px;'></i>Block boundary<br>"
    )
    legend_html = (
        "<div style='position:fixed;bottom:30px;left:30px;z-index:9999;"
        "background:white;padding:10px;border:1px solid grey;font-size:12px;'>"
        f"<b>{geo} Parcel Classification</b><br>{legend_items}</div>"
    )
    m.get_root().html.add_child(folium.Element(legend_html))

    # Block boundaries (rendered first so parcels appear on top)
    fg_blocks = folium.FeatureGroup(name="Block boundaries", show=True)
    folium.GeoJson(
        blocks_gdf,
        style_function=lambda f: {
            "fillOpacity": 0.0,
            "color": f["properties"]["stroke_color"],
            "weight": 1.5,
        },
        tooltip=folium.GeoJsonTooltip(
            fields=["block_id", "block_area_pct"],
            aliases=["Block ID:", f"{geo} block %:"],
        ),
    ).add_to(fg_blocks)
    fg_blocks.add_to(m)

    # One FeatureGroup per class for layer toggling
    for class_label, color in class_colors.items():
        subset = parcels_wgs84[parcels_wgs84["class"] == class_label].copy()
        if subset.empty:
            continue
        fg = folium.FeatureGroup(name=class_label, show=True)
        folium.GeoJson(
            subset,
            # Bind color as a default arg so each lambda keeps its own class color
            # (avoids the late-binding closure pitfall).
            style_function=lambda f, c=color: {
                "fillColor": c,
                "color": "#333333",
                "weight": 0.5,
                "fillOpacity": 0.65,
            },
            tooltip=folium.GeoJsonTooltip(
                fields=["parcel_id", "block_id", "block_area_pct", "class"],
                aliases=["Parcel ID:", "Block ID:", f"{geo} block %:", "Class:"],
            ),
        ).add_to(fg)
        fg.add_to(m)

    folium.LayerControl(collapsed=False).add_to(m)

    if output_path is not None:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        m.save(str(output_path))
        print(f"  Map saved to {output_path}")

    return m
| 340 | + |
| 341 | + |
if __name__ == "__main__":
    # CLI wrapper: with no positional args, list superdistricts; otherwise render a map.
    parser = argparse.ArgumentParser(
        description="Generate an interactive parcel geography classification map."
    )
    parser.add_argument(
        "sd_names", nargs="*",
        help="One or more superdistrict names (quoted if they contain spaces). "
        "Omit to list available superdistricts.",
    )
    parser.add_argument(
        "--geo", default="TRA", choices=["TRA", "GG", "PPA", "TOC"],
        help="Geography type to visualise (default: TRA).",
    )
    parser.add_argument(
        "--output", "-o", default=None,
        help="Output HTML file path (default: map_<geo>_<sd>.html in current directory).",
    )
    args = parser.parse_args()

    if args.sd_names:
        html_out = args.output
        if html_out is None:
            # Derive a filesystem-safe slug from the requested superdistrict names.
            slug = "_".join(
                name.lower().replace("/", "-").replace(" ", "_") for name in args.sd_names
            )
            html_out = f"map_{args.geo.lower()}_{slug}.html"
        create_parcel_block_map(args.sd_names, geo=args.geo, output_path=html_out)
    else:
        list_superdistricts()
0 commit comments