Skip to content

Commit 1cb0976

Browse files
committed
Script to QA/QC parcel-to-block crosswalking
1 parent 93d94b1 commit 1cb0976

File tree

1 file changed

+368
-0
lines changed

1 file changed

+368
-0
lines changed
Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
"""
2+
map_parcel_block_geo.py
3+
4+
Interactive folium map for QA of parcel-to-block geography classification.
5+
Parcels are colored by their relationship to a requested geography (TRA, GG, PPA, or TOC)
6+
and the area share of that geography within their primary Census block.
7+
8+
Usage:
9+
from map_parcel_block_geo import list_superdistricts, create_parcel_block_map
10+
11+
list_superdistricts()
12+
create_parcel_block_map(["San Francisco"], geo="TRA")
13+
create_parcel_block_map(["Oakland", "Berkeley"], geo="GG", output_path="map_gg.html")
14+
15+
Prerequisites:
16+
parcel_block20_xwalk.csv must already exist in COMBO_OUT (built by metrics_cloud_bespoke.py).
17+
"""
18+
19+
import sys
20+
import argparse
21+
from pathlib import Path
22+
from functools import lru_cache
23+
24+
import numpy as np
25+
import pandas as pd
26+
import geopandas as gpd
27+
import folium
28+
29+
sys.path.insert(0, str(Path(__file__).parent))
30+
import metrics_utils
31+
32+
# ---------------------------------------------------------------------------
# Paths (mirror metrics_cloud_bespoke.py)
# ---------------------------------------------------------------------------
# Equity/performance metrics inputs on Box (Final Blueprint folder).
METRICS_DIR = metrics_utils.BOX_DIR / "Plan Bay Area 2050+/Performance and Equity/Plan Performance/Equity_Performance_Metrics/Final_Blueprint"
# Output directory of the combo cloud runs; also holds the crosswalk cache below.
COMBO_OUT = Path(r"M:\urban_modeling\urbansim_cloud\projects\combo")
# Region-wide parcel geometry shapefile.
PARCELS_FILE = metrics_utils.BOX_DIR / "Modeling and Surveys/Urban Modeling/Spatial/Parcels/parcel_geoms_fgdb_out.shp"
# 2020 Census block geometries for the Bay Area (GeoPackage).
BLOCKS_2020_FILE = Path(r"E:\Box\Modeling and Surveys\Census\2020\geo\bay_area_blocks_2020.gpkg")
# EPSG:26910 (NAD83 / UTM zone 10N) — metric CRS used for area work.
ANALYSIS_CRS = 26910
# Pre-built parcel→block crosswalk produced by metrics_cloud_bespoke.py;
# this script only reads it (see _load_parcel_geo_data).
XWALK_CACHE = COMBO_OUT / "parcel_block20_xwalk.csv"

# ---------------------------------------------------------------------------
# Classification thresholds and display config
# ---------------------------------------------------------------------------
HALF_AREA = 0.50  # block geo share >= this → "in geo block"

# Class colors built dynamically per geo argument.
# Only in-geo parcels are shown; two classes based on whether the block clears the half-area threshold.
BLOCK_BOUNDARY_COLOR = "#00C853"  # bright green

# Per-geography column names: `is_col` is the per-parcel membership flag and
# `share_col` is the per-block qualifying-area share, both produced by
# _load_parcel_geo_data().
GEO_CONFIG = {
    "TRA": {"is_col": "is_tra", "share_col": "tra_area_share"},
    "GG": {"is_col": "is_gg", "share_col": "gg_area_share"},
    "PPA": {"is_col": "is_ppa", "share_col": "ppa_area_share"},
    "TOC": {"is_col": "is_toc", "share_col": "toc_area_share"},
}
57+
58+
# ---------------------------------------------------------------------------
59+
# Cached data loaders
60+
# ---------------------------------------------------------------------------
61+
62+
@lru_cache(maxsize=1)
def _load_taz_xwalk():
    """Load the TAZ crosswalk exactly once per process.

    Calling this also initialises metrics_utils' module-level crosswalk
    globals as a side-effect; other loaders in this file depend on that.
    """
    taz_xwalk = metrics_utils.load_taz_crosswalks(METRICS_DIR)
    return taz_xwalk
66+
67+
68+
@lru_cache(maxsize=1)
def _load_sd_names():
    """Return a DataFrame mapping superdistrict int → sd_name string."""
    # The source CSV lives with the other BAUS basis-input crosswalks and is
    # UTF-16 / tab-delimited.
    csv_path = (
        metrics_utils.M_DRIVE / "urban_modeling" / "baus" / "BAUS Inputs"
        / "basis_inputs" / "crosswalks" / "superdistrict_names.csv"
    )
    raw = pd.read_csv(csv_path, encoding="utf-16", sep="\t")
    return raw[["Superdistrict", "SD Name Old"]].rename(
        columns={"Superdistrict": "superdistrict", "SD Name Old": "sd_name"}
    )
78+
79+
80+
@lru_cache(maxsize=1)
def _load_parcel_geo_data():
    """Return (parcel_block_xwalk, parcel_flags, block_geo) DataFrames.

    parcel_block_xwalk : parcel_id, block_id, intersection_area_sqm, parcel_block_share
    parcel_flags : parcel_id, block_id, parcel_block_share, is_tra, is_gg, is_ppa, is_toc
    block_geo : block_id, tra_area_share, gg_area_share, ppa_area_share, toc_area_share

    Raises
    ------
    FileNotFoundError
        If the pre-built parcel→block crosswalk CSV is missing.
    """
    # The crosswalk is expensive to build, so it must already exist on disk.
    if not XWALK_CACHE.exists():
        raise FileNotFoundError(
            f"parcel_block20_xwalk.csv not found at {XWALK_CACHE}.\n"
            "Run build_parcel_block20_xwalk() from metrics_cloud_bespoke.py first."
        )

    print(" Loading parcel→block crosswalk...")
    # block_id read as str to preserve leading zeros in Census GEOIDs.
    parcel_block_xwalk = pd.read_csv(XWALK_CACHE, dtype={"block_id": str})

    _load_taz_xwalk()  # ensure module globals are populated

    # Per-parcel geography IDs from the RTP2025 crosswalk globals.
    geo_xwalk = (
        metrics_utils.rtp2025_geography_crosswalk_df[["PARCEL_ID", "tra_id", "gg_id", "ppa_id"]]
        .rename(columns={"PARCEL_ID": "parcel_id"})
        .copy()
    )
    toc_xwalk = metrics_utils.rtp2025_parcel_toc_crosswalk_df[["parcel_id", "toc_id"]].copy()

    # Left-merge so parcels missing from either crosswalk are kept (flags
    # below then evaluate False for NaN ids).
    parcel_flags = (
        parcel_block_xwalk
        .merge(geo_xwalk, on="parcel_id", how="left")
        .merge(toc_xwalk, on="parcel_id", how="left")
    )
    # Membership flags; only these tra_* codes count as TRA.
    parcel_flags["is_tra"] = parcel_flags["tra_id"].isin({"tra_1","tra_2","tra_3","tra_4","tra_5"})
    parcel_flags["is_gg"] = parcel_flags["gg_id"] == "GG"
    parcel_flags["is_ppa"] = parcel_flags["ppa_id"] == "PPA"
    parcel_flags["is_toc"] = parcel_flags["toc_id"] == "toc"

    # Qualifying area per parcel-block intersection: the intersection area
    # when the parcel is in the geography, else 0.
    for geo in ["tra", "gg", "ppa", "toc"]:
        parcel_flags[f"{geo}_area"] = (
            parcel_flags[f"is_{geo}"].astype(float) * parcel_flags["intersection_area_sqm"]
        )

    print(" Computing block-level geo area shares...")
    block_geo = (
        parcel_flags
        .groupby("block_id")[["intersection_area_sqm", "tra_area", "gg_area", "ppa_area", "toc_area"]]
        .sum()
        .reset_index()
    )
    # Share of each block's parcel-covered area that is in the geography.
    # NOTE(review): a block whose total intersection area is 0 would divide by
    # zero here (NaN/inf) — presumably the xwalk has no such rows; confirm.
    for geo in ["tra", "gg", "ppa", "toc"]:
        block_geo[f"{geo}_area_share"] = block_geo[f"{geo}_area"] / block_geo["intersection_area_sqm"]

    # Trim parcel_flags to the documented columns before returning.
    flag_cols = ["parcel_id", "block_id", "parcel_block_share", "is_tra", "is_gg", "is_ppa", "is_toc"]
    return parcel_block_xwalk, parcel_flags[flag_cols].copy(), block_geo
133+
134+
135+
# ---------------------------------------------------------------------------
136+
# Public API
137+
# ---------------------------------------------------------------------------
138+
139+
def list_superdistricts():
    """Print all available superdistrict names for use in create_parcel_block_map()."""
    _load_taz_xwalk()  # side-effect: populate metrics_utils globals
    names = sorted(_load_sd_names()["sd_name"].dropna().unique())
    print(f"{len(names)} superdistricts:")
    for n in names:
        print(f" {n!r}")
147+
148+
149+
def create_parcel_block_map(
    sd_names,
    geo: str = "TRA",
    output_path=None,
) -> folium.Map:
    """Create an interactive folium map of parcels classified by geo membership and block area share.

    Parameters
    ----------
    sd_names : str or list of str
        One or more superdistrict names (use list_superdistricts() to see valid values).
    geo : {"TRA", "GG", "PPA", "TOC"}
        The geography type to visualise.
    output_path : str or Path, optional
        If provided, saves the HTML map to this path.

    Returns
    -------
    folium.Map

    Raises
    ------
    ValueError
        If `geo` is not a known geography, or no parcels match `sd_names`.

    Classification categories
    -------------------------
    "{geo} parcel → {geo} block": parcel is in geo AND block ≥50% qualifying (blue)
    "{geo} parcel → non-{geo} block": parcel is in geo AND block <50% qualifying (red)
    Out-of-geo parcels are not shown.
    """
    # Accept a single name as a convenience.
    if isinstance(sd_names, str):
        sd_names = [sd_names]

    geo = geo.upper()
    if geo not in GEO_CONFIG:
        raise ValueError(f"geo must be one of {list(GEO_CONFIG)}, got {geo!r}")

    cfg = GEO_CONFIG[geo]
    is_col = cfg["is_col"]        # per-parcel membership flag column
    share_col = cfg["share_col"]  # per-block qualifying-area-share column

    # ---- resolve parcel set for requested superdistrict(s) ------------------
    _load_taz_xwalk()
    sd_df = _load_sd_names()

    parcel_sd = metrics_utils.parcel_taz_sd_crosswalk_df[["parcel_id", "superdistrict"]].copy()
    parcel_sd = parcel_sd.merge(sd_df, on="superdistrict", how="left")

    mask = parcel_sd["sd_name"].isin(sd_names)
    if not mask.any():
        # Fail loudly with the valid names so typos are easy to fix.
        available = sorted(sd_df["sd_name"].dropna().unique())
        raise ValueError(
            f"No parcels found for sd_names={sd_names!r}.\nAvailable: {available}"
        )
    target_parcels = set(parcel_sd.loc[mask, "parcel_id"])
    print(f" {len(target_parcels):,} parcels in {sd_names}")

    # ---- classify parcels ---------------------------------------------------
    parcel_block_xwalk, parcel_flags, block_geo = _load_parcel_geo_data()

    # Dominant block per parcel (largest parcel_block_share)
    pb = parcel_block_xwalk[parcel_block_xwalk["parcel_id"].isin(target_parcels)].copy()
    # idxmax picks one row per parcel; ties resolve to the first occurrence.
    dominant = pb.loc[
        pb.groupby("parcel_id")["parcel_block_share"].idxmax(),
        ["parcel_id", "block_id"],
    ].copy()

    # Join is_geo flag (one row per parcel)
    pf = (
        parcel_flags[parcel_flags["parcel_id"].isin(target_parcels)][["parcel_id", is_col]]
        .drop_duplicates("parcel_id")
    )
    dominant = dominant.merge(pf, on="parcel_id", how="left")

    # Join block area share
    dominant = dominant.merge(
        block_geo[["block_id", share_col]].rename(columns={share_col: "block_area_share"}),
        on="block_id", how="left",
    )
    # Blocks absent from block_geo are treated as 0% qualifying.
    dominant["block_area_share"] = dominant["block_area_share"].fillna(0.0)
    dominant["is_geo"] = dominant[is_col].fillna(False)

    # Build geo-specific class labels and color map
    label_in = f"{geo} parcel \u2192 {geo} block"
    label_out = f"{geo} parcel \u2192 non-{geo} block"
    class_colors = {
        label_in: "#1565C0",  # blue
        label_out: "#B71C1C",  # red
    }

    def _classify(is_geo, share):
        # Only in-geo parcels are rendered; None rows are dropped below.
        if not is_geo:
            return None  # out-of-geo parcels hidden
        return label_in if share >= HALF_AREA else label_out

    dominant["class"] = dominant.apply(lambda r: _classify(r["is_geo"], r["block_area_share"]), axis=1)
    dominant = dominant[dominant["class"].notna()].copy()
    print(f" {len(dominant):,} in-{geo} parcels to render")

    # ---- load parcel geometries ---------------------------------------------
    print(" Loading parcel geometries (may take a moment)...")
    # NOTE(review): the reproject to ANALYSIS_CRS is immediately followed by a
    # filter + reproject to WGS84 — confirm the metric CRS step is needed here.
    parcels_gdf = gpd.read_file(PARCELS_FILE).to_crs(ANALYSIS_CRS)
    parcels_gdf = parcels_gdf[["parcel_id", "geometry"]].copy()
    parcels_gdf = parcels_gdf[parcels_gdf["parcel_id"].isin(dominant["parcel_id"])]

    # Inner merge: only classified parcels with geometry survive.
    parcels_gdf = parcels_gdf.merge(
        dominant[["parcel_id", "block_id", "is_geo", "block_area_share", "class"]],
        on="parcel_id", how="inner",
    )
    parcels_gdf["block_area_pct"] = (parcels_gdf["block_area_share"] * 100).round(1)
    parcels_gdf["color"] = parcels_gdf["class"].map(class_colors)

    parcels_wgs84 = parcels_gdf.to_crs(4326)

    # ---- load block boundaries for the SD area ------------------------------
    print(" Loading block boundaries...")
    bbox = tuple(parcels_wgs84.total_bounds)  # (minx, miny, maxx, maxy) in WGS84
    # bbox read keeps only features intersecting the parcel extent.
    blocks_gdf = gpd.read_file(BLOCKS_2020_FILE, bbox=bbox).to_crs(4326)
    blocks_gdf = blocks_gdf.rename(columns={"GEOID20": "block_id"})[["block_id", "geometry"]]
    blocks_gdf = blocks_gdf[blocks_gdf["block_id"].isin(set(dominant["block_id"]))].copy()
    blocks_gdf = blocks_gdf.merge(
        block_geo[["block_id", share_col]].rename(columns={share_col: "block_area_share"}),
        on="block_id", how="left",
    )
    blocks_gdf["block_area_share"] = blocks_gdf["block_area_share"].fillna(0.0)
    blocks_gdf["block_area_pct"] = (blocks_gdf["block_area_share"] * 100).round(1)

    # Constant column so the GeoJson style_function can read a per-feature color.
    blocks_gdf["stroke_color"] = BLOCK_BOUNDARY_COLOR

    # ---- build folium map ---------------------------------------------------
    centroid = parcels_wgs84.geometry.union_all().centroid
    m = folium.Map(location=[centroid.y, centroid.x], zoom_start=13, tiles="CartoDB positron")

    # Legend (dynamic labels based on geo argument)
    legend_items = "".join(
        f"<i style='background:{c};width:12px;height:12px;display:inline-block;margin-right:6px;'></i>{label}<br>"
        for label, c in class_colors.items()
    )
    legend_items += (
        f"<i style='border:2px solid {BLOCK_BOUNDARY_COLOR};width:12px;height:12px;"
        f"display:inline-block;margin-right:6px;'></i>Block boundary<br>"
    )
    legend_html = (
        "<div style='position:fixed;bottom:30px;left:30px;z-index:9999;"
        "background:white;padding:10px;border:1px solid grey;font-size:12px;'>"
        f"<b>{geo} Parcel Classification</b><br>{legend_items}</div>"
    )
    m.get_root().html.add_child(folium.Element(legend_html))

    # Block boundaries (rendered first so parcels appear on top)
    fg_blocks = folium.FeatureGroup(name="Block boundaries", show=True)
    folium.GeoJson(
        blocks_gdf,
        style_function=lambda f: {
            "fillOpacity": 0.0,
            "color": f["properties"]["stroke_color"],
            "weight": 1.5,
        },
        tooltip=folium.GeoJsonTooltip(
            fields=["block_id", "block_area_pct"],
            aliases=["Block ID:", f"{geo} block %:"],
        ),
    ).add_to(fg_blocks)
    fg_blocks.add_to(m)

    # One FeatureGroup per class for layer toggling
    for class_label, color in class_colors.items():
        subset = parcels_wgs84[parcels_wgs84["class"] == class_label].copy()
        if subset.empty:
            continue
        fg = folium.FeatureGroup(name=class_label, show=True)
        folium.GeoJson(
            subset,
            # c=color binds the loop value at definition time (avoids the
            # late-binding closure pitfall).
            style_function=lambda f, c=color: {
                "fillColor": c,
                "color": "#333333",
                "weight": 0.5,
                "fillOpacity": 0.65,
            },
            tooltip=folium.GeoJsonTooltip(
                fields=["parcel_id", "block_id", "block_area_pct", "class"],
                aliases=["Parcel ID:", "Block ID:", f"{geo} block %:", "Class:"],
            ),
        ).add_to(fg)
        fg.add_to(m)

    folium.LayerControl(collapsed=False).add_to(m)

    if output_path is not None:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        m.save(str(output_path))
        print(f" Map saved to {output_path}")

    return m
340+
341+
342+
if __name__ == "__main__":
    # CLI entry point: list superdistricts when called with no arguments,
    # otherwise render a map for the requested superdistrict(s).
    parser = argparse.ArgumentParser(
        description="Generate an interactive parcel geography classification map."
    )
    parser.add_argument(
        "sd_names", nargs="*",
        help="One or more superdistrict names (quoted if they contain spaces). "
        "Omit to list available superdistricts.",
    )
    parser.add_argument(
        "--geo", default="TRA", choices=["TRA", "GG", "PPA", "TOC"],
        help="Geography type to visualise (default: TRA).",
    )
    parser.add_argument(
        "--output", "-o", default=None,
        help="Output HTML file path (default: map_<geo>_<sd>.html in current directory).",
    )
    cli = parser.parse_args()

    if not cli.sd_names:
        list_superdistricts()
    else:
        html_out = cli.output
        if html_out is None:
            # Derive a filesystem-safe default filename from geo + SD names.
            pieces = [
                name.lower().replace("/", "-").replace(" ", "_")
                for name in cli.sd_names
            ]
            html_out = f"map_{cli.geo.lower()}_{'_'.join(pieces)}.html"
        create_parcel_block_map(cli.sd_names, geo=cli.geo, output_path=html_out)

0 commit comments

Comments
 (0)