Skip to content

Commit 289d836

Browse files
committed
Promote crime and voter overlays to base; remove social alignment from core
1 parent 7e835d6 commit 289d836

File tree

4 files changed

+365
-19
lines changed

4 files changed

+365
-19
lines changed

README.md

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,22 @@ directory.
5151

5252
Optional overlays can be placed in the same data directory:
5353

54-
- `overlays/crime_data.csv` (or `crime_data.csv`)
55-
- `overlays/project_data.csv` (or `project_data.csv`)
56-
- `overlays/social_alignment.csv`
54+
- `overlays/crime_data.csv`
55+
- `overlays/voter_data.csv`
56+
- `overlays/project_data.csv`
5757

5858
Overlay files should include a `GEOID` column plus numeric metric columns.
59-
Crime metrics (column names containing `crime`) appear under a `CRIME` section
60-
in demographic profiles. Other overlay metrics appear under `PROJECT DATA`.
59+
You can also normalize sources into canonical overlays automatically:
60+
61+
```bash
62+
python3 scripts/fetch_overlays.py \
63+
--out-dir /path/to/data \
64+
--crime-source /path/or/url/to/crime.csv \
65+
--voter-source /path/or/url/to/voter.csv
66+
```
67+
68+
Crime metrics appear under `CRIME`, voter metrics under `CIVICS`, and other
69+
private/custom metrics in `project_data.csv` appear under `PROJECT DATA`.
6170

6271
Query workflows:
6372

geocompare/database/Database.py

Lines changed: 94 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ class Database:
5757
('total_crime_rate', 'Total crime rate', '/100k'),
5858
]
5959

60+
# (metric key, display label) pairs for voter share metrics. When an overlay
# metric key matches one of these, _add_overlay_metric uses the label and
# renders the value with one decimal place followed by '%'.
VOTER_PERCENT_METRIC_DEFS = [
61+
('percent_democratic', 'Percent Democratic'),
62+
('percent_republican', 'Percent Republican'),
63+
('percent_other', 'Percent Other'),
64+
]
65+
6066
###########################################################################
6167
# Helper methods for __init__
6268

@@ -195,13 +201,9 @@ def _normalize_geoid_keys(self, geoid):
195201
def _iter_overlay_candidates(self, path):
196202
overlay_dir = path / 'overlays'
197203
candidates = [
198-
path / 'crime_data.csv',
199-
path / 'crime.csv',
200204
overlay_dir / 'crime_data.csv',
201-
overlay_dir / 'crime.csv',
202-
path / 'project_data.csv',
203205
overlay_dir / 'project_data.csv',
204-
overlay_dir / 'social_alignment.csv',
206+
overlay_dir / 'voter_data.csv',
205207
]
206208
for candidate in candidates:
207209
if candidate.exists():
@@ -311,11 +313,17 @@ def _add_overlay_metric(self, dp, section_title, metric_key, metric_value):
311313
else:
312314
value_display = f'{metric_value:,.0f}'
313315
break
316+
for known_key, known_label in self.VOTER_PERCENT_METRIC_DEFS:
317+
if key == known_key:
318+
label = known_label
319+
value_display = f'{metric_value:,.1f}%'
320+
break
314321

315-
if key.endswith('social_alignment_index'):
316-
label = 'Social alignment index'
317-
value_display = f'{metric_value:,.3f}'
318-
elif value_display is None:
322+
if key == 'registered_voters':
323+
label = 'Registered voters'
324+
value_display = f'{metric_value:,.0f}'
325+
326+
if value_display is None:
319327
if float(metric_value).is_integer():
320328
value_display = f'{metric_value:,.0f}'
321329
else:
@@ -337,6 +345,70 @@ def _add_overlay_metric(self, dp, section_title, metric_key, metric_value):
337345
compound_suffix=compound_suffix,
338346
)
339347

348+
def _derive_crime_rate_metrics(self, metrics, population):
349+
if not population:
350+
return {}
351+
352+
derived = {}
353+
for count_key in (
354+
'violent_crime_count',
355+
'property_crime_count',
356+
'total_crime_count',
357+
):
358+
rate_key = count_key.replace('_count', '_rate')
359+
if rate_key in metrics:
360+
continue
361+
362+
count_value = metrics.get(count_key)
363+
if count_value is None:
364+
continue
365+
try:
366+
count_value = float(count_value)
367+
except (TypeError, ValueError):
368+
continue
369+
370+
derived[rate_key] = count_value / population * 100000.0
371+
372+
return derived
373+
374+
def _derive_voter_share_metrics(self, metrics):
375+
registered = metrics.get('registered_voters')
376+
if not registered:
377+
return {}
378+
try:
379+
registered = float(registered)
380+
except (TypeError, ValueError):
381+
return {}
382+
if registered <= 0:
383+
return {}
384+
385+
derived = {}
386+
for party in ('democratic', 'republican', 'other'):
387+
percent_key = f'percent_{party}'
388+
if percent_key in metrics:
389+
continue
390+
391+
count_key = f'{party}_voters'
392+
count_value = metrics.get(count_key)
393+
if count_value is None:
394+
continue
395+
try:
396+
count_value = float(count_value)
397+
except (TypeError, ValueError):
398+
continue
399+
400+
derived[percent_key] = count_value / registered * 100.0
401+
402+
return derived
403+
404+
def _overlay_section(self, metric_key):
405+
lowered = metric_key.lower()
406+
if 'crime' in lowered:
407+
return 'CRIME'
408+
if 'voter' in lowered or lowered.startswith('percent_'):
409+
return 'CIVICS'
410+
return 'PROJECT DATA'
411+
340412
def apply_overlays(self):
341413
if not self.overlays:
342414
return
@@ -354,8 +426,19 @@ def apply_overlays(self):
354426
continue
355427

356428
for dp in matches:
357-
for metric_key, metric_value in metrics.items():
358-
section = 'CRIME' if 'crime' in metric_key.lower() else 'PROJECT DATA'
429+
effective_metrics = dict(metrics)
430+
effective_metrics.update(
431+
self._derive_crime_rate_metrics(
432+
effective_metrics,
433+
dp.rc.get('population', 0),
434+
)
435+
)
436+
effective_metrics.update(
437+
self._derive_voter_share_metrics(effective_metrics)
438+
)
439+
440+
for metric_key, metric_value in effective_metrics.items():
441+
section = self._overlay_section(metric_key)
359442
self._add_overlay_metric(dp, section, metric_key, metric_value)
360443

361444
def dbapi_qm_substr(self, columns_len):

scripts/fetch_overlays.py

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
#!/usr/bin/env python3
2+
"""Fetch and normalize optional overlay files for geocompare.
3+
4+
Writes canonical overlay CSVs under:
5+
<out-dir>/overlays/{crime_data.csv,voter_data.csv}
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import argparse
11+
import csv
12+
import json
13+
import sys
14+
import urllib.request
15+
from pathlib import Path
16+
from typing import Dict, Iterable, List, Optional
17+
18+
# Overlay kind -> canonical CSV file name; _run_one writes the file under
# <out-dir>/overlays/.
CANONICAL_FILES = {
19+
"crime": "crime_data.csv",
20+
"voter": "voter_data.csv",
21+
}
22+
23+
24+
def _read_text_from_source(source: str) -> str:
25+
if source.startswith("http://") or source.startswith("https://"):
26+
with urllib.request.urlopen(source) as response: # nosec - user-provided source
27+
return response.read().decode("utf-8")
28+
return Path(source).read_text(encoding="utf-8")
29+
30+
31+
def _normalize_key(value: str) -> str:
32+
return value.strip().lower().replace(" ", "_")
33+
34+
35+
def _parse_records(source: str) -> List[Dict[str, str]]:
    """Load *source* and return its rows as dictionaries of strings.

    Text whose first non-blank character is ``[`` or ``{`` is parsed as
    JSON: either a list of objects, or an object holding that list under
    ``"rows"`` (a missing ``"rows"`` yields no records). Entries that are
    not objects are skipped and ``null`` values are dropped. Any other text
    is parsed as CSV with a header row; cells missing from a short row
    become ``""`` and cells beyond the header are discarded.

    Args:
        source: Local path or http(s) URL of the CSV/JSON document.

    Returns:
        One ``{column: value}`` dict per record, with string keys and values.

    Raises:
        ValueError: if the JSON is malformed or is neither a list nor an
            object whose ``"rows"`` is a list.
    """
    import io  # function-scope import: the module does not import io

    text = _read_text_from_source(source)
    if text.lstrip().startswith(("[", "{")):
        payload = json.loads(text)
        if isinstance(payload, dict):
            payload = payload.get("rows", [])
        if not isinstance(payload, list):
            raise ValueError("JSON payload must be a list or object with 'rows'.")
        return [
            {str(k): str(v) for k, v in row.items() if v is not None}
            for row in payload
            if isinstance(row, dict)
        ]

    # Hand the csv module the raw text rather than text.splitlines():
    # pre-splitting cuts quoted fields that contain line breaks and also
    # splits on separators such as form feed or U+2028 inside a field.
    reader = csv.DictReader(io.StringIO(text, newline=""))
    return [
        # DictReader files surplus cells under the key None; `if k` drops
        # those along with empty header names.
        {str(k): ("" if v is None else str(v)) for k, v in row.items() if k}
        for row in reader
    ]
55+
56+
57+
def _find_col(record: Dict[str, str], aliases: Iterable[str]) -> Optional[str]:
    """Return the column name in *record* matching the first usable alias.

    Column names are compared in normalized form (see ``_normalize_key``);
    *aliases* are expected to be normalized already and are tried in order.
    Returns the column name exactly as it appears in *record*, or ``None``
    when no alias matches.
    """
    by_normalized: Dict[str, str] = {}
    for column in record:
        # On a normalization collision the later column wins.
        by_normalized[_normalize_key(column)] = column
    return next(
        (by_normalized[alias] for alias in aliases if alias in by_normalized),
        None,
    )
63+
64+
65+
def _as_float(value: str) -> Optional[float]:
66+
text = value.strip()
67+
if not text:
68+
return None
69+
text = text.replace(",", "")
70+
if text.endswith("%"):
71+
text = text[:-1]
72+
try:
73+
return float(text)
74+
except ValueError:
75+
return None
76+
77+
78+
def _canonicalize_crime(rows: List[Dict[str, str]]) -> List[Dict[str, object]]:
    """Map raw crime records onto the canonical crime overlay columns.

    Each output row carries ``GEOID`` plus whichever of
    ``violent_crime_count``, ``property_crime_count`` and
    ``total_crime_count`` could be located (via column aliases) and parsed
    as numbers. Rows without a GEOID, or without any parsable metric, are
    dropped.
    """
    geoid_aliases = ("geoid", "geoid20", "geoid10")
    # Canonical column -> accepted source column names, in priority order.
    alias_table = (
        ("violent_crime_count", ("violent_crime_count", "violent_crime", "violent")),
        ("property_crime_count", ("property_crime_count", "property_crime", "property")),
        ("total_crime_count", ("total_crime_count", "total_crime", "crime_total")),
    )

    canonical_rows: List[Dict[str, object]] = []
    for record in rows:
        geoid_header = _find_col(record, geoid_aliases)
        geoid = record.get(geoid_header, "").strip() if geoid_header else ""
        if not geoid:
            continue

        found: Dict[str, object] = {}
        for canonical, aliases in alias_table:
            header = _find_col(record, aliases)
            parsed = _as_float(record.get(header, "")) if header else None
            if parsed is not None:
                found[canonical] = parsed

        if found:
            canonical_rows.append({"GEOID": geoid, **found})
    return canonical_rows
106+
107+
108+
def _canonicalize_voter(rows: List[Dict[str, str]]) -> List[Dict[str, object]]:
    """Map raw voter records onto the canonical voter overlay columns.

    Each output row carries ``GEOID`` plus whichever of
    ``registered_voters``, ``democratic_voters``, ``republican_voters`` and
    ``other_voters`` could be located (via column aliases) and parsed as
    numbers. Rows without a GEOID, or without any parsable metric, are
    dropped.
    """
    geoid_aliases = ("geoid", "geoid20", "geoid10")
    # Canonical column -> accepted source column names, in priority order.
    alias_table = (
        ("registered_voters", ("registered_voters", "total_registered", "registered")),
        ("democratic_voters", ("democratic_voters", "dem_voters", "democratic")),
        ("republican_voters", ("republican_voters", "rep_voters", "republican")),
        ("other_voters", ("other_voters", "oth_voters", "other")),
    )

    canonical_rows: List[Dict[str, object]] = []
    for record in rows:
        geoid_header = _find_col(record, geoid_aliases)
        geoid = record.get(geoid_header, "").strip() if geoid_header else ""
        if not geoid:
            continue

        found: Dict[str, object] = {}
        for canonical, aliases in alias_table:
            header = _find_col(record, aliases)
            parsed = _as_float(record.get(header, "")) if header else None
            if parsed is not None:
                found[canonical] = parsed

        if found:
            canonical_rows.append({"GEOID": geoid, **found})
    return canonical_rows
137+
138+
139+
def _write_csv(path: Path, rows: List[Dict[str, object]], fieldnames: List[str]) -> None:
140+
path.parent.mkdir(parents=True, exist_ok=True)
141+
with path.open("w", newline="", encoding="utf-8") as f:
142+
writer = csv.DictWriter(f, fieldnames=fieldnames)
143+
writer.writeheader()
144+
for row in rows:
145+
writer.writerow(row)
146+
147+
148+
def _run_one(kind: str, source: str, out_dir: Path) -> None:
    """Normalize one overlay source and write its canonical CSV.

    Args:
        kind: Overlay kind, ``"crime"`` or ``"voter"``.
        source: Path or URL of the raw CSV/JSON data.
        out_dir: Data root; output goes to ``<out_dir>/overlays/<file>``.

    Raises:
        ValueError: if *kind* is not supported (raised after *source* has
            been read and parsed).
    """
    # kind -> (canonicalizer, output column order)
    handlers = {
        "crime": (
            _canonicalize_crime,
            ["GEOID", "violent_crime_count", "property_crime_count", "total_crime_count"],
        ),
        "voter": (
            _canonicalize_voter,
            [
                "GEOID",
                "registered_voters",
                "democratic_voters",
                "republican_voters",
                "other_voters",
            ],
        ),
    }

    records = _parse_records(source)
    if kind not in handlers:
        raise ValueError(f"unsupported overlay kind: {kind}")
    canonicalize, fieldnames = handlers[kind]
    normalized = canonicalize(records)

    destination = out_dir / "overlays" / CANONICAL_FILES[kind]
    _write_csv(destination, normalized, fieldnames)
    print(f"{kind}: wrote {len(normalized)} rows -> {destination}")
167+
168+
169+
def main() -> int:
    """Command-line entry point.

    Parses ``--out-dir``, ``--crime-source`` and ``--voter-source``, then
    normalizes each provided source (crime first, then voter).

    Returns:
        ``0`` on success, or ``1`` after printing the error to stderr if
        any source fails. Exits through ``parser.error`` when no source is
        given.
    """
    parser = argparse.ArgumentParser(
        description="Fetch and normalize private overlay datasets for geocompare.",
    )
    parser.add_argument(
        "--out-dir",
        default="../000-data",
        help="data root where overlays/ will be written (default: ../000-data)",
    )
    parser.add_argument("--crime-source", help="crime source CSV/JSON path or URL")
    parser.add_argument("--voter-source", help="voter source CSV/JSON path or URL")
    args = parser.parse_args()

    candidates = (("crime", args.crime_source), ("voter", args.voter_source))
    requested = [(kind, source) for kind, source in candidates if source]
    if not requested:
        parser.error("Provide at least one source: --crime-source / --voter-source")

    out_dir = Path(args.out_dir).resolve()
    try:
        for kind, source in requested:
            _run_one(kind, source, out_dir)
    except Exception as exc:  # noqa: BLE001
        # Top-level boundary: report any failure and signal it via exit code.
        print(f"error: {exc}", file=sys.stderr)
        return 1
    return 0
195+
196+
197+
if __name__ == "__main__":
198+
raise SystemExit(main())

0 commit comments

Comments
 (0)