Skip to content

Commit 0dc5f9e

Browse files
committed
Redo MIH Areas
1 parent 184ffd8 commit 0dc5f9e

File tree

6 files changed

+189
-120
lines changed

6 files changed

+189
-120
lines changed

products/pluto/models/qaqc/intermediate/qaqc_int__mihareas_questionable_assignments.sql

Lines changed: 46 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,57 +2,54 @@
22
materialized = 'view'
33
) }}
44
-- Analysis: MIH Areas Questionable Assignments
5-
-- Purpose: Identify tax lots with multiple MIH area assignments and analyze coverage patterns
5+
-- Purpose: Identify lots with "iffy" MIH area assignments due to low spatial overlap
6+
--
7+
-- This view shows lots where the MIH assignment is questionable due to:
8+
-- 1. Low percentage of lot covered by MIH area (between 10-30%)
9+
-- 2. Low percentage of MIH area covered by lot (between 50-75%)
10+
-- 3. Edge cases that barely meet assignment thresholds
611
--
7-
-- This analysis identifies BBLs that intersect with multiple MIH areas
8-
-- and pulls in the relevant geometries for analysis purposes.
12+
-- Unlike transit zones, multiple MIH areas can legitimately apply to a single lot.
13+
-- We focus on identifying assignments with marginal spatial overlap that may need review.
914

10-
WITH bbls_with_multiple_mihareas AS (
11-
-- Find BBLs that have multiple MIH area assignments
12-
SELECT bbl
13-
FROM {{ source("build_sources", "mihperorder") }}
14-
WHERE row_number = 2 -- BBLs with at least 2 MIH assignments
15-
),
16-
17-
mihareas_coverage_analysis AS (
18-
-- Calculate coverage metrics for BBLs with multiple MIH area assignments
15+
WITH questionable_assignments AS (
1916
SELECT
20-
m.bbl,
21-
m.project_name,
22-
m.affordability_option,
23-
m.perbblgeom AS pct_covered,
24-
p.geom AS lot_geometry,
17+
mlo.bbl,
18+
mlo.project_id,
19+
mlo.mih_id,
20+
mlo.cleaned_option,
21+
mlo.perbblgeom AS pct_lot_covered,
22+
mlo.maxpermihgeom AS pct_mih_covered,
23+
-- Calculate how "iffy" this assignment is (lower scores = more questionable)
24+
LEAST(
25+
CASE WHEN mlo.perbblgeom >= 10 THEN mlo.perbblgeom ELSE 0 END,
26+
CASE WHEN mlo.maxpermihgeom >= 50 THEN mlo.maxpermihgeom ELSE 0 END
27+
) AS assignment_strength,
28+
p.geom AS lot_geom,
2529
p.address,
26-
d.wkb_geometry AS mih_geometry,
27-
-- Calculate how close the coverage percentage is to 50% (most ambiguous case)
28-
50 - ABS(m.perbblgeom - 50.0) AS ambiguity_score,
29-
-- Get the maximum ambiguity score for each BBL to identify most questionable cases
30-
MAX(50 - ABS(m.perbblgeom - 50.0)) OVER (PARTITION BY m.bbl) AS max_bbl_ambiguity_score
31-
FROM {{ source("build_sources", "mihperorder") }} AS m
32-
INNER JOIN bbls_with_multiple_mihareas AS bmm
33-
ON m.bbl = bmm.bbl
34-
INNER JOIN {{ source("build_sources", "pluto") }} AS p
35-
ON m.bbl = p.bbl
36-
INNER JOIN {{ source("build_sources", "dcp_mih") }} AS d
37-
ON
38-
m.project_name = d.project_name
39-
AND m.affordability_option = d.mih_option
40-
),
41-
42-
final AS (
43-
SELECT DISTINCT ON (project_name, affordability_option, bbl, max_bbl_ambiguity_score)
44-
bbl,
45-
project_name,
46-
affordability_option,
47-
pct_covered,
48-
ambiguity_score,
49-
max_bbl_ambiguity_score,
50-
mih_geometry,
51-
lot_geometry,
52-
address
53-
FROM mihareas_coverage_analysis
30+
p.zonedist1
31+
FROM mih_lot_overlap AS mlo
32+
LEFT JOIN pluto AS p ON mlo.bbl = p.bbl
33+
WHERE mlo.perbblgeom BETWEEN 10 AND 30
34+
), assignment_context AS (
35+
-- Add basic context and geometry
36+
SELECT
37+
qa.*,
38+
mc.wkb_geometry AS mih_geom
39+
FROM questionable_assignments AS qa
40+
LEFT JOIN mih_cleaned AS mc ON qa.mih_id = mc.mih_id
5441
)
55-
56-
SELECT *
57-
FROM final
58-
ORDER BY max_bbl_ambiguity_score DESC, bbl ASC, pct_covered DESC
42+
SELECT
43+
bbl,
44+
project_id,
45+
cleaned_option,
46+
pct_lot_covered,
47+
pct_mih_covered,
48+
assignment_strength,
49+
address,
50+
zonedist1,
51+
lot_geom,
52+
mih_geom,
53+
ST_ENVELOPE(ST_BUFFER(lot_geom, 0.005)) AS area_of_interest_geom
54+
FROM assignment_context
55+
ORDER BY assignment_strength ASC, pct_lot_covered ASC;

products/pluto/pluto_build/sql/create.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,10 @@ CREATE TABLE pluto (
102102
masdate text,
103103
polidate text,
104104
edesigdate text,
105-
mih_project_name text,
106-
mih_affordability_option text,
105+
mih_opt1 text,
106+
mih_opt2 text,
107+
mih_opt3 text,
108+
mih_opt4 text,
107109
trnstzone text,
108110
affresfar text,
109111
mnffar text

products/pluto/pluto_build/sql/export.sql

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,10 @@ SELECT
9292
latitude::numeric(19, 7),
9393
longitude::numeric(19, 7),
9494
notes::varchar(20),
95-
mih_affordability_option::varchar(50),
96-
mih_project_name::varchar(50),
95+
mih_opt1::varchar(1),
96+
mih_opt2::varchar(1),
97+
mih_opt3::varchar(1),
98+
mih_opt4::varchar(1),
9799
trnstzone::varchar(50),
98100
affresfar::numeric(19, 11),
99101
mnffar::numeric(19, 11)
@@ -203,8 +205,10 @@ SELECT
203205
a.latitude AS "Latitude",
204206
a.longitude AS "Longitude",
205207
a.notes AS "Notes",
206-
a.mih_affordability_option AS "MIHAffOption",
207-
a.mih_project_name AS "MIHProjectName",
208+
a.mih_opt1 AS "MIHOption1",
209+
a.mih_opt2 AS "MIHOption2",
210+
a.mih_opt3 AS "MIHOption3",
211+
a.mih_opt4 AS "MIHOption4",
208212
a.trnstzone AS "TrnstZone",
209213
a.affresfar AS "AffResFAR",
210214
a.mnffar AS "ManuFAR"

products/pluto/pluto_build/sql/export_mappluto_gdb.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,10 @@ SELECT
9595
round(st_length(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Leng",
9696
round(st_area(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Area",
9797
st_makevalid(b.:GEOM) AS geom,
98-
a.mih_affordability_option AS "MIHAffOption",
99-
a.mih_project_name AS "MIHProjectName",
98+
a.mih_opt1 AS "MIHOption1",
99+
a.mih_opt2 AS "MIHOption2",
100+
a.mih_opt3 AS "MIHOption3",
101+
a.mih_opt4 AS "MIHOption4",
100102
a.trnstzone AS "TrnstZone",
101103
a.affresfar AS "AffResFAR",
102104
a.mnffar AS "ManuFAR"

products/pluto/pluto_build/sql/export_mappluto_shp.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,10 @@ SELECT
9595
round(st_length(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Leng",
9696
round(st_area(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Area",
9797
st_makevalid(b.:GEOM) AS geom,
98-
a.mih_affordability_option AS "MIHAffOption",
99-
a.mih_project_name AS "MIHProjectName",
98+
a.mih_opt1 AS "MIHOption1",
99+
a.mih_opt2 AS "MIHOption2",
100+
a.mih_opt3 AS "MIHOption3",
101+
a.mih_opt4 AS "MIHOption4",
100102
a.trnstzone AS "TrnstZone",
101103
a.affresfar AS "AffResFAR",
102104
a.mnffar AS "ManuFAR"
Lines changed: 123 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,139 @@
1-
-- Similar logic to zoning districts:
2-
-- calculate how much (total area and percentage) of each lot is covered by MIH areas
3-
-- assign the MIH project ID to each tax lot based on which MIH area covers the
4-
-- majority of the lot
5-
-- a MIH area is only assigned if more than 10% of the lot is covered by the MIH area
6-
-- OR more than 50% of the MIH area overlaps with the lot
7-
DROP TABLE IF EXISTS mihperorder;
8-
CREATE TABLE mihperorder AS
9-
WITH mih_unioned AS (
10-
SELECT
11-
project_name,
12-
mih_option AS affordability_option,
13-
ST_UNION(wkb_geometry) AS wkb_geometry
14-
FROM dcp_mih
15-
GROUP BY project_name, mih_option
16-
),
17-
mihper AS (
1+
-- Mandatory Inclusionary Housing (MIH) Area Assignment Logic
2+
--
3+
-- Assign MIH affordability options to tax lots based on spatial overlap with MIH areas
4+
--
5+
-- Assignment Strategy:
6+
-- Unlike transit zones where each lot gets assigned to exactly one zone, MIH areas can have
7+
-- multiple overlapping affordability options that ALL apply to a single lot. A lot is assigned
8+
-- to an MIH option if either:
9+
-- 1. ≥10% of the lot area is covered by the MIH area, OR
10+
-- 2. ≥50% of the MIH area is covered by the lot
11+
--
12+
-- Multiple Options Per Lot:
13+
-- A single lot can legitimately have multiple MIH options (e.g., Option 1, Option 2, Deep Affordability).
14+
-- These are not competing assignments but rather cumulative policy options that apply to development
15+
-- on that lot. The final output pivots these into binary flags (mih_opt1, mih_opt2, etc.).
16+
--
17+
-- Data Flow:
18+
-- 1. Clean MIH option names and create unique identifiers (mih_cleaned table)
19+
-- 2. Calculate spatial overlaps between lots and MIH areas (mih_lot_overlap table)
20+
-- 3. Filter to assignments meeting the coverage thresholds
21+
-- 4. Pivot multiple options per lot into binary columns on the pluto table
22+
23+
24+
DROP TABLE IF EXISTS mih_cleaned;
25+
CREATE TABLE mih_cleaned AS
26+
SELECT
27+
project_id || '-' || mih_option AS mih_id,
28+
*,
29+
trim(
30+
-- Step 2b: collapse any sequence of commas (e.g., ",,", ",,,")
31+
regexp_replace(
32+
-- Step 2a: Replace "and" or "," (with any spaces) with a single comma
33+
regexp_replace(
34+
-- Step 1: Add space between "Option" and number
35+
regexp_replace(
36+
replace(mih_option, 'Affordablility', 'Affordability'), -- should probably fix this in the source data
37+
'Option(\d)', -- ← match "Option" followed by a digit
38+
'Option \1', -- ← insert space
39+
'g'
40+
),
41+
'\s*(,|and)\s*', -- ← match a comma or "and" (with spaces)
42+
',', -- ← replace with a comma
43+
'g'
44+
),
45+
',+', -- ← match one or more commas in a row
46+
',', -- ← replace with a single comma
47+
'g'
48+
),
49+
', ' -- ← trim comma and space FROM start/end
50+
) AS cleaned_option
51+
FROM dcp_mih;
52+
53+
54+
DROP TABLE IF EXISTS mih_lot_overlap CASCADE;
55+
CREATE TABLE mih_lot_overlap AS
56+
WITH mih_per_area AS (
1857
SELECT
19-
p.id,
2058
p.bbl,
21-
m.project_name,
22-
m.affordability_option,
23-
ST_AREA(
59+
m.project_id,
60+
m.mih_id,
61+
m.wkb_geometry AS mih_geom,
62+
p.geom AS lot_geom,
63+
m.cleaned_option,
64+
st_area(
2465
CASE
25-
WHEN ST_COVEREDBY(p.geom, m.wkb_geometry) THEN p.geom
26-
ELSE ST_MULTI(ST_INTERSECTION(p.geom, m.wkb_geometry))
66+
WHEN st_coveredby(p.geom, m.wkb_geometry) THEN p.geom
67+
ELSE st_multi(st_intersection(p.geom, m.wkb_geometry))
2768
END
2869
) AS segbblgeom,
29-
ST_AREA(p.geom) AS allbblgeom,
30-
ST_AREA(
70+
st_area(p.geom) AS allbblgeom,
71+
st_area(
3172
CASE
32-
WHEN ST_COVEREDBY(m.wkb_geometry, p.geom) THEN m.wkb_geometry
33-
ELSE ST_MULTI(ST_INTERSECTION(m.wkb_geometry, p.geom))
73+
WHEN st_coveredby(m.wkb_geometry, p.geom) THEN m.wkb_geometry
74+
ELSE st_multi(st_intersection(m.wkb_geometry, p.geom))
3475
END
3576
) AS segmihgeom,
36-
ST_AREA(m.wkb_geometry) AS allmihgeom
77+
st_area(m.wkb_geometry) AS allmihgeom
3778
FROM pluto AS p
38-
INNER JOIN mih_unioned AS m
39-
ON ST_INTERSECTS(p.geom, m.wkb_geometry)
79+
INNER JOIN mih_cleaned AS m
80+
ON st_intersects(p.geom, m.wkb_geometry)
4081
),
41-
grouped AS (
82+
mih_areas AS (
4283
SELECT
43-
id,
4484
bbl,
45-
project_name,
46-
affordability_option,
47-
SUM(segbblgeom) AS segbblgeom,
48-
SUM(segmihgeom) AS segmihgeom,
49-
SUM(segbblgeom / allbblgeom) * 100 AS perbblgeom,
50-
MAX(segmihgeom / allmihgeom) * 100 AS maxpermihgeom
51-
FROM mihper
52-
GROUP BY id, bbl, project_name, affordability_option
85+
cleaned_option,
86+
project_id,
87+
mih_id,
88+
sum(segbblgeom) AS segbblgeom,
89+
sum(segmihgeom) AS segmihgeom,
90+
sum(segbblgeom / allbblgeom) * 100 AS perbblgeom,
91+
max(segmihgeom / allmihgeom) * 100 AS maxpermihgeom
92+
FROM mih_per_area
93+
GROUP BY bbl, cleaned_option, project_id, mih_id
5394
)
54-
SELECT
55-
id,
56-
bbl,
57-
project_name,
58-
affordability_option,
59-
segbblgeom,
60-
perbblgeom,
61-
maxpermihgeom,
62-
ROW_NUMBER() OVER (
63-
PARTITION BY id
64-
ORDER BY segbblgeom DESC, segmihgeom DESC
65-
) AS row_number
66-
FROM grouped
95+
SELECT * FROM mih_areas
6796
WHERE perbblgeom >= 10 OR maxpermihgeom >= 50;
6897

69-
-- assign the MIH project name and affordability option with the highest overlap to each lot
70-
UPDATE pluto a
98+
99+
-- NOTE: GIS will likely refactor dcp_mih into this pivoted format,
100+
-- so much of this code will likely disappear.
101+
--
102+
-- Find all distinct MIH areas that apply to a lot, and pivot to columns.
103+
-- e.g. if we have two rows from our geospatial join like so:
104+
-- bbl=123, mih_options=Option 1,Option 2
105+
-- bbl=123, mih_options=Option 2,Option 3
106+
-- we first aggregate to
107+
-- bbl=123, Option 1,Option 2,Option 2,Option 3
108+
-- then pivot into distinct columns
109+
WITH bbls_with_all_options AS (
110+
SELECT
111+
bbl,
112+
string_agg(cleaned_option, ',') AS all_options
113+
FROM mih_lot_overlap
114+
GROUP BY bbl
115+
), pivoted AS (
116+
SELECT
117+
bbl,
118+
CASE
119+
WHEN (all_options LIKE '%Option 1%') = true THEN '1'
120+
END AS mih_opt1,
121+
CASE
122+
WHEN (all_options LIKE '%Option 2%') = true THEN '1'
123+
END AS mih_opt2,
124+
CASE
125+
WHEN (all_options LIKE '%Option 3%' OR all_options LIKE '%Deep Affordability Option%') = true THEN '1'
126+
END AS mih_opt3,
127+
CASE
128+
WHEN (all_options LIKE '%Deep Affordability Option%') = true THEN '1'
129+
END AS mih_opt4
130+
FROM bbls_with_all_options
131+
)
132+
UPDATE pluto
71133
SET
72-
mih_project_name = b.project_name,
73-
mih_affordability_option = b.affordability_option
74-
FROM mihperorder AS b
75-
WHERE
76-
a.id = b.id
77-
AND row_number = 1;
134+
mih_opt1 = m.mih_opt1,
135+
mih_opt2 = m.mih_opt2,
136+
mih_opt3 = m.mih_opt3,
137+
mih_opt4 = m.mih_opt4
138+
FROM pivoted AS m
139+
WHERE pluto.bbl = m.bbl

0 commit comments

Comments
 (0)