Skip to content

Commit 24b6699

Browse files
committed
Redo MIH Areas
1 parent 184ffd8 commit 24b6699

File tree

6 files changed

+169
-120
lines changed

6 files changed

+169
-120
lines changed

products/pluto/models/qaqc/intermediate/qaqc_int__mihareas_questionable_assignments.sql

Lines changed: 49 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,57 +2,57 @@
22
materialized = 'view'
33
) }}
44
-- Analysis: MIH Areas Questionable Assignments
5-
-- Purpose: Identify tax lots with multiple MIH area assignments and analyze coverage patterns
5+
-- Purpose: Identify lots with "iffy" MIH area assignments due to low spatial overlap
6+
--
7+
-- This view shows lots where the MIH assignment is questionable due to:
8+
-- 1. Low percentage of lot covered by MIH area (between 10-20%)
9+
-- 2. Low percentage of MIH area covered by lot (between 50-75%); reflected in assignment_strength only, not in the WHERE filter
10+
-- 3. Edge cases that barely meet assignment thresholds
611
--
7-
-- This analysis identifies BBLs that intersect with multiple MIH areas
8-
-- and pulls in the relevant geometries for analysis purposes.
12+
-- Unlike transit zones, multiple MIH areas can legitimately apply to a single lot.
13+
-- We focus on identifying assignments with marginal spatial overlap that may need review.
914

10-
WITH bbls_with_multiple_mihareas AS (
11-
-- Find BBLs that have multiple MIH area assignments
12-
SELECT bbl
13-
FROM {{ source("build_sources", "mihperorder") }}
14-
WHERE row_number = 2 -- BBLs with at least 2 MIH assignments
15-
),
16-
17-
mihareas_coverage_analysis AS (
18-
-- Calculate coverage metrics for BBLs with multiple MIH area assignments
15+
WITH questionable_assignments AS (
1916
SELECT
20-
m.bbl,
21-
m.project_name,
22-
m.affordability_option,
23-
m.perbblgeom AS pct_covered,
24-
p.geom AS lot_geometry,
17+
mlo.bbl,
18+
mlo.project_id,
19+
mlo.mih_id,
20+
mlo.cleaned_option,
21+
mlo.perbblgeom AS pct_lot_covered,
22+
mlo.maxpermihgeom AS pct_mih_covered,
23+
-- Calculate how "iffy" this assignment is (lower scores = more questionable)
24+
LEAST(
25+
CASE WHEN mlo.perbblgeom >= 10 THEN mlo.perbblgeom ELSE 0 END,
26+
CASE WHEN mlo.maxpermihgeom >= 50 THEN mlo.maxpermihgeom ELSE 0 END
27+
) AS assignment_strength,
28+
p.geom AS lot_geom,
2529
p.address,
26-
d.wkb_geometry AS mih_geometry,
27-
-- Calculate how close the coverage percentage is to 50% (most ambiguous case)
28-
50 - ABS(m.perbblgeom - 50.0) AS ambiguity_score,
29-
-- Get the maximum ambiguity score for each BBL to identify most questionable cases
30-
MAX(50 - ABS(m.perbblgeom - 50.0)) OVER (PARTITION BY m.bbl) AS max_bbl_ambiguity_score
31-
FROM {{ source("build_sources", "mihperorder") }} AS m
32-
INNER JOIN bbls_with_multiple_mihareas AS bmm
33-
ON m.bbl = bmm.bbl
34-
INNER JOIN {{ source("build_sources", "pluto") }} AS p
35-
ON m.bbl = p.bbl
36-
INNER JOIN {{ source("build_sources", "dcp_mih") }} AS d
37-
ON
38-
m.project_name = d.project_name
39-
AND m.affordability_option = d.mih_option
40-
),
41-
42-
final AS (
43-
SELECT DISTINCT ON (project_name, affordability_option, bbl, max_bbl_ambiguity_score)
44-
bbl,
45-
project_name,
46-
affordability_option,
47-
pct_covered,
48-
ambiguity_score,
49-
max_bbl_ambiguity_score,
50-
mih_geometry,
51-
lot_geometry,
52-
address
53-
FROM mihareas_coverage_analysis
30+
p.zonedist1
31+
FROM mih_lot_overlap AS mlo
32+
LEFT JOIN pluto AS p ON mlo.bbl = p.bbl
33+
WHERE
34+
-- Focus on assignments that barely meet thresholds
35+
(mlo.perbblgeom BETWEEN 10 AND 20) -- Low lot coverage
36+
), assignment_context AS (
37+
-- Add basic context and geometry
38+
SELECT
39+
qa.*,
40+
mc.wkb_geometry AS mih_geom
41+
FROM questionable_assignments AS qa
42+
LEFT JOIN mih_cleaned AS mc ON qa.mih_id = mc.mih_id
5443
)
55-
56-
SELECT *
57-
FROM final
58-
ORDER BY max_bbl_ambiguity_score DESC, bbl ASC, pct_covered DESC
44+
SELECT
45+
bbl,
46+
project_id,
47+
cleaned_option,
48+
pct_lot_covered,
49+
pct_mih_covered,
50+
assignment_strength,
51+
address,
52+
zonedist1,
53+
lot_geom,
54+
mih_geom,
55+
ST_ENVELOPE(ST_BUFFER(lot_geom, 0.005)) AS area_of_interest_geom
56+
FROM assignment_context
57+
-- Exclude park lots. NULL-safe comparison: pluto is LEFT JOINed, so zonedist1 may be NULL,
-- and a plain `!=` would silently drop those rows from this review list.
WHERE zonedist1 IS DISTINCT FROM 'PARK'
58+
ORDER BY assignment_strength ASC, pct_lot_covered ASC;

products/pluto/pluto_build/sql/create.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,10 @@ CREATE TABLE pluto (
102102
masdate text,
103103
polidate text,
104104
edesigdate text,
105-
mih_project_name text,
106-
mih_affordability_option text,
105+
mih_opt1 text,
106+
mih_opt2 text,
107+
mih_opt3 text,
108+
mih_opt4 text,
107109
trnstzone text,
108110
affresfar text,
109111
mnffar text

products/pluto/pluto_build/sql/export.sql

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,10 @@ SELECT
9292
latitude::numeric(19, 7),
9393
longitude::numeric(19, 7),
9494
notes::varchar(20),
95-
mih_affordability_option::varchar(50),
96-
mih_project_name::varchar(50),
95+
mih_opt1::varchar(1),
96+
mih_opt2::varchar(1),
97+
mih_opt3::varchar(1),
98+
mih_opt4::varchar(1),
9799
trnstzone::varchar(50),
98100
affresfar::numeric(19, 11),
99101
mnffar::numeric(19, 11)
@@ -203,8 +205,10 @@ SELECT
203205
a.latitude AS "Latitude",
204206
a.longitude AS "Longitude",
205207
a.notes AS "Notes",
206-
a.mih_affordability_option AS "MIHAffOption",
207-
a.mih_project_name AS "MIHProjectName",
208+
a.mih_opt1 AS "MIHOption1",
209+
a.mih_opt2 AS "MIHOption2",
210+
a.mih_opt3 AS "MIHOption3",
211+
a.mih_opt4 AS "MIHOption4",
208212
a.trnstzone AS "TrnstZone",
209213
a.affresfar AS "AffResFAR",
210214
a.mnffar AS "ManuFAR"

products/pluto/pluto_build/sql/export_mappluto_gdb.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,10 @@ SELECT
9595
round(st_length(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Leng",
9696
round(st_area(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Area",
9797
st_makevalid(b.:GEOM) AS geom,
98-
a.mih_affordability_option AS "MIHAffOption",
99-
a.mih_project_name AS "MIHProjectName",
98+
a.mih_opt1 AS "MIHOption1",
99+
a.mih_opt2 AS "MIHOption2",
100+
a.mih_opt3 AS "MIHOption3",
101+
a.mih_opt4 AS "MIHOption4",
100102
a.trnstzone AS "TrnstZone",
101103
a.affresfar AS "AffResFAR",
102104
a.mnffar AS "ManuFAR"

products/pluto/pluto_build/sql/export_mappluto_shp.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,10 @@ SELECT
9595
round(st_length(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Leng",
9696
round(st_area(b.:GEOM)::numeric, 11)::numeric(19, 7) AS "Shape_Area",
9797
st_makevalid(b.:GEOM) AS geom,
98-
a.mih_affordability_option AS "MIHAffOption",
99-
a.mih_project_name AS "MIHProjectName",
98+
a.mih_opt1 AS "MIHOption1",
99+
a.mih_opt2 AS "MIHOption2",
100+
a.mih_opt3 AS "MIHOption3",
101+
a.mih_opt4 AS "MIHOption4",
100102
a.trnstzone AS "TrnstZone",
101103
a.affresfar AS "AffResFAR",
102104
a.mnffar AS "ManuFAR"
Lines changed: 100 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,116 @@
1-
-- Similar logic to zoning districts:
2-
-- calculate how much (total area and percentage) of each lot is covered by MIH areas
3-
-- assign the MIH project ID to each tax lot based on which MIH area covers the
4-
-- majority of the lot
5-
-- a MIH area is only assigned if more than 10% of the lot is covered by the MIH area
6-
-- OR more than 50% of the MIH area overlaps with the lot
7-
DROP TABLE IF EXISTS mihperorder;
8-
CREATE TABLE mihperorder AS
9-
WITH mih_unioned AS (
10-
SELECT
11-
project_name,
12-
mih_option AS affordability_option,
13-
ST_UNION(wkb_geometry) AS wkb_geometry
14-
FROM dcp_mih
15-
GROUP BY project_name, mih_option
16-
),
17-
mihper AS (
1+
DROP TABLE IF EXISTS mih_cleaned;
2+
CREATE TABLE mih_cleaned AS
3+
SELECT
4+
project_id || '-' || mih_option AS mih_id,
5+
*,
6+
trim(
7+
-- Step 2b: collapse any sequence of commas (e.g., ",,", ",,,")
8+
regexp_replace(
9+
-- Step 2a: Replace "and" or "," (with any spaces) with a single comma
10+
regexp_replace(
11+
-- Step 1: Add space between "Option" and number
12+
regexp_replace(
13+
replace(mih_option, 'Affordablility', 'Affordability'), -- should probably fix this in the source data
14+
'Option(\d)', -- ← match "Option" followed by a digit
15+
'Option \1', -- ← insert space
16+
'g'
17+
),
18+
'\s*(,|and)\s*', -- ← match a comma or "and" (with spaces)
19+
',', -- ← replace with a comma
20+
'g'
21+
),
22+
',+', -- ← match one or more commas in a row
23+
',', -- ← replace with a single comma
24+
'g'
25+
),
26+
', ' -- ← trim comma and space from start/end
27+
) AS cleaned_option
28+
FROM dcp_mih;
29+
30+
31+
DROP TABLE IF EXISTS mih_lot_overlap CASCADE;
32+
CREATE TABLE mih_lot_overlap AS
33+
WITH mih_per_area AS (
1834
SELECT
19-
p.id,
2035
p.bbl,
21-
m.project_name,
22-
m.affordability_option,
23-
ST_AREA(
36+
m.project_id,
37+
m.mih_id,
38+
m.wkb_geometry AS mih_geom,
39+
p.geom AS lot_geom,
40+
m.cleaned_option,
41+
st_area(
2442
CASE
25-
WHEN ST_COVEREDBY(p.geom, m.wkb_geometry) THEN p.geom
26-
ELSE ST_MULTI(ST_INTERSECTION(p.geom, m.wkb_geometry))
43+
WHEN st_coveredby(p.geom, m.wkb_geometry) THEN p.geom
44+
ELSE st_multi(st_intersection(p.geom, m.wkb_geometry))
2745
END
2846
) AS segbblgeom,
29-
ST_AREA(p.geom) AS allbblgeom,
30-
ST_AREA(
47+
st_area(p.geom) AS allbblgeom,
48+
st_area(
3149
CASE
32-
WHEN ST_COVEREDBY(m.wkb_geometry, p.geom) THEN m.wkb_geometry
33-
ELSE ST_MULTI(ST_INTERSECTION(m.wkb_geometry, p.geom))
50+
WHEN st_coveredby(m.wkb_geometry, p.geom) THEN m.wkb_geometry
51+
ELSE st_multi(st_intersection(m.wkb_geometry, p.geom))
3452
END
3553
) AS segmihgeom,
36-
ST_AREA(m.wkb_geometry) AS allmihgeom
54+
st_area(m.wkb_geometry) AS allmihgeom
3755
FROM pluto AS p
38-
INNER JOIN mih_unioned AS m
39-
ON ST_INTERSECTS(p.geom, m.wkb_geometry)
56+
INNER JOIN mih_cleaned AS m
57+
ON st_intersects(p.geom, m.wkb_geometry)
4058
),
41-
grouped AS (
59+
mih_areas AS (
4260
SELECT
43-
id,
4461
bbl,
45-
project_name,
46-
affordability_option,
47-
SUM(segbblgeom) AS segbblgeom,
48-
SUM(segmihgeom) AS segmihgeom,
49-
SUM(segbblgeom / allbblgeom) * 100 AS perbblgeom,
50-
MAX(segmihgeom / allmihgeom) * 100 AS maxpermihgeom
51-
FROM mihper
52-
GROUP BY id, bbl, project_name, affordability_option
62+
cleaned_option,
63+
project_id,
64+
mih_id,
65+
sum(segbblgeom) AS segbblgeom,
66+
sum(segmihgeom) AS segmihgeom,
67+
sum(segbblgeom / allbblgeom) * 100 AS perbblgeom,
68+
max(segmihgeom / allmihgeom) * 100 AS maxpermihgeom
69+
FROM mih_per_area
70+
GROUP BY bbl, cleaned_option, project_id, mih_id
5371
)
54-
SELECT
55-
id,
56-
bbl,
57-
project_name,
58-
affordability_option,
59-
segbblgeom,
60-
perbblgeom,
61-
maxpermihgeom,
62-
ROW_NUMBER() OVER (
63-
PARTITION BY id
64-
ORDER BY segbblgeom DESC, segmihgeom DESC
65-
) AS row_number
66-
FROM grouped
72+
SELECT * FROM mih_areas
6773
WHERE perbblgeom >= 10 OR maxpermihgeom >= 50;
6874

69-
-- assign the MIH project name and affordability option with the highest overlap to each lot
70-
UPDATE pluto a
75+
76+
-- NOTE: GIS will likely refactor dcp_mih into this pivoted format,
77+
-- so much of this code will likely disappear.
78+
--
79+
-- Find all distinct MIH areas that apply to a lot, and pivot to columns.
80+
-- e.g. if we have two rows from our geospatial join like so:
81+
-- bbl=123, mih_options=Option 1,Option 2
82+
-- bbl=123, mih_options=Option 2,Option 3
83+
-- we first aggregate to
84+
-- bbl=123, Option 1,Option 2,Option 2,Option 3
85+
-- then pivot into distinct columns
86+
WITH bbls_with_all_options AS (
87+
SELECT
88+
bbl,
89+
string_agg(cleaned_option, ',') AS all_options
90+
FROM mih_lot_overlap
91+
GROUP BY bbl
92+
), pivoted AS (
93+
SELECT
94+
bbl,
95+
CASE
96+
WHEN (all_options LIKE '%Option 1%') = true THEN '1'
97+
END AS mih_opt1,
98+
CASE
99+
WHEN (all_options LIKE '%Option 2%') = true THEN '1'
100+
END AS mih_opt2,
101+
CASE
102+
WHEN (all_options LIKE '%Option 3%' OR all_options LIKE '%Deep Affordability Option%') = true THEN '1'
103+
END AS mih_opt3,
104+
CASE
105+
-- mih_opt4 is the fourth MIH option; testing the Deep Affordability text here only duplicated mih_opt3.
-- NOTE(review): presumably labelled 'Workforce Option' in dcp_mih.mih_option - confirm against source values.
WHEN (all_options LIKE '%Option 4%' OR all_options LIKE '%Workforce Option%') = true THEN '1'
106+
END AS mih_opt4
107+
FROM bbls_with_all_options
108+
)
109+
UPDATE pluto
71110
SET
72-
mih_project_name = b.project_name,
73-
mih_affordability_option = b.affordability_option
74-
FROM mihperorder AS b
75-
WHERE
76-
a.id = b.id
77-
AND row_number = 1;
111+
mih_opt1 = m.mih_opt1,
112+
mih_opt2 = m.mih_opt2,
113+
mih_opt3 = m.mih_opt3,
114+
mih_opt4 = m.mih_opt4
115+
FROM pivoted AS m
116+
WHERE pluto.bbl = m.bbl

0 commit comments

Comments
 (0)