Skip to content

Commit e49eb2b

Browse files
committed
Add MIH Areas data validation test
1 parent e46fcb4 commit e49eb2b

File tree

3 files changed

+53
-1
lines changed

3 files changed

+53
-1
lines changed

products/pluto/models/_sources.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ sources:
6565
description: Cleaned MIH option names with unique identifiers (from miharea.sql)
6666
- name: mih_lot_overlap
6767
description: Spatial overlaps between lots and MIH areas (from miharea.sql)
68+
- name: mih_distinct_options
69+
description: QAQC view of all distinct MIH options found in the data (from miharea.sql)
6870
- name: transit_zones_block_to_tz_ranked
6971
description: Ranked transit zone assignments by block (from transitzone.sql)
7072
- name: transit_zones_bbl_to_tz_ranked

products/pluto/pluto_build/sql/miharea.sql

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,20 @@ SELECT * FROM mih_areas
9696
WHERE perbblgeom >= 10 OR maxpermihgeom >= 50;
9797

9898

99+
-- QAQC: expose every distinct MIH option label present in mih_lot_overlap.
-- cleaned_option is treated as a comma-separated list: each element is split
-- out, trimmed of surrounding whitespace, and de-duplicated.
-- Expected labels: Option 1, Option 2, Deep Affordability Option, Workforce Option.
-- Any other label appearing here points to a source data issue.
DROP VIEW IF EXISTS mih_distinct_options CASCADE;
CREATE VIEW mih_distinct_options AS
SELECT DISTINCT trim(pieces.raw_option) AS option
FROM mih_lot_overlap AS mlo
CROSS JOIN LATERAL unnest(string_to_array(mlo.cleaned_option, ',')) AS pieces (raw_option)
ORDER BY option;
112+
99113
-- NOTE: GIS will likely refactor dcp_mih into this pivoted format,
100114
-- so much of this code will likely disappear.
101115
--
@@ -125,7 +139,7 @@ WITH bbls_with_all_options AS (
125139
WHEN (all_options LIKE '%Option 3%' OR all_options LIKE '%Deep Affordability Option%') = true THEN '1'
126140
END AS mih_opt3,
127141
CASE
128-
WHEN (all_options LIKE '%Deep Affordability Option%') = true THEN '1'
142+
WHEN (all_options LIKE '%Workforce Option%') = true THEN '1'
129143
END AS mih_opt4
130144
FROM bbls_with_all_options
131145
)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{{
  config(
    tags = ['de_check', 'minor', 'major'],
    meta = {
      'description': '''
This test checks that only the valid MIH options appear in the source data:
- Option 1
- Option 2
- Option 3 (handled the same as Deep Affordability Option in miharea.sql)
- Deep Affordability Option
- Workforce Option

Any additional options indicate a source data issue that needs to be investigated.
''',
      'next_steps': 'Contact GIS to investigate unexpected MIH option values in source data'
    }
  )
}}

-- Singular data test: it returns one row per unexpected MIH option label,
-- so the test fails as soon as any row comes back.

-- Allow-list of accepted option labels.
WITH valid_options AS (
    SELECT v.option
    FROM (VALUES
        ('Option 1'),
        ('Option 2'),
        ('Option 3'),
        ('Deep Affordability Option'),
        ('Workforce Option')
    ) AS v (option)
),

-- Distinct option labels actually found in the build (QAQC view from miharea.sql).
actual_options AS (
    SELECT src.option
    FROM {{ source('build_sources', 'mih_distinct_options') }} AS src
)

-- NOT EXISTS instead of NOT IN: a NULL option has no match in the allow-list
-- and is therefore reported, rather than being silently dropped by
-- three-valued logic.
SELECT a.option
FROM actual_options AS a
WHERE NOT EXISTS (
    SELECT 1
    FROM valid_options AS v
    WHERE v.option = a.option
)

0 commit comments

Comments
 (0)