Skip to content

Commit 526a856

Browse files
authored
add GX DQ tests for Contacts, People, Tenancies and Property NEC data loads (#2541)
1 parent 68bd947 commit 526a856

5 files changed

+214
-46
lines changed

scripts/helpers/housing_nec_migration_gx_dq_inputs.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# Per-load SQL settings, keyed by data load name; "id_field" names the
# identifier column of that load.
sql_config = dict(
    properties={"id_field": "LPRO_PROPREF"},
    tenancies={"id_field": "LTCY_ALT_REF"},
    people={"id_field": "LPAR_PER_ALT_REF"},
    contacts={"id_field": "LCDE_LEGACY_REF"},
)
57

6-
data_load_list = ["properties", "tenancies"]
8+
data_load_list = ["properties", "tenancies", "people", "contacts"]
79

810
table_list = {
911
"properties": [
@@ -18,10 +20,17 @@
1820
"properties_4b",
1921
"properties_4c",
2022
"properties_7a",
21-
"properties_all_tranches"
23+
"properties_all_tranches",
2224
],
23-
"tenancies": ["tenancies_1a",
24-
"tenancies_1c"]
25+
"tenancies": [
26+
"tenancies_1a",
27+
"tenancies_1c",
28+
"tenancies_2a",
29+
"tenancies_all",
30+
"tenancies_other",
31+
],
32+
"people": ["people_1a", "people_1b", "people_1c", "people_2a"],
33+
"contacts": ["contacts_1a", "contacts_1b", "contacts_2a"]
2534
}
2635

2736
partition_keys = ["import_date"]
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# flake8: noqa: F821
2+
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
10+
class ExpectPersonRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pinned to the contacts load's LCDE_LEGACY_REF
    # column, so it can be added to a suite without any arguments.
    column: str = "LCDE_LEGACY_REF"
    description: str = (
        "Expect LCDE_LEGACY_REF (person ref) values to not be Null in contacts load"
    )
13+
14+
15+
class ExpectValueColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pinned to the LCDE_CONTACT_VALUE column, so it can
    # be added to a suite without any arguments.
    column: str = "LCDE_CONTACT_VALUE"
    description: str = (
        "Expect LCDE_CONTACT_VALUE (contact value) to not be Null"
    )
18+
19+
20+
class ExpectContactTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    # In-set expectation for the contact type code column of the contacts
    # load. The column name has to match the data column exactly (it is the
    # same name listed in the contacts ordered column list), so it must not
    # carry any stray punctuation inside the string literal.
    column: str = "LCDE_FRV_CME_CODE"
    # Contact type codes accepted in the load.
    value_set: list = ["WORKTEL", "MOBILETEL", "HOMETEL", "EMAIL", "OTHER"]
    description: str = "Expect contact type code to be one of the set"
24+
25+
26+
class ExpectContactsColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
    # Table-level expectation for the contacts load: the columns must appear
    # exactly as listed below, in this order.
    column_list = [
        "LCDE_START_DATE",
        "LCDE_PRIMARY_REF",
        "LCDE_SECONDARY_REF",
        "LCDE_PRECEDENCE",
        "LCDE_LEGACY_TYPE",
        "LCDE_LEGACY_REF",
        "LCDE_FRV_COMM_PREF_CODE",
        "LCDE_FRV_CME_CODE",
        "LCDE_END_DATE",
        "LCDE_CREATED_DATE",
        "LCDE_CREATED_BY",
        "LCDE_CONTACT_VALUE",
        "LCDE_CONTACT_NAME",
        "LCDE_COMMENTS",
        "LCDE_ALLOW_TEXTS",
    ]
    description: str = "Expect columns to match ordered list exactly"
45+
46+
47+
# Resolve the Glue job arguments; the only one requested is the location of
# the GX file-backed project.
arg_key = ["s3_target_location"]
args = getResolvedOptions(sys.argv, arg_key)
# At module scope locals() is the module namespace, so this publishes every
# resolved argument (notably s3_target_location) as a global name. That is
# why the file suppresses flake8 F821 (undefined name).
locals().update(args)

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

# The suite is named after the load it validates. It previously reused the
# properties suite name, which collides with the suite registered by the
# properties script.
suite = gx.ExpectationSuite(name="contacts_data_load_suite")

suite.add_expectation(ExpectContactsColumnsToMatchOrderedList())
suite.add_expectation(ExpectContactTypeCodeToBeInSet())
suite.add_expectation(ExpectPersonRefColumnValuesToNotBeNull())
suite.add_expectation(ExpectValueColumnValuesToNotBeNull())
# Register the populated suite with the context.
suite = context.suites.add(suite)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# flake8: noqa: F821
2+
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
10+
class ExpectPersonRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    # Uniqueness expectation pinned to the people load's LPAR_PER_ALT_REF
    # column, so it can be added to a suite without any arguments.
    column: str = "LPAR_PER_ALT_REF"
    description: str = (
        "Expect LPAR_PER_ALT_REF (person ref) values to be unique"
    )
13+
14+
15+
class ExpectPersonRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pinned to the people load's LPAR_PER_ALT_REF
    # column, so it can be added to a suite without any arguments.
    column: str = "LPAR_PER_ALT_REF"
    description: str = (
        "Expect LPAR_PER_ALT_REF (person ref) values to not be Null"
    )
18+
19+
20+
class ExpectTitleToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    # In-set expectation for the LPAR_PER_TITLE column of the people load.
    column: str = "LPAR_PER_TITLE"
    # Accepted titles; None is listed explicitly alongside them.
    # NOTE(review): "LAD" may be a truncated "LADY" - confirm against the
    # reference values of the target system before changing it.
    value_set: list = [
        "DAME", "DR", "LAD", "LORD", "MASTER",
        "MISS", "MR", "MRS", "MS", "MX",
        "PROFESSOR", "RABBI", "REVEREND", "SIR",
        None,
    ]
    description: str = "Expect title to be one of the set"
40+
41+
42+
class ExpectPeopleColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
    # Table-level expectation for the people load: the columns must appear
    # exactly as listed below, in this order.
    column_list = [
        # LPAR_TIN_* columns
        "LPAR_TIN_HRV_TIR_CODE",
        "LPAR_TIN_STAT_SUCCESSOR_IND",
        "LPAR_TIN_START_DATE",
        "LPAR_TIN_MAIN_TENANT_IND",
        "LPAR_TIN_END_DATE",
        # LPAR_TCY_* columns and phone
        "LPAR_TCY_IND",
        "LPAR_TCY_ALT_REF",
        "LPAR_PHONE",
        # LPAR_PER_* columns
        "LPAR_PER_TITLE",
        "LPAR_PER_SURNAME",
        "LPAR_PER_OTHER_NAME",
        "LPAR_PER_NI_NO",
        "LPAR_PER_INITIALS",
        "LPAR_PER_HOU_OAP_IND",
        "LPAR_PER_HOU_HRV_HMS_CODE",
        "LPAR_PER_HOU_EMPLOYER",
        "LPAR_PER_HOU_DISABLED_IND",
        "LPAR_PER_FRV_HGO_CODE",
        "LPAR_PER_FRV_FNL_CODE",
        "LPAR_PER_FRV_FGE_CODE",
        "LPAR_PER_FRV_FEO_CODE",
        "LPAR_PER_FORENAME",
        "LPAR_PER_DATE_OF_BIRTH",
        "LPAR_PER_ALT_REF",
        # LPAR_HOP_* columns
        "LPAR_HOP_START_DATE",
        "LPAR_HOP_HRV_REL_CODE",
        "LPAR_HOP_HPSR_CODE",
        "LPAR_HOP_HPER_CODE",
        "LPAR_HOP_END_DATE",
    ]
    description: str = "Expect people load columns to match ordered list exactly"
75+
76+
77+
# Resolve the Glue job arguments; the only one requested is the location of
# the GX file-backed project.
arg_key = ["s3_target_location"]
args = getResolvedOptions(sys.argv, arg_key)
# At module scope locals() is the module namespace, so this publishes every
# resolved argument (notably s3_target_location) as a global name. That is
# why the file suppresses flake8 F821 (undefined name).
locals().update(args)

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

# The suite is named after the load it validates. It previously reused the
# tenancies suite name, which collides with the suite registered by the
# tenancies script.
suite = gx.ExpectationSuite(name="people_data_load_suite")

suite.add_expectation(ExpectPersonRefColumnValuesToBeUnique())
suite.add_expectation(ExpectTitleToBeInSet())
suite.add_expectation(ExpectPeopleColumnsToMatchOrderedList())
suite.add_expectation(ExpectPersonRefColumnValuesToNotBeNull())
# Register the populated suite with the context.
suite = context.suites.add(suite)

scripts/jobs/housing/housing_nec_migration_properties_data_load_gx_suite.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ class ExpectPropRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
1212
description: str = "Expect UPRN (LPRO_PROPREF) values to be unique"
1313

1414

15+
class ExpectPropRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pinned to the properties load's LPRO_PROPREF
    # column, so it can be added to a suite without any arguments.
    column: str = "LPRO_PROPREF"
    description: str = (
        "Expect LPRO_PROPREF (prop ref) values to not be Null"
    )
18+
19+
1520
class ExpectPropTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
1621
column: str = "LPRO_HOU_PTV_CODE"
1722
value_set: list = [
@@ -158,4 +163,5 @@ class ExpectPropColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedLi
158163
suite.add_expectation(ExpectResIndicatorToBeInSet())
159164
suite.add_expectation(ExpectPropTypeValuesToBeInSet())
160165
suite.add_expectation(ExpectPropColumnsToMatchOrderedList())
166+
suite.add_expectation(ExpectPropRefColumnValuesToNotBeNull())
161167
suite = context.suites.add(suite)

scripts/jobs/housing/housing_nec_migration_tenancies_data_load_gx_suite.py

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,42 +12,47 @@ class ExpectTagRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
1212
description: str = "Expect LTCY_ALT_REF (tenancy ref) values to be unique"
1313

1414

15+
class ExpectTenancyRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pinned to the tenancies load's LTCY_ALT_REF
    # column, so it can be added to a suite without any arguments.
    column: str = "LTCY_ALT_REF"
    description: str = (
        "Expect LTCY_ALT_REF (tenancy ref) values to not be Null"
    )
18+
19+
1520
class ExpectTenancyTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
1621
column: str = "LTCY_TTY_CODE"
1722
value_set: list = [
18-
'ASH',
19-
'ASY',
20-
'DEC',
21-
'Demoted',
22-
'FRE',
23-
'FRS',
24-
'HAL',
25-
'LIVINGRT',
26-
'INT',
27-
'LEA',
28-
'LHS',
29-
'LTA',
30-
'MPA',
31-
'NON',
32-
'PVG',
33-
'RTM',
34-
'SEC',
35-
'SHO',
36-
'SLL',
37-
'SPS',
38-
'SSE',
39-
'TACCFLAT',
40-
'TBB',
41-
'TBBFam',
42-
'THO',
43-
'TGA',
44-
'THL',
45-
'THGF',
46-
'TLA',
47-
'TPL',
48-
'TRA',
49-
'UNDER18',
50-
'OFFICESE'
23+
"ASH",
24+
"ASY",
25+
"DEC",
26+
"Demoted",
27+
"FRE",
28+
"FRS",
29+
"HAL",
30+
"LIVINGRT",
31+
"INT",
32+
"LEA",
33+
"LHS",
34+
"LTA",
35+
"MPA",
36+
"NON",
37+
"PVG",
38+
"RTM",
39+
"SEC",
40+
"SHO",
41+
"SLL",
42+
"SPS",
43+
"SSE",
44+
"TACCFLAT",
45+
"TBB",
46+
"TBBFam",
47+
"THO",
48+
"TGA",
49+
"THL",
50+
"THGF",
51+
"TLA",
52+
"TPL",
53+
"TRA",
54+
"UNDER18",
55+
"OFFICESE",
5156
]
5257
description: str = "Expect tenancy type code to contain one of the set"
5358

@@ -62,22 +67,17 @@ class ExpectTenureTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
6267
"TEMPORARY",
6368
"FREEHOLD",
6469
"COMMERCIAL",
65-
"LIVINGRENT"
70+
"LIVINGRENT",
6671
]
6772
description: str = "Expect tenure type code to be one of the set"
6873

6974

7075
class ExpectTenancyStatusCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
7176
column: str = "LTCY_HRV_TST_CODE"
72-
value_set: list = [
73-
"DECANT",
74-
"NOTICE",
75-
"UNAUTHOCC"
76-
]
77+
value_set: list = ["DECANT", "NOTICE", "UNAUTHOCC"]
7778
description: str = "Expect tenancy status code to be one of the set"
7879

7980

80-
8181
class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
8282
column_list = [
8383
"LTCY_ALT_REF",
@@ -125,10 +125,11 @@ class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrdere
125125
"LTCY_THO_END_DATE6",
126126
"LTCY_THO_HRV_TTR_CODE6",
127127
"LTCY_PHONE",
128-
"LTCY_REVIEW_DATE"
128+
"LTCY_REVIEW_DATE",
129129
]
130130
description: str = "Expect tenancy load columns to match ordered list exactly"
131131

132+
132133
arg_key = ["s3_target_location"]
133134
args = getResolvedOptions(sys.argv, arg_key)
134135
locals().update(args)
@@ -143,4 +144,6 @@ class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrdere
143144
suite.add_expectation(ExpectTenureTypeCodeToBeInSet())
144145
suite.add_expectation(ExpectTenancyStatusCodeToBeInSet())
145146
suite.add_expectation(ExpectTenancyColumnsToMatchOrderedList())
146-
suite = context.suites.add(suite)
147+
suite.add_expectation(ExpectTenancyRefColumnValuesToNotBeNull())
148+
149+
context.suites.add(suite)

0 commit comments

Comments
 (0)