Skip to content

Commit aab2c63

Browse files
authored
Add more tables to dq (#2558)
* update sql scripts according to changes in MTFH reshape files * add more GX expectation suites and tests; add additional tables for testing
1 parent 2868f0b commit aab2c63

5 files changed

+285
-4
lines changed

scripts/helpers/housing_nec_migration_gx_dq_inputs.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,22 @@
33
"tenancies": {"id_field": "LTCY_ALT_REF"},
44
"people": {"id_field": "LPAR_PER_ALT_REF"},
55
"contacts": {"id_field": "LCDE_LEGACY_REF"},
6+
"arrears_actions": {"id_field": "LACA_PAY_REF"},
7+
"revenue_accounts": {"id_field": "LRAC_PAY_REF"},
8+
"transactions": {"id_field": "LTRN_ALT_REF"},
9+
"addresses": {"id_field": "LAUS_LEGACY_REF"}
610
}
711

8-
data_load_list = ["properties", "tenancies", "people", "contacts", "arrears_actions"]
12+
data_load_list = [
13+
"properties",
14+
"tenancies",
15+
"people",
16+
"contacts",
17+
"arrears_actions",
18+
"revenue_accounts",
19+
"transactions",
20+
"addresses",
21+
]
922

1023
table_list = {
1124
"properties": [
@@ -31,7 +44,26 @@
3144
],
3245
"people": ["people_1a", "people_1b", "people_1c", "people_2a", "people_all"],
3346
"contacts": ["contacts_1a", "contacts_1b", "contacts_2a", "contacts_all"],
34-
"arrears_actions": ["arrears_actions_1a", "arrears_actions_1c", "arrears_actions_2a"],
47+
"arrears_actions": [
48+
"arrears_actions_1a",
49+
"arrears_actions_1c",
50+
"arrears_actions_2a",
51+
],
52+
"revenue_accounts": [
53+
"revenue_accounts_1a",
54+
"revenue_accounts_1b_sc",
55+
"revenue_accounts_1c",
56+
"revenue_accounts_2a",
57+
"revenue_accounts_other",
58+
],
59+
"transactions": [
60+
"transactions_1a",
61+
"transactions_1c",
62+
"transactions_2a",
63+
"transactions_other",
64+
"transactions_all",
65+
],
66+
"addresses": ["addresses_1a", "addresses_2a"],
3567
}
3668

3769
partition_keys = ["import_date"]
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# flake8: noqa: F821
2+
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
10+
class ExpectPropRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    # Defaults are fixed on the class so the suite can instantiate this
    # expectation with no arguments.
    column: str = "LAUS_LEGACY_REF"
    description: str = (
        "Expect LAUS_LEGACY_REF values to be unique"
    )
13+
14+
class ExpectPropRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null check with the target column preset to LAUS_LEGACY_REF.
    column: str = "LAUS_LEGACY_REF"
    description: str = (
        "Expect LAUS_LEGACY_REF values to not be Null"
    )
17+
18+
class ExpectUPRNColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    # Uniqueness check with the target column preset to LADR_UPRN.
    column: str = "LADR_UPRN"
    description: str = (
        "Expect UPRN values to be unique"
    )
21+
22+
class ExpectUPRNColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null check with the target column preset to LADR_UPRN.
    column: str = "LADR_UPRN"
    description: str = (
        "Expect UPRN values to not be Null"
    )
25+
26+
class ExpectAddressColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
    # Expected column names, in the order they are compared against the table.
    # Left un-annotated, as in the original declaration.
    column_list = [
        "LAUS_LEGACY_REF",
        "LAUS_AUT_FAO_CODE",
        "LAUS_AUT_FAR_CODE",
        "LAUS_START_DATE",
        "LAUS_END_DATE",
        "LADR_FLAT",
        "LADR_BUILDING",
        "LADR_STREET_NUMBER",
        "LAEL_STREET",
        "LAEL_SUB_STREET1",
        "LAEL_SUB_STREET2",
        "LAEL_SUB_STREET3",
        "LAEL_AREA",
        "LAEL_TOWN",
        "LAEL_COUNTY",
        "LAEL_COUNTRY",
        "LAEL_POSTCODE",
        "LAEL_LOCAL_IND",
        "LAEL_ABROAD_IND",
        "LADD_ADDL1",
        "LADD_ADDL2",
        "LADD_ADDL3",
        "LAEL_STREET_INDEX_CODE",
        "LAUS_CONTACT_NAME",
        "LADR_EASTINGS",
        "LADR_NORTHINGS",
        "LADR_UPRN",
    ]
    description: str = (
        "Expect columns to match ordered list exactly"
    )
57+
58+
59+
# Resolve the job argument that locates the GX project root.
arg_key = ["s3_target_location"]
args = getResolvedOptions(sys.argv, arg_key)
# Bind the option explicitly rather than via locals().update(args), so the
# name is defined where readers and linters can see it.
s3_target_location = args["s3_target_location"]

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name="addresses_data_load_suite")

# Expectations are added in the same order as before.
suite.add_expectation(ExpectPropRefColumnValuesToBeUnique())
suite.add_expectation(ExpectUPRNColumnValuesToBeUnique())
suite.add_expectation(ExpectAddressColumnsToMatchOrderedList())
suite.add_expectation(ExpectPropRefColumnValuesToNotBeNull())
suite.add_expectation(ExpectUPRNColumnValuesToNotBeNull())
# Register the suite with the context; keep the returned suite object.
suite = context.suites.add(suite)

scripts/jobs/housing/housing_nec_migration_apply_gx_dq_tests.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
import scripts.jobs.housing.housing_nec_migration_people_data_load_gx_suite
2121
import scripts.jobs.housing.housing_nec_migration_contacts_data_load_gx_suite
2222
import scripts.jobs.housing.housing_nec_migration_arrears_actions_data_load_gx_suite
23+
import scripts.jobs.housing.housing_nec_migration_revenue_accounts_data_load_gx_suite
24+
import scripts.jobs.housing.housing_nec_migration_transactions_data_load_gx_suite
25+
import scripts.jobs.housing.housing_nec_migration_addresses_data_load_gx_suite
2326

2427
logging.basicConfig(level=logging.INFO)
2528
logger = logging.getLogger(__name__)
@@ -86,8 +89,13 @@ def main():
8689
batch_parameters = {"dataframe": df}
8790

8891
# get expectation suite for dataset
89-
suite = context.suites.get(name=f"{data_load}_data_load_suite")
90-
expectations = suite.expectations
92+
try:
93+
suite = context.suites.get(name=f"{data_load}_data_load_suite")
94+
except Exception as e:
95+
logger.info(f"Problem found with {data_load}: GX suite {e}, skipping suite.")
96+
continue
97+
else:
98+
expectations = suite.expectations
9199

92100
validation_definition = gx.ValidationDefinition(
93101
data=batch_definition,
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# flake8: noqa: F821
2+
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
10+
class ExpectPayRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null check with the target column preset to LRAC_PAY_REF.
    column: str = "LRAC_PAY_REF"
    # The description previously read "LLRAC_PAY_REF ... in contacts load";
    # it now names the real column and the revenue accounts load this suite
    # is registered for.
    description: str = (
        "Expect LRAC_PAY_REF (pay ref) values to not be Null in revenue accounts load"
    )
15+
16+
17+
class ExpectTenancyRefToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null check with the target column preset to LRAC_TCY_ALT_REF.
    column: str = "LRAC_TCY_ALT_REF"
    description: str = (
        "Expect LRAC_TCY_ALT_REF to not be Null"
    )
20+
21+
22+
class ExpectArrearCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    # NOTE(review): LACA_ARA_CODE carries the arrears-actions prefix (LACA_),
    # while this class is added to the revenue accounts suite, whose other
    # expectations use LRAC_ columns — confirm the column is present there.
    column: str = "LACA_ARA_CODE"
    # Allowed codes. "IRA1" and "NRA2" were each listed twice; the repeats
    # are removed, which leaves the set of accepted values unchanged.
    value_set: list = [
        "BAGF",
        "BAG1",
        "RTRN",
        "FRA2",
        "ADVR",
        "RDCN",
        "DWPR",
        "DWPN",
        "EDCL",
        "NOTE",
        "RHCN",
        "CORT",
        "IUPO",
        "IRA1",
        "NTQ",
        "CON3",
        "RREQ",
        "RRFN",
        "STAT",
        "SHB",
        "SNW",
        "ARRN",
        "POP",
        "FRA1",
        "BCOL",
        "DWPC",
        "DWPT",
        "COUT",
        "NFA",
        "NRA2",
        "FRET",
        "SCH",
        "SNP",
        "VISN",
        "WOA",
        "WOC",
        "WOH",
        "WON",
        "CDAT",
        "CNOK",
        "ADVC",
        "EVIC",
        "FINC",
        "HBN",
        "SRA1",
        "TRA1",
        "NRA1",
        "MRA1",
        "TELO",
        "RPAN",
        "RRHB",
        "RRF",
        "SAR",
        "SBA",
        "SCM",
        "SSA",
        "VISI",
        "WOF",
        "RCHN",
        "RDDN",
        "CDL",
        "FINI",
        "GRA1",
        "AGRL",
        "SRA2",
        "TRA2",
        "IRA2",
        "MRA2",
        "CWAL",
        "TELI",
        "RELI",
        "LREF",
        "NOSP",
        "INTV",
        "SUP",
        "UCC",
    ]
    description: str = "Expect arrear code to be one of the set"
105+
106+
107+
class ExpectArrearsActionsColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
    # Expected column names, in the order they are compared against the table.
    # NOTE(review): every entry uses the LACA_ prefix although this class is
    # added to the revenue accounts suite, and the final entry "LACA_DEL_"
    # looks truncated — confirm both against the real table schema.
    column_list = [
        "LACA_BALANCE",
        "LACA_PAY_REF",
        "LACA_TYPE",
        "LACA_CREATED_BY",
        "LACA_CREATED_DATE",
        "LACA_ARREARS_DISPUTE_IND",
        "LACA_ARA_CODE",
        "LACA_STATUS",
        "LACA_HRV_ADL_CODE",
        "LACA_EAC_EPO_CODE",
        "LACA_EFFECTIVE_DATE",
        "LACA_EXPIRY_DATE",
        "LACA_NEXT_ACTION_DATE",
        "LACA_AUTH_DATE",
        "LACA_AUTH_USERNAME",
        "LACA_PRINT_DATE",
        "LACA_DEL_",
    ]
    description: str = (
        "Expect columns to match ordered list exactly"
    )
128+
129+
130+
# Resolve the job argument that locates the GX project root.
arg_key = ["s3_target_location"]
args = getResolvedOptions(sys.argv, arg_key)
# Bind the option explicitly rather than via locals().update(args), so the
# name is defined where readers and linters can see it.
s3_target_location = args["s3_target_location"]

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name="revenue_accounts_data_load_suite")

# Expectations are added in the same order as before.
suite.add_expectation(ExpectArrearsActionsColumnsToMatchOrderedList())
suite.add_expectation(ExpectArrearCodeToBeInSet())
suite.add_expectation(ExpectPayRefColumnValuesToNotBeNull())
suite.add_expectation(ExpectTenancyRefToNotBeNull())
# Register the suite with the context; keep the returned suite object.
suite = context.suites.add(suite)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# flake8: noqa: F821
2+
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
10+
class ExpectPayRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # NOTE(review): the class name says "PayRef" but the column checked is
    # LTRN_ALT_REF; the name is kept unchanged here — confirm whether it
    # should be renamed to match the column.
    column: str = "LTRN_ALT_REF"
    # The description previously said "contacts load"; this class is added to
    # the transactions suite, so the message now says so.
    description: str = "Expect LTRN_ALT_REF values to not be Null in transactions load"
13+
14+
15+
# Resolve the job argument that locates the GX project root.
arg_key = ["s3_target_location"]
args = getResolvedOptions(sys.argv, arg_key)
# Bind the option explicitly rather than via locals().update(args), so the
# name is defined where readers and linters can see it.
s3_target_location = args["s3_target_location"]

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name="transactions_data_load_suite")

suite.add_expectation(ExpectPayRefColumnValuesToNotBeNull())
# Register the suite with the context; keep the returned suite object.
suite = context.suites.add(suite)

0 commit comments

Comments
 (0)