Skip to content

Commit cb4b1b8

Browse files
authored
Merge pull request #1940 from LBHackney-IT/di-448-housing-dq-tests-using-gx
Add additional inputs including id fields and a dq dimensions map.
2 parents 543770c + 565f3a9 commit cb4b1b8

File tree

2 files changed

+78
-6
lines changed

2 files changed

+78
-6
lines changed
Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,33 @@
11
# Per-table extraction settings, keyed by table name.
#   'sql'      - query that selects the rows to test; each query restricts the
#                table to its own most recent import_date partition.
#   'id_field' - column name recorded alongside the query
#                (presumably the record identifier - TODO confirm with caller).
sql_config = {
    'person_reshape': {
        'sql': """SELECT *, cast(date_parse(substr(startdate, 1, 10), '%Y-%m-%d') as date) as startdate_parsed, cast(date_parse(substr(enddate, 1, 10), '%Y-%m-%d') as date) as enddate_parsed, cast(date_parse(substr(dateofbirth, 1, 10), '%Y-%m-%d') as date) as dateofbirth_parsed FROM "housing-refined-zone"."person_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."person_reshape") and enddate is NULL and type in ('Secure', 'Introductory')""",
        'id_field': 'person_id',
    },
    'tenure_reshape': {
        'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date>'20240412' and import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape" where import_date>'20240412') and isterminated=False and description in ('Secure', 'Introductory')""",
        'id_field': 'tenure_id',
    },
    'contacts_reshape': {
        'sql': """SELECT id, targetid, createdat, contacttype, subtype, value, lastmodified, targettype, isactive, person_id, import_date FROM "housing-refined-zone"."contacts_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."contacts_reshape") and isactive=True""",
        'id_field': 'id',
    },
    'housing_homeowner_record_sheet': {
        'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""",
        'id_field': 'property_no',
    },
    'housing_dwellings_list': {
        # The latest-partition subquery must read from this table itself; it
        # previously looked up max(import_date) in
        # housing_homeowner_record_sheet, which selects nothing (or a stale
        # partition) whenever the two tables are imported on different dates.
        'sql': """SELECT * FROM "housing-raw-zone"."housing_dwellings_list" where import_date=(select max(import_date) from "housing-raw-zone"."housing_dwellings_list")""",
        'id_field': 'property_dwelling_reference_number',
    },
    'assets_reshape': {
        'sql': """SELECT * FROM "housing-refined-zone"."assets_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."assets_reshape") and assettype = 'Dwelling'""",
        'id_field': 'asset_id',
    },
}
1220

1321
# Names of the tables to process; each has a matching entry in sql_config.
table_list = [
    'person_reshape',
    'tenure_reshape',
    'contacts_reshape',
    'housing_homeowner_record_sheet',
    'housing_dwellings_list',
    'assets_reshape',
]

# Partition column names, listed from coarsest (year) to the full import date.
partition_keys = ['import_year', 'import_month', 'import_day', 'import_date']
25+
26+
# Maps an expectation type name (snake_case) to the data-quality dimension
# label it is reported under.
dq_dimensions_map = {
    'expect_column_value_lengths_to_be_between': 'ACCURACY',
    'expect_column_values_to_be_unique': 'UNIQUENESS',
    'expect_column_values_to_match_regex': 'VALIDITY',
    'expect_column_values_to_be_in_set': 'CONSISTENCY',
    'expect_select_column_values_to_be_unique_within_record': 'UNIQUENESS',
    'expect_column_values_to_not_be_null': 'COMPLETENESS',
    'expect_column_values_to_be_between': 'VALIDITY',
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Builds the Great Expectations suite 'assets_reshape_suite' and registers it
# in a file-mode GX context rooted at the `s3_target_location` job argument.

import sys

from awsglue.utils import getResolvedOptions
import great_expectations as gx
import great_expectations.expectations as gxe

arg_key = ['s3_target_location']
args = getResolvedOptions(sys.argv, arg_key)
# Bind the argument explicitly instead of injecting every resolved option into
# the module namespace via locals().update(args); the name is now visible to
# linters, so no undefined-name suppression is needed.
s3_target_location = args['s3_target_location']

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name='assets_reshape_suite')

# uprn: 11-12 characters long.
suite.add_expectation(
    gxe.ExpectColumnValueLengthsToBeBetween(
        column="uprn",
        min_value=11,
        max_value=12,
    )
)
# uprn: 11-12 digits with no leading zero. The pattern is anchored at both
# ends; without the trailing `$` a value such as 11 digits followed by a
# letter would still match.
suite.add_expectation(
    gxe.ExpectColumnValuesToMatchRegex(
        column="uprn",
        regex=r"^[1-9]\d{10,11}$",
    )
)
# NOTE(review): the extraction query for assets_reshape filters on
# `assettype`; confirm that `type` is the intended column here.
suite.add_expectation(
    gxe.ExpectColumnValuesToBeInSet(
        column='type',
        value_set=['Dwelling'],
    )
)
# asset_id must be unique.
suite.add_expectation(
    gxe.ExpectColumnValuesToBeUnique(
        column='asset_id',
    )
)
# Columns that must always be populated.
suite.add_expectation(
    gxe.ExpectColumnValuesToNotBeNull(
        column='asset_id',
    )
)
suite.add_expectation(
    gxe.ExpectColumnValuesToNotBeNull(
        column='uprn',
    )
)
suite.add_expectation(
    gxe.ExpectColumnValuesToNotBeNull(
        column='estate_name',
    )
)
suite.add_expectation(
    gxe.ExpectColumnValuesToNotBeNull(
        column='type',
    )
)

# NOTE(review): presumably add() rejects a suite name that already exists in
# the context - confirm how re-runs of this job are expected to behave.
suite = context.suites.add(suite)

0 commit comments

Comments
 (0)