Skip to content

Commit d838f9f

Browse files
committed
- Add two new tables, maproperty and matenancyagreement, to GX DQ testing, together with their table-specific tests and GX test suites.
1 parent 7b69de7 commit d838f9f

File tree

3 files changed

+133
-4
lines changed

3 files changed

+133
-4
lines changed

scripts/helpers/housing_gx_dq_inputs.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
'sql': """SELECT * FROM "housing-refined-zone"."tenure_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."tenure_reshape") and description in ('Secure', 'Introductory', 'Mesne Profit Ac', 'Non-Secure') and (endoftenuredate is null or substr(endoftenuredate, 1, 11) = '1900-01-01')""",
77
'id_field': 'tenancy_id'},
88
'contacts_reshape': {
9-
'sql': """SELECT id, targetid, createdat, contacttype, subtype, value, lastmodified, targettype, isactive, person_id, import_date FROM "housing-refined-zone"."contacts_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."contacts_reshape") and isactive=True""",
9+
'sql': """SELECT id, targetid, substr(createdat, 1, 10) as createdat, contacttype, subtype, value, substr(lastmodified, 1, 10) as lastmodified, targettype, isactive, person_id, import_date FROM "housing-refined-zone"."contacts_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."contacts_reshape") and isactive=True""",
1010
'id_field': 'id'},
1111
'housing_homeowner_record_sheet': {
1212
'sql': """SELECT * FROM "housing-raw-zone"."housing_homeowner_record_sheet" where import_date=(select max(import_date) from "housing-raw-zone"."housing_homeowner_record_sheet")""",
@@ -16,11 +16,17 @@
1616
'id_field': 'property_dwelling_reference_number'},
1717
'assets_reshape': {
1818
'sql': """SELECT * FROM "housing-refined-zone"."assets_reshape" where import_date=(select max(import_date) from "housing-refined-zone"."assets_reshape") and assettype = 'Dwelling'""",
19-
'id_field': 'asset_id'}
19+
'id_field': 'asset_id'},
20+
'matenancyagreement': {
21+
'sql': """SELECT *, substr(cast(eot as varchar), 1, 10) as eot_parsed, substr(cast(cot as varchar), 1, 10) as cot_parsed FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_matenancyagreement")""",
22+
'id_field': 'tag_ref'},
23+
'maproperty': {
24+
'sql': """SELECT * FROM "housing-raw-zone"."sow2b_dbo_maproperty" where import_date=(select max(import_date) FROM "housing-raw-zone"."sow2b_dbo_maproperty")""",
25+
'id_field': 'prop_ref'}
2026
}
2127

2228
table_list = ['person_reshape', 'tenure_reshape', 'contacts_reshape', 'housing_homeowner_record_sheet',
23-
'housing_dwellings_list', 'assets_reshape']
29+
'housing_dwellings_list', 'assets_reshape', 'matenancyagreement', 'maproperty']
2430

2531
partition_keys = ['import_year', 'import_month', 'import_day', 'import_date']
2632

@@ -39,10 +45,12 @@
3945
'expect_contact_value_column_values_to_be_unique': 'UNIQUENESS',
4046
'expect_contact_value_column_values_to_not_be_null': 'COMPLETENESS',
4147
'expect_date_of_birth_column_values_to_not_be_null': 'COMPLETENESS',
42-
'expect_date_of_birth_to_be_between': 'VALIDITY',
48+
'expect_date_of_birth_to_be_between': 'TIMELINESS',
4349
'expect_description_values_to_be_in_set': 'CONSISTENCY',
4450
'expect_estate_ref_no_column_values_to_match_regex': 'VALIDITY',
4551
'expect_first_name_column_value_length': 'VALIDITY',
52+
'expect_is_organisation_column_values_to_not_be_null': 'COMPLETENESS',
53+
'expect_is_organisation_values_to_be_in_set': 'CONSISTENCY',
4654
'expect_llpg_and_prop_ref_column_values_to_be_unique_within_record': 'UNIQUENESS',
4755
'expect_llpg_column_value_lengths_between': 'VALIDITY',
4856
'expect_llpg_column_values_to_be_unique': 'UNIQUENESS',
@@ -70,6 +78,7 @@
7078
'expect_sub_type_column_values_to_not_be_null': 'COMPLETENESS',
7179
'expect_surname_column_value_length': 'VALIDITY',
7280
'expect_firstname_column_value_length': 'VALIDITY',
81+
'expect_tag_ref_column_not_to_be_null': 'COMPLETENESS',
7382
'expect_target_id_and_value_column_values_to_be_unique_within_record': 'UNIQUENESS',
7483
'expect_target_id_column_values_to_not_be_null': 'COMPLETENESS',
7584
'expect_target_type_column_values_to_be_in_set': 'CONSISTENCY',
@@ -78,6 +87,7 @@
7887
'expect_tenancy_id_column_not_to_be_null': 'COMPLETENESS',
7988
'expect_tenure_code_column_not_to_be_null': 'COMPLETENESS',
8089
'expect_tenure_type_column_values_to_be_in_set': 'CONSISTENCY',
90+
'expect_tenure_code_values_to_be_in_set': 'CONSISTENCY',
8191
'expect_uprn_column_value_lengths_between': 'VALIDITY',
8292
'expect_uprn_column_values_to_match_regex': 'VALIDITY',
8393
'expect_uprn_column_values_to_not_be_null': 'COMPLETENESS',
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# flake8: noqa: F821
2+
import sys
3+
4+
from awsglue.utils import getResolvedOptions
5+
import great_expectations as gx
6+
import great_expectations.expectations as gxe
7+
8+
# Resolve the job arguments this script needs from the command line. Only
# 's3_target_location' is requested; it is passed below as the GX project
# root directory.
arg_key = ['s3_target_location']
args = getResolvedOptions(sys.argv, arg_key)
# Bind the argument explicitly instead of calling locals().update(args): the
# name is then visible to readers and linters and no longer depends on
# mutating the namespace dict returned by locals().
s3_target_location = args['s3_target_location']
11+
12+
13+
class ExpectPropRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    # Uniqueness expectation with the target column preset to 'prop_ref', so
    # the suite can instantiate it without arguments.
    column: str = "prop_ref"
    description: str = (
        "Expect Prop Ref field to be unique for a property type"
    )
16+
17+
18+
class ExpectArrPatchNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation with the target column preset to 'arr_patch'.
    column: str = 'arr_patch'
    description: str = (
        "Expect Arrears Patch column to be complete with no missing values"
    )
21+
22+
23+
class ExpectPropRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation with the target column preset to 'prop_ref'.
    column: str = 'prop_ref'
    description: str = (
        "Expect Prop Ref column to be complete with no missing values"
    )
26+
27+
28+
# Build the maproperty suite and add it to the file-mode GX context rooted at
# s3_target_location.
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name='maproperty_suite')

# Expectations are instantiated and added one at a time, in this order.
for _expectation_cls in (
    ExpectPropRefColumnValuesToBeUnique,
    ExpectArrPatchNotToBeNull,
    ExpectPropRefNotToBeNull,
):
    suite.add_expectation(_expectation_cls())

# Rebind 'suite' to whatever the context returns from the add call.
suite = context.suites.add(suite)
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# flake8: noqa: F821
2+
from datetime import datetime
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
# Resolve the job arguments this script needs from the command line. Only
# 's3_target_location' is requested; it is passed below as the GX project
# root directory.
arg_key = ['s3_target_location']
args = getResolvedOptions(sys.argv, arg_key)
# Bind the argument explicitly instead of calling locals().update(args): the
# name is then visible to readers and linters and no longer depends on
# mutating the namespace dict returned by locals().
s3_target_location = args['s3_target_location']
12+
13+
14+
class ExpectTagRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    # Uniqueness expectation with the target column preset to 'tag_ref', so
    # the suite can instantiate it without arguments.
    column: str = "tag_ref"
    description: str = (
        "Expect Tag Ref field to be unique for a tenancy"
    )
17+
18+
19+
class ExpectTagRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation with the target column preset to 'tag_ref'.
    # NOTE(review): scripts/helpers/housing_gx_dq_inputs.py adds the key
    # 'expect_tag_ref_column_not_to_be_null' (with 'column' in it); confirm
    # that key matches the expectation name derived from this class.
    column: str = 'tag_ref'
    description: str = (
        "Expect Tag Ref column to be complete with no missing values"
    )
22+
23+
24+
class ExpectPropRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation with the target column preset to 'prop_ref'.
    column: str = 'prop_ref'
    description: str = (
        "Expect Prop Ref column to be complete with no missing values"
    )
27+
28+
29+
class ExpectCoTNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation on the raw 'cot' column (the tenancy start date,
    # per the description below), not on the derived 'cot_parsed' column.
    column: str = 'cot'
    description: str = (
        "Expect Tenancy start date column (cot) to be complete with no missing values"
    )
32+
33+
34+
class ExpectTenureNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation with the target column preset to 'tenure'.
    column: str = 'tenure'
    description: str = (
        "Expect tenure to be complete with no missing values"
    )
37+
38+
39+
class ExpectSaffRentAccNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation with the target column preset to 'u_saff_rentacc'
    # (described below as the payment reference).
    column: str = 'u_saff_rentacc'
    description: str = (
        "Expect Saff rent account (payment ref) to be complete with no missing values"
    )
42+
43+
44+
class ExpectRentGroupRefNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation with the target column preset to 'rentgrp_ref'.
    column: str = 'rentgrp_ref'
    description: str = (
        "Expect Rent Group ref column to be complete with no missing values"
    )
47+
48+
49+
class ExpectEoTToBeBetween(gxe.ExpectColumnValuesToBeBetween):
    # Range check on 'eot_parsed', which the matenancyagreement query in
    # housing_gx_dq_inputs.py derives as the first 10 characters of eot cast
    # to varchar (presumably 'YYYY-MM-DD' - confirm against the source data).
    column: str = 'eot_parsed'
    # Bounds are date-only ISO strings so they have the same shape as the
    # 10-character column values. With a full timestamp lower bound
    # ('1920-01-01T00:00:00'), a plain string comparison would reject the
    # boundary date '1920-01-01' itself.
    min_value: str = datetime(1920, 1, 1).date().isoformat()
    # Evaluated once, when this class body runs, so the upper bound is the
    # date on which this suite-definition script is executed.
    max_value: str = datetime.today().date().isoformat()
    description: str = "Expect eot_parsed be between 1920-01-01 and today's date"
    condition_parser: str = 'great_expectations'
    # Only rows where eot_parsed is not null are range-checked.
    row_condition: str = 'col("eot_parsed").notNull()'
56+
57+
58+
class ExpectCoTToBeBetween(gxe.ExpectColumnValuesToBeBetween):
    # Range check on 'cot_parsed', which the matenancyagreement query in
    # housing_gx_dq_inputs.py derives as the first 10 characters of cot cast
    # to varchar (presumably 'YYYY-MM-DD' - confirm against the source data).
    column: str = 'cot_parsed'
    # Bounds are date-only ISO strings so they have the same shape as the
    # 10-character column values. With a full timestamp lower bound
    # ('1920-01-01T00:00:00'), a plain string comparison would reject the
    # boundary date '1920-01-01' itself.
    min_value: str = datetime(1920, 1, 1).date().isoformat()
    # Evaluated once, when this class body runs, so the upper bound is the
    # date on which this suite-definition script is executed.
    max_value: str = datetime.today().date().isoformat()
    description: str = "Expect cot_parsed be between 1920-01-01 and today's date"
    condition_parser: str = 'great_expectations'
    # Filter on the same column that is being checked, as the eot_parsed
    # expectation in this file does, rather than on the raw 'cot' column.
    row_condition: str = 'col("cot_parsed").notNull()'
65+
66+
67+
# Build the matenancyagreement suite and add it to the file-mode GX context
# rooted at s3_target_location.
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name='matenancyagreement_suite')

# Expectations are instantiated and added one at a time, in this order:
# uniqueness, the not-null checks, then the two date-range checks.
for _expectation_cls in (
    ExpectTagRefColumnValuesToBeUnique,
    ExpectTagRefNotToBeNull,
    ExpectPropRefNotToBeNull,
    ExpectCoTNotToBeNull,
    ExpectTenureNotToBeNull,
    ExpectSaffRentAccNotToBeNull,
    ExpectRentGroupRefNotToBeNull,
    ExpectEoTToBeBetween,
    ExpectCoTToBeBetween,
):
    suite.add_expectation(_expectation_cls())

# Rebind 'suite' to whatever the context returns from the add call.
suite = context.suites.add(suite)

0 commit comments

Comments
 (0)