Skip to content

Commit 809ec4a

Browse files
Merge branch 'main' into dl-120-disable-mtfh-glue-jobs
2 parents 1aa944e + ae916f4 commit 809ec4a

File tree

54 files changed

+1910
-1896
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+1910
-1896
lines changed
Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
sql_config = {
22
"properties": {"id_field": "LPRO_PROPREF"},
33
"tenancies": {"id_field": "LTCY_ALT_REF"},
4+
"people": {"id_field": "LPAR_PER_ALT_REF"},
5+
"contacts": {"id_field": "LCDE_LEGACY_REF"},
46
}
57

6-
data_load_list = ["properties", "tenancies"]
8+
data_load_list = ["properties", "tenancies", "people", "contacts"]
79

810
table_list = {
911
"properties": [
@@ -13,11 +15,22 @@
1315
"properties_1d",
1416
"properties_1e",
1517
"properties_2a",
18+
"properties_3a",
1619
"properties_4a",
1720
"properties_4b",
18-
"properties_4c"
21+
"properties_4c",
22+
"properties_7a",
23+
"properties_all_tranches",
1924
],
20-
"tenancies": ["tenancies_1a"],
25+
"tenancies": [
26+
"tenancies_1a",
27+
"tenancies_1c",
28+
"tenancies_2a",
29+
"tenancies_all",
30+
"tenancies_other",
31+
],
32+
"people": ["people_1a", "people_1b", "people_1c", "people_2a"],
33+
"contacts": ["contacts_1a", "contacts_1b", "contacts_2a"]
2134
}
2235

2336
partition_keys = ["import_date"]

scripts/jobs/housing/housing_nec_migration_apply_gx_dq_tests.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
)
1818
import scripts.jobs.housing.housing_nec_migration_properties_data_load_gx_suite
1919
import scripts.jobs.housing.housing_nec_migration_tenancies_data_load_gx_suite
20-
20+
import scripts.jobs.housing.housing_nec_migration_people_data_load_gx_suite
21+
import scripts.jobs.housing.housing_nec_migration_contacts_data_load_gx_suite
2122

2223
logging.basicConfig(level=logging.INFO)
2324
logger = logging.getLogger(__name__)
@@ -69,7 +70,11 @@ def main():
6970

7071
conn = connect(s3_staging_dir=s3_staging_location, region_name=region_name)
7172

72-
df = pd.read_sql_query(sql_query, conn)
73+
try:
74+
df = pd.read_sql_query(sql_query, conn)
75+
except Exception as e:
76+
logger.info(f"Problem found with {table}: {e}, skipping table.")
77+
continue
7378

7479
# set up batch
7580
data_source = context.data_sources.add_pandas(f"{table}_pandas")
@@ -128,9 +133,17 @@ def main():
128133

129134
table_results_df["unexpected_id_list"] = pd.Series(dtype="object")
130135
for i, row in query_df.iterrows():
131-
table_results_df.loc[i, "unexpected_id_list"] = str(
136+
try:
132137
list(df[id_field].iloc[row["result.unexpected_index_list"]])
133-
)
138+
except Exception as e:
139+
logger.info(
140+
f"Problem found with {table}: {e}, skipping making unexpected_id_list."
141+
)
142+
continue
143+
else:
144+
table_results_df.loc[i, "unexpected_id_list"] = str(
145+
list(df[id_field].iloc[row["result.unexpected_index_list"]])
146+
)
134147

135148
# drop columns not needed in metadata
136149
cols_to_drop_meta = [
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# flake8: noqa: F821
2+
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
10+
class ExpectPersonRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pre-bound to the contacts load's LCDE_LEGACY_REF
    # column, so the suite can instantiate it without arguments.
    column: str = "LCDE_LEGACY_REF"
    description: str = (
        "Expect LCDE_LEGACY_REF (person ref) values "
        "to not be Null in contacts load"
    )
13+
14+
15+
class ExpectValueColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pre-bound to the LCDE_CONTACT_VALUE column.
    column: str = "LCDE_CONTACT_VALUE"
    description: str = (
        "Expect LCDE_CONTACT_VALUE (contact value) "
        "to not be Null"
    )
18+
19+
20+
class ExpectContactTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    # In-set expectation pre-bound to the contact type code column.
    #
    # The column name must match the entry in the contacts ordered column
    # list exactly ("LCDE_FRV_CME_CODE"). It previously carried a stray
    # trailing comma inside the string literal, which pointed the expectation
    # at a column that does not exist.
    column: str = "LCDE_FRV_CME_CODE"
    # Contact type codes accepted by this expectation.
    value_set: list = ["WORKTEL", "MOBILETEL", "HOMETEL", "EMAIL", "OTHER"]
    description: str = "Expect contact type code to be one of the set"
24+
25+
26+
class ExpectContactsColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
    # Table-level expectation pre-bound to the column names of the contacts
    # load, listed in the order the expectation compares them.
    column_list = [
        "LCDE_START_DATE",
        "LCDE_PRIMARY_REF",
        "LCDE_SECONDARY_REF",
        "LCDE_PRECEDENCE",
        "LCDE_LEGACY_TYPE",
        "LCDE_LEGACY_REF",
        "LCDE_FRV_COMM_PREF_CODE",
        "LCDE_FRV_CME_CODE",
        "LCDE_END_DATE",
        "LCDE_CREATED_DATE",
        "LCDE_CREATED_BY",
        "LCDE_CONTACT_VALUE",
        "LCDE_CONTACT_NAME",
        "LCDE_COMMENTS",
        "LCDE_ALLOW_TEXTS",
    ]
    description: str = (
        "Expect columns to match "
        "ordered list exactly"
    )
45+
46+
47+
# Resolve the job arguments from the command line.
arg_key = ["s3_target_location"]
args = getResolvedOptions(sys.argv, arg_key)
# At module scope this publishes every resolved option as a module-level
# name; it is where `s3_target_location` below comes from.
locals().update(args)

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name="contacts_data_load_suite")

# Register the expectations in a fixed order: column layout first, then the
# per-column checks.
for _expectation_cls in (
    ExpectContactsColumnsToMatchOrderedList,
    ExpectContactTypeCodeToBeInSet,
    ExpectPersonRefColumnValuesToNotBeNull,
    ExpectValueColumnValuesToNotBeNull,
):
    suite.add_expectation(_expectation_cls())

# Store the suite in the file-backed context and keep the returned suite.
suite = context.suites.add(suite)
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# flake8: noqa: F821
2+
3+
import sys
4+
5+
from awsglue.utils import getResolvedOptions
6+
import great_expectations as gx
7+
import great_expectations.expectations as gxe
8+
9+
10+
class ExpectPersonRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    # Uniqueness expectation pre-bound to the people load's LPAR_PER_ALT_REF
    # column.
    column: str = "LPAR_PER_ALT_REF"
    description: str = (
        "Expect LPAR_PER_ALT_REF (person ref) "
        "values to be unique"
    )
13+
14+
15+
class ExpectPersonRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pre-bound to the people load's LPAR_PER_ALT_REF
    # column.
    column: str = "LPAR_PER_ALT_REF"
    description: str = (
        "Expect LPAR_PER_ALT_REF (person ref) "
        "values to not be Null"
    )
18+
19+
20+
class ExpectTitleToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    # In-set expectation pre-bound to the LPAR_PER_TITLE column.
    column: str = "LPAR_PER_TITLE"
    # Accepted titles; None is listed explicitly alongside the strings.
    # NOTE(review): "LAD" may be a truncated "LADY" - confirm against the
    # source data before changing it.
    value_set: list = [
        "DAME",
        "DR",
        "LAD",
        "LORD",
        "MASTER",
        "MISS",
        "MR",
        "MRS",
        "MS",
        "MX",
        "PROFESSOR",
        "RABBI",
        "REVEREND",
        "SIR",
        None,
    ]
    description: str = (
        "Expect title to be "
        "one of the set"
    )
40+
41+
42+
class ExpectPeopleColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
    # Table-level expectation pre-bound to the column names of the people
    # load, listed in the order the expectation compares them.
    # NOTE(review): "PAR_TIN_HRV_TIR_CODE" is the only entry without the
    # "LPAR_" prefix - confirm it matches the real column name.
    column_list = [
        "LPAR_HOP_START_DATE",
        "LPAR_PER_SURNAME",
        "LPAR_TCY_ALT_REF",
        "LPAR_PER_FORENAME",
        "LPAR_HOP_HPSR_CODE",
        "LPAR_PER_TITLE",
        "LPAR_PER_INITIALS",
        "LPAR_PER_DATE_OF_BIRTH",
        "LPAR_PER_HOU_DISABLED_IND",
        "LPAR_PER_HOU_OAP_IND",
        "LPAR_PER_FRV_FGE_CODE",
        "LPAR_HOP_HRV_REL_CODE",
        "LPAR_PER_HOU_EMPLOYER",
        "LPAR_PER_HOU_HRV_HMS_CODE",
        "LPAR_PHONE",
        "LPAR_HOP_END_DATE",
        "LPAR_HOP_HPER_CODE",
        "LPAR_TCY_IND",
        "LPAR_TIN_MAIN_TENANT_IND",
        "LPAR_TIN_START_DATE",
        "LPAR_TIN_END_DATE",
        "PAR_TIN_HRV_TIR_CODE",
        "LPAR_TIN_STAT_SUCCESSOR_IND",
        "LPAR_PER_ALT_REF",
        "LPAR_PER_FRV_FEO_CODE",
        "LPAR_PER_NI_NO",
        "LPAR_PER_FRV_HGO_CODE",
        "LPAR_PER_FRV_FNL_CODE",
        "LPAR_PER_OTHER_NAME",
    ]
    description: str = (
        "Expect people load columns to match "
        "ordered list exactly"
    )
75+
76+
77+
# Resolve the job arguments from the command line.
arg_key = ["s3_target_location"]
args = getResolvedOptions(sys.argv, arg_key)
# At module scope this publishes every resolved option as a module-level
# name; it is where `s3_target_location` below comes from.
locals().update(args)

# add to GX context
context = gx.get_context(mode="file", project_root_dir=s3_target_location)

suite = gx.ExpectationSuite(name="people_data_load_suite")

# Register the expectations in the same fixed order on every run.
for _expectation_cls in (
    ExpectPersonRefColumnValuesToBeUnique,
    ExpectTitleToBeInSet,
    ExpectPeopleColumnsToMatchOrderedList,
    ExpectPersonRefColumnValuesToNotBeNull,
):
    suite.add_expectation(_expectation_cls())

# Store the suite in the file-backed context and keep the returned suite.
suite = context.suites.add(suite)

scripts/jobs/housing/housing_nec_migration_properties_data_load_gx_suite.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ class ExpectPropRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
1212
description: str = "Expect UPRN (LPRO_PROPREF) values to be unique"
1313

1414

15+
class ExpectPropRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pre-bound to the LPRO_PROPREF column.
    column: str = "LPRO_PROPREF"
    description: str = (
        "Expect LPRO_PROPREF (prop ref) "
        "values to not be Null"
    )
18+
19+
1520
class ExpectPropTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
1621
column: str = "LPRO_HOU_PTV_CODE"
1722
value_set: list = [
@@ -158,4 +163,5 @@ class ExpectPropColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedLi
158163
suite.add_expectation(ExpectResIndicatorToBeInSet())
159164
suite.add_expectation(ExpectPropTypeValuesToBeInSet())
160165
suite.add_expectation(ExpectPropColumnsToMatchOrderedList())
166+
suite.add_expectation(ExpectPropRefColumnValuesToNotBeNull())
161167
suite = context.suites.add(suite)

scripts/jobs/housing/housing_nec_migration_tenancies_data_load_gx_suite.py

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,42 +12,47 @@ class ExpectTagRefColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
1212
description: str = "Expect LTCY_ALT_REF (tenancy ref) values to be unique"
1313

1414

15+
class ExpectTenancyRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    # Not-null expectation pre-bound to the LTCY_ALT_REF column.
    column: str = "LTCY_ALT_REF"
    description: str = (
        "Expect LTCY_ALT_REF (tenancy ref) "
        "values to not be Null"
    )
18+
19+
1520
class ExpectTenancyTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
1621
column: str = "LTCY_TTY_CODE"
1722
value_set: list = [
18-
'ASH',
19-
'ASY',
20-
'DEC',
21-
'Demoted',
22-
'FRE',
23-
'FRS',
24-
'HAL',
25-
'LIVINGRT',
26-
'INT',
27-
'LEA',
28-
'LHS',
29-
'LTA',
30-
'MPA',
31-
'NON',
32-
'PVG',
33-
'RTM',
34-
'SEC',
35-
'SHO',
36-
'SLL',
37-
'SPS',
38-
'SSE',
39-
'TACCFLAT',
40-
'TBB',
41-
'TBBFam',
42-
'THO',
43-
'TGA',
44-
'THL',
45-
'THGF',
46-
'TLA',
47-
'TPL',
48-
'TRA',
49-
'UNDER18',
50-
'OFFICESE'
23+
"ASH",
24+
"ASY",
25+
"DEC",
26+
"Demoted",
27+
"FRE",
28+
"FRS",
29+
"HAL",
30+
"LIVINGRT",
31+
"INT",
32+
"LEA",
33+
"LHS",
34+
"LTA",
35+
"MPA",
36+
"NON",
37+
"PVG",
38+
"RTM",
39+
"SEC",
40+
"SHO",
41+
"SLL",
42+
"SPS",
43+
"SSE",
44+
"TACCFLAT",
45+
"TBB",
46+
"TBBFam",
47+
"THO",
48+
"TGA",
49+
"THL",
50+
"THGF",
51+
"TLA",
52+
"TPL",
53+
"TRA",
54+
"UNDER18",
55+
"OFFICESE",
5156
]
5257
description: str = "Expect tenancy type code to contain one of the set"
5358

@@ -62,22 +67,17 @@ class ExpectTenureTypeCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
6267
"TEMPORARY",
6368
"FREEHOLD",
6469
"COMMERCIAL",
65-
"LIVINGRENT"
70+
"LIVINGRENT",
6671
]
6772
description: str = "Expect tenure type code to be one of the set"
6873

6974

7075
class ExpectTenancyStatusCodeToBeInSet(gxe.ExpectColumnValuesToBeInSet):
7176
column: str = "LTCY_HRV_TST_CODE"
72-
value_set: list = [
73-
"DECANT",
74-
"NOTICE",
75-
"UNAUTHOCC"
76-
]
77+
value_set: list = ["DECANT", "NOTICE", "UNAUTHOCC"]
7778
description: str = "Expect tenancy status code to be one of the set"
7879

7980

80-
8181
class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrderedList):
8282
column_list = [
8383
"LTCY_ALT_REF",
@@ -125,10 +125,11 @@ class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrdere
125125
"LTCY_THO_END_DATE6",
126126
"LTCY_THO_HRV_TTR_CODE6",
127127
"LTCY_PHONE",
128-
"LTCY_REVIEW_DATE"
128+
"LTCY_REVIEW_DATE",
129129
]
130130
description: str = "Expect tenancy load columns to match ordered list exactly"
131131

132+
132133
arg_key = ["s3_target_location"]
133134
args = getResolvedOptions(sys.argv, arg_key)
134135
locals().update(args)
@@ -143,4 +144,6 @@ class ExpectTenancyColumnsToMatchOrderedList(gxe.ExpectTableColumnsToMatchOrdere
143144
suite.add_expectation(ExpectTenureTypeCodeToBeInSet())
144145
suite.add_expectation(ExpectTenancyStatusCodeToBeInSet())
145146
suite.add_expectation(ExpectTenancyColumnsToMatchOrderedList())
146-
suite = context.suites.add(suite)
147+
suite.add_expectation(ExpectTenancyRefColumnValuesToNotBeNull())
148+
149+
context.suites.add(suite)

0 commit comments

Comments
 (0)