|
# Resolve the job arguments named in arg_key, then publish every resolved
# argument as a module-level variable so later statements can use it by name.
args = getResolvedOptions(sys.argv, arg_key)
globals().update(args)
12 | 12 |
|
| 13 | + |
class ExpectFirstNameColumnValueLength(gxe.ExpectColumnValueLengthsToBeBetween):
    """Value-length expectation preset for ``firstname`` with a minimum of 1."""

    column: str = "firstname"
    min_value: int = 1
    description: str = (
        "Expect first name to be at least 1 character length"
    )
| 18 | + |
| 19 | + |
class ExpectSurnameColumnValueLength(gxe.ExpectColumnValueLengthsToBeBetween):
    """Value-length expectation preset for ``surname`` with a minimum of 1."""

    column: str = "surname"
    min_value: int = 1
    description: str = (
        "Expect surname to be at least 1 character length"
    )
| 24 | + |
| 25 | + |
class ExpectUPRNColumnValueLengthsBetween(gxe.ExpectColumnValueLengthsToBeBetween):
    """Value-length expectation preset for ``uprn``: 11 to 12 characters."""

    column: str = "uprn"
    min_value: int = 11
    max_value: int = 12
    description: str = (
        "Expect UPRN to be between 11 and 12 characters length inclusive"
    )
| 31 | + |
| 32 | + |
class ExpectUPRNColumnValuesToMatchRegex(gxe.ExpectColumnValuesToMatchRegex):
    """Regex expectation preset for ``uprn``.

    A valid value is a digit 1-9 followed by 10 or 11 further digits and
    nothing else.
    """

    column: str = "uprn"
    # Anchored at both ends. Without the trailing `$`, a value such as
    # "12345678901x" is accepted on the strength of its first 11 characters
    # whenever the engine searches for the pattern instead of full-matching.
    regex: str = r"^[1-9]\d{10,11}$"
    # Raw string for the first fragment so `\d` is kept literally instead of
    # being parsed as an (invalid) string escape sequence.
    description: str = (
        r"Expect UPRN to match regex ^[1-9]\d{10,11}$ "
        "(starting with digit 1-9, followed by 10 or 11 digits)"
    )
| 37 | + |
| 38 | + |
class ExpectUPRNNotToBeNull(gxe.ExpectColumnValuesToNotBeNull):
    """Not-null expectation preset for the ``uprn`` column."""

    # NOTE(review): ExpectUPRNColumnValuesToNotBeNull in this file targets the
    # same column with the same base expectation - confirm both are needed.
    column: str = "uprn"
    description: str = (
        "Expect UPRN column to be complete with no missing values"
    )
| 42 | + |
| 43 | + |
class ExpectPersonTypeValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    """In-set expectation preset for ``person_type``.

    The accepted values are the seven person types listed in ``value_set``.
    """

    column: str = "person_type"
    value_set: list = [
        "Tenant",
        "HouseholdMember",
        "Leaseholder",
        "Freeholder",
        "Occupant",
        "HousingOfficer",
        "HousingAreaManager",
    ]
    # Keep this list in step with value_set above; the comma between
    # "Occupant" and "HousingOfficer" separates two distinct set members.
    description: str = (
        "Expect person types values to contain one of Tenant, HouseholdMember, "
        "Leaseholder, Freeholder, Occupant, HousingOfficer, HousingAreaManager"
    )
| 49 | + |
| 50 | + |
class ExpectPreferredTitleValuesToBeInSet(gxe.ExpectColumnValuesToBeInSet):
    """In-set expectation preset for ``preferredtitle``."""

    column: str = "preferredtitle"
    value_set: list = [
        "Dr",
        "Master",
        "Miss",
        "Mr",
        "Mrs",
        "Ms",
        "Rabbi",
        "Reverend",
        "Mx",
    ]
    description: str = (
        "Expect preferred titles to be one of "
        "Dr, Master, Miss, Mr, Mrs, Ms, Mx, Rabbi, Reverend"
    )
| 55 | + |
| 56 | + |
class ExpectPersonIDColumnValuesToBeUnique(gxe.ExpectColumnValuesToBeUnique):
    """Uniqueness expectation preset for the ``person_id`` column."""

    column: str = "person_id"
    description: str = (
        "Expect Person ID to be unique within dataset"
    )
| 60 | + |
| 61 | + |
class ExpectPersonIDColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    """Not-null expectation preset for the ``person_id`` column."""

    column: str = "person_id"
    description: str = (
        "Expect Person ID be complete with no missing values"
    )
| 65 | + |
| 66 | + |
class ExpectPersonIDAndPropertyReferenceColumnValuesToBeUniqueWithinRecord(
    gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord
):
    """Within-record uniqueness preset for ``person_id`` and ``propertyreference``.

    The base expectation compares the listed columns against each other
    inside every record; it does not look for repeated rows across the
    dataset.
    """

    column_list: list = ["person_id", "propertyreference"]
    # NOTE(review): if the intent is that each (person_id, propertyreference)
    # pair occurs only once in the dataset, that is what
    # ExpectCompoundColumnsToBeUnique checks - confirm which one is wanted.
    description: str = (
        "Expect Person ID and Property Reference to hold different values "
        "within each record"
    )
| 71 | + |
| 72 | + |
class ExpectPropertyRefColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    """Not-null expectation preset for the ``propertyreference`` column."""

    column: str = "propertyreference"
    description: str = (
        "Expect Property Reference be complete with no missing values"
    )
| 76 | + |
| 77 | + |
class ExpectPersonIDAndPaymentReferenceColumnValuesToBeUniqueWithinRecord(
    gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord
):
    """Within-record uniqueness preset for ``person_id`` and ``paymentreference``.

    The base expectation compares the listed columns against each other
    inside every record; it does not look for repeated rows across the
    dataset.
    """

    column_list: list = ["person_id", "paymentreference"]
    # NOTE(review): if the intent is that each (person_id, paymentreference)
    # pair occurs only once in the dataset, that is what
    # ExpectCompoundColumnsToBeUnique checks - confirm which one is wanted.
    description: str = (
        "Expect Person ID and Payment Reference to hold different values "
        "within each record"
    )
| 82 | + |
| 83 | + |
class ExpectUPRNColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    """Not-null expectation preset for the ``uprn`` column."""

    # NOTE(review): ExpectUPRNNotToBeNull in this file targets the same column
    # with the same base expectation - confirm both are needed.
    column: str = "uprn"
    description: str = (
        "Expect UPRN be complete with no missing values"
    )
| 87 | + |
| 88 | + |
class ExpectDateOfBirthColumnValuesToNotBeNull(gxe.ExpectColumnValuesToNotBeNull):
    """Not-null expectation preset for the ``dateofbirth_parsed`` column."""

    column: str = "dateofbirth_parsed"
    description: str = (
        "Expect dateofbirth_parsed be complete with no missing values"
    )
| 92 | + |
| 93 | + |
class ExpectDateOfBirthToBeBetween(gxe.ExpectColumnValuesToBeBetween):
    """Range expectation preset for ``dateofbirth_parsed``.

    Bounds are ISO-8601 strings: 1900-01-01T00:00:00 as the minimum and the
    timestamp at which this class body is executed as the maximum.
    """

    column: str = "dateofbirth_parsed"
    min_value: str = datetime(1900, 1, 1, 0, 0, 0).isoformat()
    # Evaluated once, when the class is defined, not on each validation run.
    max_value: str = datetime.today().isoformat()
    condition_parser: str = "pandas"
    # NOTE(review): this row filter also constrains startdate_parsed, uses a
    # fixed upper bound of 2025-01-01 and a lower bound of 1850 rather than
    # the 1900 minimum above. The `df[...]` form may also not be what the
    # "pandas" condition parser accepts - confirm against the execution
    # engine this job validates with.
    row_condition: str = 'df["dateofbirth_parsed"].str[:10] >= "1850-01-01" and df["dateofbirth_parsed"].str[:10] < "2025-01-01" and df["startdate_parsed"].str[:10] > "1900-01-01" and df["startdate_parsed"].str[:10] < "2100-01-01"'
    # Describes the range check itself; the earlier text was the not-null
    # wording and did not describe this expectation.
    description: str = (
        "Expect dateofbirth_parsed to be between 1900-01-01 and the current date"
    )
| 101 | + |
| 102 | + |
# Obtain a file-mode GX context whose project root is s3_target_location.
context = gx.get_context(
    mode="file",
    project_root_dir=s3_target_location,
)

# Start the suite that the person-reshape expectations are attached to.
suite = gx.ExpectationSuite(name="person_reshape_suite")
17 | | -suite.add_expectation( |
18 | | - gxe.ExpectColumnValueLengthsToBeBetween( |
19 | | - column="firstname", |
20 | | - min_value=1) |
21 | | -) |
22 | | -suite.add_expectation( |
23 | | - gxe.ExpectColumnValueLengthsToBeBetween( |
24 | | - column="surname", |
25 | | - min_value=1) |
26 | | -) |
27 | | -suite.add_expectation( |
28 | | - gxe.ExpectColumnValueLengthsToBeBetween( |
29 | | - column="uprn", |
30 | | - min_value=11, |
31 | | - max_value=12) |
32 | | -) |
33 | | -suite.add_expectation( |
34 | | - gxe.ExpectColumnValuesToMatchRegex( |
35 | | - column="uprn", |
36 | | - regex=r"^[1-9]\d{10,11}") |
37 | | -) |
38 | | -suite.add_expectation( |
39 | | - gxe.ExpectColumnValuesToNotBeNull( |
40 | | - column='uprn') |
41 | | -) |
42 | | -suite.add_expectation( |
43 | | - gxe.ExpectColumnValuesToBeInSet( |
44 | | - column='type', |
45 | | - value_set=['Asylum Seeker', 'Commercial Let', 'Temp Decant', 'Freehold', 'Freehold (Serv)', 'Introductory', |
46 | | - 'Leasehold (RTB)', 'Lse 100% Stair', 'License Temp Ac', 'Mesne Profit Ac', 'Non-Secure', |
47 | | - 'Private Garage', 'Registered Social Landlord', 'RenttoMortgage', 'Secure', 'Shared Owners', |
48 | | - 'Short Life Lse', 'Private Sale LH', 'Shared Equity', 'Tenant Acc Flat', 'Temp B&B', 'Tenant Garage', |
49 | | - 'Temp Hostel Lse', 'Temp Hostel', 'Temp Annex', 'Temp Private Lt', 'Temp Traveller']) |
50 | | -) |
51 | | -suite.add_expectation( |
52 | | - gxe.ExpectColumnValuesToBeInSet( |
53 | | - column='person_type', |
54 | | - value_set=['Tenant', 'HouseholdMember', 'Leaseholder', 'Freeholder', 'Occupant', 'HousingOfficer', |
55 | | - 'HousingAreaManager']) |
56 | | -) |
57 | | -suite.add_expectation( |
58 | | - gxe.ExpectColumnValuesToBeInSet( |
59 | | - column='preferredtitle', |
60 | | - value_set=['Dr', 'Master', 'Miss', 'Mr', 'Mrs', 'Ms', 'Other', 'Rabbi', 'Reverend']) |
61 | | -) |
62 | | -suite.add_expectation( |
63 | | - gxe.ExpectColumnValuesToBeUnique( |
64 | | - column='person_id') |
65 | | -) |
66 | | -suite.add_expectation( |
67 | | - gxe.ExpectColumnValuesToNotBeNull( |
68 | | - column='person_id') |
69 | | -) |
70 | | -suite.add_expectation( |
71 | | - gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord( |
72 | | - column_list=['person_id', 'propertyreference']) |
73 | | -) |
74 | | -suite.add_expectation( |
75 | | - gxe.ExpectColumnValuesToNotBeNull( |
76 | | - column='propertyreference') |
77 | | -) |
78 | | -suite.add_expectation( |
79 | | - gxe.ExpectSelectColumnValuesToBeUniqueWithinRecord( |
80 | | - column_list=['person_id', 'paymentreference']) |
81 | | -) |
82 | | -suite.add_expectation( |
83 | | - gxe.ExpectColumnValuesToNotBeNull( |
84 | | - column='uprn') |
85 | | -) |
86 | | -suite.add_expectation( |
87 | | - gxe.ExpectColumnValuesToNotBeNull( |
88 | | - column='dateofbirth_parsed') |
89 | | -) |
90 | | -suite.add_expectation( |
91 | | - gxe.ExpectColumnValuesToBeBetween( |
92 | | - column='dateofbirth_parsed', |
93 | | - min_value=datetime(1900, 1, 1, 0, 0, 0).isoformat(), |
94 | | - max_value=datetime.today().isoformat() |
95 | | - ) |
96 | | -) |
# Instantiate each preconfigured expectation with its class defaults and
# attach it to the suite, preserving this order.
# NOTE(review): both UPRN not-null classes are registered - confirm the
# second one is intentional.
for expectation_cls in (
    ExpectFirstNameColumnValueLength,
    ExpectSurnameColumnValueLength,
    ExpectUPRNColumnValueLengthsBetween,
    ExpectUPRNColumnValuesToMatchRegex,
    ExpectUPRNNotToBeNull,
    ExpectPersonTypeValuesToBeInSet,
    ExpectPreferredTitleValuesToBeInSet,
    ExpectPersonIDColumnValuesToBeUnique,
    ExpectPersonIDColumnValuesToNotBeNull,
    ExpectPersonIDAndPropertyReferenceColumnValuesToBeUniqueWithinRecord,
    ExpectPropertyRefColumnValuesToNotBeNull,
    ExpectPersonIDAndPaymentReferenceColumnValuesToBeUniqueWithinRecord,
    ExpectUPRNColumnValuesToNotBeNull,
    ExpectDateOfBirthColumnValuesToNotBeNull,
    ExpectDateOfBirthToBeBetween,
):
    suite.add_expectation(expectation_cls())

# Add the populated suite to the context and keep the object it returns.
suite = context.suites.add(suite)
0 commit comments