Skip to content

Commit 566d3b2

Browse files
authored
Merge pull request #77 from datakind/DKW-718-revert-entity-id-to-be-unique-string-and-not-uuid
Dkw 718 revert entity id to be unique string and not UUID
2 parents c955ebb + 5f2dd83 commit 566d3b2

17 files changed

+661
-769
lines changed

db/dot/1-schema.sql

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ CREATE TABLE IF NOT EXISTS dot.entity_categories (
7979

8080
CREATE TABLE IF NOT EXISTS dot.configured_entities (
8181
project_id VARCHAR(300) NOT NULL,
82-
entity_id UUID,
83-
entity_name VARCHAR(300),
82+
entity_id VARCHAR(300),
8483
entity_category VARCHAR(300),
8584
entity_definition VARCHAR(4096),
8685
date_added TIMESTAMP WITH TIME ZONE NOT NULL,
@@ -104,7 +103,7 @@ CREATE TABLE IF NOT EXISTS dot.configured_tests(
104103
description VARCHAR(1000) NOT NULL,
105104
impact VARCHAR(1000) NULL,
106105
proposed_remediation VARCHAR(1000) NULL,
107-
entity_id UUID NOT NULL references dot.configured_entities on update cascade,
106+
entity_id VARCHAR(300) NOT NULL references dot.configured_entities on update cascade,
108107
test_type VARCHAR(300) NOT NULL references dot.test_types on update cascade,
109108
column_name VARCHAR(300) NULL,
110109
column_description VARCHAR(1000) NULL,
@@ -140,7 +139,7 @@ CREATE TABLE IF NOT EXISTS dot.test_results(
140139
test_result_id VARCHAR(300) NOT NULL,
141140
run_id UUID,
142141
test_id UUID references dot.configured_tests on update cascade,
143-
entity_id UUID,
142+
entity_id VARCHAR(300),
144143
status TEXT,
145144
view_name VARCHAR(300) NULL,
146145
id_column_name TEXT,
@@ -157,7 +156,7 @@ CREATE TABLE IF NOT EXISTS dot.test_results(
157156
CREATE TABLE IF NOT EXISTS dot.test_results_summary (
158157
run_id UUID,
159158
test_id UUID references dot.configured_tests on update cascade,
160-
entity_id UUID references dot.configured_entities on update cascade,
159+
entity_id VARCHAR(300) references dot.configured_entities on update cascade,
161160
test_type VARCHAR(300) NOT NULL references dot.test_types on update cascade,
162161
column_name VARCHAR(300) NULL,
163162
test_parameters VARCHAR(1000) NULL,
@@ -253,8 +252,6 @@ AS $$
253252
declare
254253
KEY_STRING text;
255254
BEGIN
256-
KEY_STRING := new.entity_name || new.entity_category || new.entity_definition;
257-
NEW.entity_id := uuid_generate_v3(uuid_ns_oid(), KEY_STRING);
258255
new.date_added := NOW();
259256
new.date_modified := NOW();
260257
RETURN NEW;
@@ -267,11 +264,9 @@ LANGUAGE plpgsql
267264
AS $$
268265
declare
269266
KEY_STRING text;
270-
OLD_ENTITY_ID uuid;
271-
NEW_ENTITY_ID uuid;
267+
OLD_ENTITY_ID VARCHAR(300);
268+
NEW_ENTITY_ID VARCHAR(300);
272269
BEGIN
273-
KEY_STRING := new.entity_name || new.entity_category || new.entity_definition;
274-
NEW.entity_id := uuid_generate_v3(uuid_ns_oid(), KEY_STRING);
275270
new.date_modified := NOW();
276271
RETURN NEW;
277272
END;

db/dot/4-upload_sample_dot_data.sql

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,66 +6,66 @@ INSERT INTO dot.entity_categories VALUES('ZAG', 'Zagreb airport flights');
66
INSERT INTO dot.entity_categories VALUES('ETH', 'Ethiopian Airlines');
77

88
-- configured entities - db views of the data we want to scan
9-
INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9c', 'all_flight_data', 'ALL', '{{ config(materialized=''view'') }}
9+
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_flight_data', 'ALL', '{{ config(materialized=''view'') }}
1010
{% set schema = <schema> %}
1111
select *
1212
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');
1313

14-
INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'zagreb_flight_data', 'ZAG', '{{ config(materialized=''view'') }}
14+
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'zagreb_flight_data', 'ZAG', '{{ config(materialized=''view'') }}
1515
{% set schema = <schema> %}
1616
select *
1717
from {{ schema }}.flight_data WHERE origin_airport=''Zagreb airport'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');
1818

19-
INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'ethiopia_airlines_data', 'ETH', '{{ config(materialized=''view'') }}
19+
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'ethiopia_airlines_data', 'ETH', '{{ config(materialized=''view'') }}
2020
{% set schema = <schema> %}
2121
select *
2222
from {{ schema }}.flight_data WHERE airline=''Ethiopian Airlines'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');
2323

24-
INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'all_airports_data', 'ALL', '{{ config(materialized=''view'') }}
24+
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_airports_data', 'ALL', '{{ config(materialized=''view'') }}
2525
{% set schema = <schema> %}
2626
select *
2727
from {{ schema }}.airport_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');
2828

29-
INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9c', 'airlines_data', 'ALL', '{{ config(materialized=''view'') }}
29+
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'airlines_data', 'ALL', '{{ config(materialized=''view'') }}
3030
{% set schema = <schema> %}
3131
select DISTINCT airline
3232
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');
3333

3434

3535
-- Note these UUIDs get reset by the trigger
3636
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '549c0575-e64c-3605-85a9-70356a23c4d2', 'MISSING-1', 3,
37-
'Origin airport is not null', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'not_null', 'origin_airport', '',
37+
'Origin airport is not null', '', '', 'all_flight_data', 'not_null', 'origin_airport', '',
3838
NULL, '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');
3939

4040
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '8aca2bee-9e95-3f8a-90e9-153714e05367', 'INCONSISTENT-1',
41-
5, 'Price is not negative', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'not_negative_string_column', 'price', '',
41+
5, 'Price is not negative', '', '', 'all_flight_data', 'not_negative_string_column', 'price', '',
4242
'{"name": "price"}', '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');
4343

4444
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '52d7352e-56ee-3084-9c67-e5ab24afc3a3', 'DUPLICATE-1',
45-
3, 'Airport not unique', '', '', '7b689796-afde-3930-87be-ed8b7c7a0474', 'unique', 'airport', '', NULL,
45+
3, 'Airport not unique', '', '', 'all_airports_data', 'unique', 'airport', '', NULL,
4646
'2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');
4747

4848
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '935e6b61-b664-3eab-9d67-97c2c9c2bec0', 'INCONSISTENT-1',
49-
3, 'Disallowed FP methods entered in form', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'accepted_values', 'stops',
49+
3, 'Disallowed FP methods entered in form', '', '', 'all_flight_data', 'accepted_values', 'stops',
5050
'', $${"values": [ "1", "2", "3", "Non-stop"]}$$, '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');
5151

5252
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '0cdc9702-91e0-3499-b6f0-4dec12ad0f08', 'ASSESS-1', 3,
53-
'Flight with no airport record', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'relationships', 'origin_airport',
53+
'Flight with no airport record', '', '', 'all_flight_data', 'relationships', 'origin_airport',
5454
'', $${"name": "flight_with_no_airport", "to": "ref('dot_model__all_airports_data')", "field": "airport"}$$,
5555
'2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');
5656

5757
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '0cdc9702-91e0-3499-b6f0-4dec12ad0f18', 'BIAS-1', 6,
58-
'Price outlier airlines', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'expect_similar_means_across_reporters',
58+
'Price outlier airlines', '', '', 'all_flight_data', 'expect_similar_means_across_reporters',
5959
'price', '', $${"key": "airline","quantity": "price","data_table": "dot_model__all_flight_data","id_column": "airline",
6060
"target_table":"dot_model__airlines_data"}$$, '2022-01-19 20:00:00.000 -0500', '2022-01-19 20:00:00.000 -0500', 'Matt');
6161

6262
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '36d33837-bd92-370a-963a-264a4d5b2bac', 'DUPLICATE-1',
63-
6, 'Duplicate flight record', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'possible_duplicate_forms', '', '',
63+
6, 'Duplicate flight record', '', '', 'all_flight_data', 'possible_duplicate_forms', '', '',
6464
$${"table_specific_reported_date": "departure_time", "table_specific_patient_uuid": "airline", "table_specific_uuid":
6565
"uuid", "table_specific_period": "day"}$$, '2021-12-23 19:00:00.000 -0500', '2022-03-21 19:00:00.000 -0500', 'Matt');
6666

6767
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', 'c4a3da8f-32f4-4e9b-b135-354de203ca90', 'TREAT-1',
68-
5, 'Number of stops has a reasonible value', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'custom_sql', '', '',
68+
5, 'Number of stops has a reasonible value', '', '', 'all_flight_data', 'custom_sql', '', '',
6969
format('{%s: %s}',
7070
to_json('query'::text),
7171
to_json($query$

docker/appsmith/DOT App V2.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dot/self_tests/data/expected/extract_df_from_dbt_test_results_json.csv

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
,run_id,test_id,entity_id,test_type,column_name,id_column_name,test_parameters,test_status,test_status_message,failed_tests_view,failed_tests_view_sql
2-
test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_stop.b734743116,4541476c-814e-43fe-ab38-786f36beecbc,dac4c545-f610-3dae-ad82-1ddf27dae144,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,accepted_values,stops,,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
2+
test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_stop.b734743116,4541476c-814e-43fe-ab38-786f36beecbc,cad13f73-27b5-3427-be8f-4d213bba3b19,all_flight_data,accepted_values,stops,,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
33
SELECT dot_model__all_flight_data.stops AS value_field,
44
count(*) AS n_records
55
FROM self_tests_public_tests.dot_model__all_flight_data
@@ -9,11 +9,12 @@ test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_
99
all_values.n_records
1010
FROM all_values
1111
WHERE all_values.value_field::text <> ALL (ARRAY['1'::character varying::text, '2'::character varying::text, '3'::character varying::text, 'Non-stop'::character varying::text]);"
12-
test.dbt_model_1.not_negative_string_column_dot_model__all_flight_data_price__price.322389c2ba,4541476c-814e-43fe-ab38-786f36beecbc,49aa2fd3-511c-3d84-a782-a5daf57f98da,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_negative_string_column,price,,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
12+
test.dbt_model_1.not_negative_string_column_dot_model__all_flight_data_price__price.322389c2ba,4541476c-814e-43fe-ab38-786f36beecbc,ed27037a-4054-3070-9d88-fdf9cd0231c8,all_flight_data,not_negative_string_column,price,,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
1313
FROM self_tests_public_tests.dot_model__all_flight_data
1414
WHERE dot_model__all_flight_data.price::character varying::text ~~ '-%'::text
1515
HAVING count(*) > 0;"
16-
test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4541476c-814e-43fe-ab38-786f36beecbc,983a5746-bea7-3072-9a80-2c1c6706ceed,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_null,origin_airport,,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,
16+
17+
test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4541476c-814e-43fe-ab38-786f36beecbc,df44c2f4-65f8-3170-a03f-6035aaa45660,all_flight_data,not_null,origin_airport,,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,
1718
dot_model__all_flight_data.departure_time,
1819
dot_model__all_flight_data.airline,
1920
dot_model__all_flight_data.origin_airport,
@@ -24,15 +25,17 @@ test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4
2425
dot_model__all_flight_data.price
2526
FROM self_tests_public_tests.dot_model__all_flight_data
2627
WHERE dot_model__all_flight_data.origin_airport IS NULL;"
27-
test.dbt_model_1.relationships_dot_model__all_flight_data_origin_airport__airport__flight_with_no_airport__ref_dot_model__all_airports_data_.3a9f7e32d9,4541476c-814e-43fe-ab38-786f36beecbc,7aa26bda-57e1-39b8-a3e9-979a3d882577,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,relationships,origin_airport,,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list
28+
29+
test.dbt_model_1.relationships_dot_model__all_flight_data_origin_airport__airport__flight_with_no_airport__ref_dot_model__all_airports_data_.3a9f7e32d9,4541476c-814e-43fe-ab38-786f36beecbc,2ba7f3e8-cd62-37ac-854f-01f704489130,all_flight_data,relationships,origin_airport,,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list
2830
FROM ( SELECT dot_model__all_flight_data.uuid AS from_uuid,
2931
dot_model__all_flight_data.origin_airport AS from_column_id
3032
FROM self_tests_public_tests.dot_model__all_flight_data) from_model
3133
LEFT JOIN ( SELECT dot_model__all_airports_data.airport AS to_id
3234
FROM self_tests_public_tests.dot_model__all_airports_data) to_model ON to_model.to_id::text = from_model.from_column_id::text
3335
WHERE from_model.from_column_id IS NOT NULL AND to_model.to_id IS NULL
3436
HAVING count(*) > 0;"
35-
test.dbt_model_1.unique_dot_model__all_airports_data_airport.912f240fa1,4541476c-814e-43fe-ab38-786f36beecbc,aa1c361c-a9ba-350e-9959-e92a5654f7dc,7b689796-afde-3930-87be-ed8b7c7a0474,unique,airport,,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,
37+
38+
test.dbt_model_1.unique_dot_model__all_airports_data_airport.912f240fa1,4541476c-814e-43fe-ab38-786f36beecbc,942f4496-1202-3768-9cfe-96128bcd754c,all_airports_data,unique,airport,,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,
3639
count(*) AS n_records
3740
FROM self_tests_public_tests.dot_model__all_airports_data
3841
WHERE dot_model__all_airports_data.airport IS NOT NULL

0 commit comments

Comments
 (0)