Skip to content

Commit 9ac1d91

Browse files
authored
Merge pull request #1697 from elementary-data/ele-3608-test-metadata-in-the-report
Test models in the report
2 parents b42805a + 6511879 commit 9ac1d91

File tree

15 files changed

+498265
-218362
lines changed

15 files changed

+498265
-218362
lines changed

.github/workflows/test-warehouse.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,8 @@ jobs:
210210
--aws-access-key-id "$AWS_ACCESS_KEY_ID"
211211
--aws-secret-access-key "$AWS_SECRET_ACCESS_KEY"
212212
--s3-bucket-name elementary-ci-artifacts
213-
--google-service-account-path /tmp/gcs_keyfile.json
214-
--gcs-bucket-name elementary_ci_artifacts
213+
# --google-service-account-path /tmp/gcs_keyfile.json
214+
# --gcs-bucket-name elementary_ci_artifacts
215215
--azure-connection-string "$AZURE_CONNECTION_STRING"
216216
--azure-container-name reports
217217
--update-bucket-website true

elementary/monitor/api/report/report.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@
2828
from elementary.monitor.api.source_freshnesses.source_freshnesses import (
2929
SourceFreshnessesAPI,
3030
)
31-
from elementary.monitor.api.tests.schema import TestResultSchema, TestRunSchema
31+
from elementary.monitor.api.tests.schema import (
32+
TestResultSchema,
33+
TestRunSchema,
34+
TestSchema,
35+
)
3236
from elementary.monitor.api.tests.tests import TestsAPI
3337
from elementary.monitor.api.totals_schema import TotalsSchema
3438
from elementary.monitor.data_monitoring.schema import SelectorFilterSchema
@@ -103,6 +107,7 @@ def get_report_data(
103107
)
104108
coverages = models_api.get_test_coverages()
105109

110+
tests = tests_api.get_tests()
106111
test_invocation = invocations_api.get_test_invocation_from_filter(filter)
107112

108113
test_results = tests_api.get_test_results(
@@ -149,6 +154,7 @@ def get_report_data(
149154
"totals"
150155
]
151156
serializable_models_coverages = self._serialize_coverages(coverages)
157+
serializable_tests = self._serialize_tests(tests)
152158
serializable_test_results = self._serialize_test_results(union_test_results)
153159
serializable_test_results_totals = self._serialize_totals(
154160
test_results_totals
@@ -175,6 +181,7 @@ def get_report_data(
175181
days_back=days_back,
176182
models=serializable_models,
177183
groups=serializable_groups,
184+
tests=serializable_tests,
178185
invocation=serializable_invocation,
179186
test_results=serializable_test_results,
180187
test_results_totals=serializable_test_results_totals,
@@ -230,6 +237,12 @@ def _serialize_test_results(
230237
)
231238
return serializable_test_results
232239

240+
def _serialize_tests(self, tests: Dict[str, TestSchema]) -> Dict[str, dict]:
241+
serializable_tests = dict()
242+
for key, test in tests.items():
243+
serializable_tests[key] = test.dict()
244+
return serializable_tests
245+
233246
def _serialize_test_runs(
234247
self,
235248
test_runs: Dict[str, List[Union[TestRunSchema, SourceFreshnessRunSchema]]],

elementary/monitor/api/report/schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class ReportDataSchema(BaseModel):
1515
models: dict = dict()
1616
groups: dict = dict()
1717
invocation: dict = dict()
18+
tests: dict = dict()
1819
test_results: dict = dict()
1920
test_results_totals: dict = dict()
2021
test_runs: dict = dict()

elementary/monitor/api/tests/schema.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,31 @@ class TestMetadataSchema(BaseModel):
6767
normalized_full_path: Optional[str] = None
6868

6969

70+
# Report-facing description of a single test (one entry of the report's
# "tests" mapping, serialized with `.dict()`).
#
# Instances are built by `_parse_test_db_row` in
# elementary/monitor/api/tests/tests.py, which fills the derived fields as:
#   - table_unique_id:      get_table_full_name(database_name, schema_name, table_name)
#   - display_name:         get_display_name(name)
#   - description:          the "description" entry of the row's meta
#   - configuration:        get_test_configuration(test_type, name, test_params)
#   - tags:                 de-duplicated union of the row's tags and model_tags
#   - normalized_full_path: get_normalized_full_path(package_name, original_path)
# The remaining fields are copied from the DB row as-is.
class TestSchema(BaseModel):
71+
unique_id: str
72+
model_unique_id: Optional[str] = None
73+
table_unique_id: Optional[str] = None
74+
database_name: Optional[str] = None
75+
schema_name: str
76+
table_name: Optional[str] = None
77+
column_name: Optional[str] = None
78+
name: str
79+
display_name: str
80+
original_path: Optional[str] = None
81+
type: str
82+
test_type: Optional[str] = None
83+
test_sub_type: Optional[str] = None
84+
test_params: dict
85+
description: Optional[str] = None
86+
configuration: dict
87+
tags: List[str] = Field(default_factory=list)
88+
normalized_full_path: Optional[str] = None
89+
created_at: Optional[str] = None
90+
latest_run_time: Optional[str] = None
91+
latest_run_time_utc: Optional[str] = None
92+
latest_run_status: Optional[str] = None
93+
94+
7095
class TestResultSchema(BaseModel):
7196
metadata: TestMetadataSchema
7297
test_results: Union[DbtTestResultSchema, ElementaryTestResultSchema]

elementary/monitor/api/tests/tests.py

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import re
22
import statistics
33
from collections import defaultdict
4-
from typing import Any, DefaultDict, Dict, List, Optional, Union, cast
4+
from typing import DefaultDict, Dict, List, Optional, Union, cast
55

66
from dateutil import tz
77

@@ -16,11 +16,19 @@
1616
TestResultSchema,
1717
TestResultSummarySchema,
1818
TestRunSchema,
19+
TestSchema,
20+
)
21+
from elementary.monitor.api.tests.utils import (
22+
get_display_name,
23+
get_normalized_full_path,
24+
get_table_full_name,
25+
get_test_configuration,
1926
)
2027
from elementary.monitor.api.totals_schema import TotalsSchema
2128
from elementary.monitor.data_monitoring.schema import SelectorFilterSchema
2229
from elementary.monitor.fetchers.tests.schema import (
2330
NormalizedTestSchema,
31+
TestDBRowSchema,
2432
TestResultDBRowSchema,
2533
)
2634
from elementary.monitor.fetchers.tests.tests import TestsFetcher
@@ -130,6 +138,13 @@ def _get_test_subscribers(test_meta: dict, model_meta: dict) -> List[str]:
130138
def get_singular_tests(self) -> List[NormalizedTestSchema]:
131139
return self.tests_fetcher.get_singular_tests()
132140

141+
def get_tests(self) -> Dict[str, TestSchema]:
    """Fetch all tests and return them parsed, keyed by test unique id.

    Rows come from ``self.tests_fetcher.get_tests()`` and each one is
    converted with ``self._parse_test_db_row``. If several rows share a
    unique id, the last one wins.
    """
    parsed_tests = {}
    for row in self.tests_fetcher.get_tests():
        unique_id = row.unique_id
        parsed_tests[unique_id] = self._parse_test_db_row(row)
    return parsed_tests
147+
133148
def get_test_results(
134149
self,
135150
invocation_id: Optional[str],
@@ -304,26 +319,17 @@ def _parse_affected_row(results_description: str) -> Optional[int]:
304319
def _get_test_metadata_from_test_result_db_row(
305320
test_result_db_row: TestResultDBRowSchema,
306321
) -> TestMetadataSchema:
307-
test_display_name = (
308-
test_result_db_row.test_name.replace("_", " ").title()
309-
if test_result_db_row.test_name
310-
else ""
311-
)
322+
test_display_name = get_display_name(test_result_db_row.test_name)
312323
detected_at_datetime = convert_utc_iso_format_to_datetime(
313324
test_result_db_row.detected_at
314325
)
315326
detected_at_utc = detected_at_datetime
316327
detected_at = detected_at_datetime.astimezone(tz.tzlocal())
317-
table_full_name_parts = [
318-
name
319-
for name in [
320-
test_result_db_row.database_name,
321-
test_result_db_row.schema_name,
322-
test_result_db_row.table_name,
323-
]
324-
if name
325-
]
326-
table_full_name = ".".join(table_full_name_parts).lower()
328+
table_full_name = get_table_full_name(
329+
test_result_db_row.database_name,
330+
test_result_db_row.schema_name,
331+
test_result_db_row.table_name,
332+
)
327333
test_params = test_result_db_row.test_params
328334
test_query = (
329335
test_result_db_row.test_results_query.strip()
@@ -336,23 +342,11 @@ def _get_test_metadata_from_test_result_db_row(
336342
result_query=test_query,
337343
)
338344

339-
configuration: Dict[str, Any]
340-
if test_result_db_row.test_type == "dbt_test":
341-
configuration = dict(
342-
test_name=test_result_db_row.test_name,
343-
test_params=test_params,
344-
)
345-
else:
346-
time_bucket_configuration = test_params.get("time_bucket", {})
347-
time_bucket_count = time_bucket_configuration.get("count", 1)
348-
time_bucket_period = time_bucket_configuration.get("period", "day")
349-
configuration = dict(
350-
test_name=test_result_db_row.test_name,
351-
timestamp_column=test_params.get("timestamp_column"),
352-
testing_timeframe=f"{time_bucket_count} {time_bucket_period}{'s' if time_bucket_count > 1 else ''}",
353-
anomaly_threshold=test_params.get("sensitivity")
354-
or test_params.get("anomaly_sensitivity"),
355-
)
345+
configuration = get_test_configuration(
346+
test_type=test_result_db_row.test_type,
347+
name=test_result_db_row.test_name,
348+
test_params=test_params,
349+
)
356350

357351
return TestMetadataSchema(
358352
test_unique_id=test_result_db_row.test_unique_id,
@@ -378,7 +372,56 @@ def _get_test_metadata_from_test_result_db_row(
378372
result=result,
379373
configuration=configuration,
380374
test_tags=test_result_db_row.test_tags,
381-
normalized_full_path=test_result_db_row.normalized_full_path,
375+
normalized_full_path=get_normalized_full_path(
376+
test_result_db_row.package_name, test_result_db_row.original_path
377+
),
378+
)
379+
380+
@classmethod
def _parse_test_db_row(cls, test_db_row: TestDBRowSchema) -> TestSchema:
    """Convert a raw test DB row into the report-facing ``TestSchema``.

    Derived fields (full table name, display name, configuration,
    normalized path) are computed with the shared helpers from
    ``elementary.monitor.api.tests.utils``.

    Args:
        test_db_row: The test row as returned by the tests fetcher.

    Returns:
        The populated ``TestSchema``. Both latest-run timestamps are ``None``
        when the row has no ``latest_run_time``.
    """
    # Follow the same convention as detected_at / detected_at_utc in this
    # module: the "_utc" field carries the value as parsed, the unsuffixed
    # field carries it converted to the local timezone. (The two were
    # previously assigned the other way around.)
    # NOTE(review): assumes convert_utc_iso_format_to_datetime returns a
    # timezone-aware UTC datetime, as the detected_at code does - confirm.
    latest_run_time_utc = None
    latest_run_time = None
    if test_db_row.latest_run_time:
        latest_run_datetime = convert_utc_iso_format_to_datetime(
            test_db_row.latest_run_time
        )
        latest_run_time_utc = latest_run_datetime.isoformat()
        latest_run_time = latest_run_datetime.astimezone(tz.tzlocal()).isoformat()

    return TestSchema(
        unique_id=test_db_row.unique_id,
        model_unique_id=test_db_row.model_unique_id,
        table_unique_id=get_table_full_name(
            test_db_row.database_name,
            test_db_row.schema_name,
            test_db_row.table_name,
        ),
        database_name=test_db_row.database_name,
        schema_name=test_db_row.schema_name,
        table_name=test_db_row.table_name,
        column_name=test_db_row.column_name,
        name=test_db_row.name,
        display_name=get_display_name(test_db_row.name),
        original_path=test_db_row.original_path,
        type=test_db_row.type,
        test_type=test_db_row.test_type,
        test_sub_type=test_db_row.test_sub_type,
        test_params=test_db_row.test_params,
        description=test_db_row.meta.get("description"),
        configuration=get_test_configuration(
            test_db_row.test_type, test_db_row.name, test_db_row.test_params
        ),
        # Union of the test's own tags and its model's tags; sorted so the
        # serialized report is stable between runs (set order is arbitrary).
        tags=sorted(set(test_db_row.tags + test_db_row.model_tags)),
        normalized_full_path=get_normalized_full_path(
            test_db_row.package_name, test_db_row.original_path
        ),
        # Falsy values (e.g. empty strings) are normalized to None.
        created_at=test_db_row.created_at or None,
        latest_run_time=latest_run_time,
        latest_run_time_utc=latest_run_time_utc,
        latest_run_status=test_db_row.latest_run_status or None,
    )
383426

384427
@staticmethod

elementary/monitor/api/tests/utils.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from typing import Any, Dict, Optional
2+
3+
4+
def get_table_full_name(
    database_name: Optional[str],
    schema_name: Optional[str],
    table_name: Optional[str],
) -> str:
    """Build the lower-cased, dot-separated full name of a table.

    Missing or empty database/schema parts are simply left out. When
    ``table_name`` itself is missing or empty there is no table to name, so
    an empty string is returned regardless of the other parts.
    """
    if table_name:
        present_parts = filter(None, (database_name, schema_name, table_name))
        return ".".join(present_parts).lower()
    return ""
23+
24+
25+
def get_display_name(name: Optional[str]) -> str:
    """Turn a snake_case test name into a human-readable, title-cased label.

    Underscores become spaces and every word is capitalized, e.g.
    ``"not_null"`` -> ``"Not Null"``.

    Args:
        name: The raw test name. May be ``None`` or empty, since test result
            rows are not guaranteed to carry a test name.

    Returns:
        The display name, or an empty string when ``name`` is missing.
    """
    # Preserve the behavior of the inline code this helper replaced, which
    # yielded "" for a missing name instead of raising AttributeError.
    if not name:
        return ""
    return name.replace("_", " ").title()
27+
28+
29+
def get_test_configuration(
    test_type: Optional[str], name: str, test_params: Optional[Dict]
) -> Dict[str, Any]:
    """Build the configuration summary shown for a test in the report.

    Args:
        test_type: The test type; ``None`` means no configuration is known.
        name: The test name, echoed back under ``test_name``.
        test_params: The raw test parameters. May be ``None``.

    Returns:
        - ``{}`` when ``test_type`` is ``None``.
        - For ``"dbt_test"``: the test name and its parameters, unchanged.
        - For any other type: the test name, the timestamp column, a
          human-readable testing timeframe (e.g. ``"2 hours"``) and the
          anomaly threshold (``sensitivity``, falling back to
          ``anomaly_sensitivity``).
    """
    if test_type is None:
        return dict()
    if test_type == "dbt_test":
        return dict(
            test_name=name,
            test_params=test_params,
        )

    # Parameters may be missing entirely, and "time_bucket" may be present
    # with a null value; treat both like an empty mapping instead of crashing.
    params = test_params or {}
    time_bucket_configuration = params.get("time_bucket") or {}

    time_bucket_count = time_bucket_configuration.get("count")
    if time_bucket_count is None:
        time_bucket_count = 1
    time_bucket_period = time_bucket_configuration.get("period")
    if time_bucket_period is None:
        time_bucket_period = "day"

    plural_suffix = "s" if time_bucket_count > 1 else ""
    return dict(
        test_name=name,
        timestamp_column=params.get("timestamp_column"),
        testing_timeframe=f"{time_bucket_count} {time_bucket_period}{plural_suffix}",
        anomaly_threshold=params.get("sensitivity")
        or params.get("anomaly_sensitivity"),
    )
50+
51+
52+
def get_normalized_full_path(
    package_name: Optional[str], original_path: Optional[str]
) -> Optional[str]:
    """Return the test's path prefixed with its package name, when available.

    Yields ``None`` if there is no original path, ``"<package>/<path>"`` if a
    package name is given, and the bare original path otherwise.
    """
    if original_path:
        return f"{package_name}/{original_path}" if package_name else original_path
    return None

elementary/monitor/dbt_project/macros/base_queries/tests.sql

Lines changed: 0 additions & 56 deletions
This file was deleted.

0 commit comments

Comments
 (0)