Skip to content
This repository was archived by the owner on Sep 2, 2025. It is now read-only.

Commit b59a087

Browse files
committed
Add tests for data profile scan
1 parent 7f80a97 commit b59a087

File tree

1 file changed

+371
-0
lines changed

1 file changed

+371
-0
lines changed
Lines changed: 371 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,371 @@
1+
import os
2+
import pytest
3+
import yaml
4+
from unittest.mock import patch, MagicMock
5+
from dbt.adapters.bigquery.relation import BigQueryRelation
6+
from dbt.tests.util import run_dbt, get_connection, relation_from_name, read_file, write_config_file
7+
8+
# Dataplex location and scan id shared by the data profile scan tests.
SCAN_LOCATION = "us-central1"
SCAN_ID = "bigquery_data_profile_scan_test"
# Name of the single dbt model every test in this module builds.
MODEL_NAME = "test_model"

# Labels the tests configure on the model themselves.
ORIGINAL_LABELS = {
    "my_label_key": "my_label_value",
}

# Label keys the tests expect on the table in addition to ORIGINAL_LABELS
# once a data profile scan has been created for it.
PROFILE_SCAN_LABELS = [
    "dataplex-dp-published-scan",
    "dataplex-dp-published-project",
    "dataplex-dp-published-location",
]

# Model body: a two-row table materialization.
SQL_CONTENT = """
{{
config(
materialized="table"
)
}}
select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all
select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour
"""

# Model properties file without any data profile scan settings.
YAML_CONTENT = f"""version: 2
models:
  - name: {MODEL_NAME}
"""

# Model properties file that configures the data profile scan on the model
# itself. It reuses SCAN_LOCATION / SCAN_ID so every test targets the same
# scan, and the cron time zone is a valid IANA name ("America/New_York";
# "Asia/New_York" does not exist in the tz database).
YAML_CONTENT_WITH_PROFILE_SCAN_SETTING = f"""version: 2
models:
  - name: {MODEL_NAME}
    config:
      data_profile_scan:
        location: {SCAN_LOCATION}
        scan_id: {SCAN_ID}
        sampling_percent: 10
        row_filter: "TRUE"
        cron: "CRON_TZ=America/New_York 0 9 * * *"
"""
48+
49+
50+
class TestDataProfileScanWithProjectProfileScanSetting:
    """Data profile scan configured through ``project_config_update``, no cron.

    The test asserts that the mocked Dataplex client receives exactly one
    ``create_data_scan`` and one ``run_data_scan`` call, and that the built
    table carries the profile-scan label keys plus the configured labels.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return project config with labels and scan settings for all models."""
        scan_settings = {
            "location": SCAN_LOCATION,
            "scan_id": SCAN_ID,
            "sampling_percent": 10,
            "row_filter": "TRUE",
        }
        model_config = {
            "+labels": ORIGINAL_LABELS,
            "+data_profile_scan": scan_settings,
        }
        return {"models": model_config}

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model SQL and its properties file, keyed by file name."""
        model_files = {}
        model_files[f"{MODEL_NAME}.sql"] = SQL_CONTENT
        model_files[f"{MODEL_NAME}.yml"] = YAML_CONTENT
        return model_files

    def test_create_data_profile_scan(self, project):
        """Build the model once and check scan calls and table labels."""
        client_path = "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"
        with patch(client_path) as client_cls:
            scan_client = client_cls.return_value

            run_results = run_dbt()
            assert len(run_results) == 1

            scan_client.create_data_scan.assert_called_once()
            scan_client.run_data_scan.assert_called_once()

            model_relation: BigQueryRelation = relation_from_name(project.adapter, MODEL_NAME)
            with get_connection(project.adapter) as conn:
                table_ref = project.adapter.connections.get_bq_table(
                    model_relation.database, model_relation.schema, model_relation.table
                )
                bq_table = conn.handle.get_table(table_ref)
                # Only label keys are compared; label values are not checked.
                expected_keys = set(PROFILE_SCAN_LABELS + list(ORIGINAL_LABELS.keys()))
                assert set(bq_table.labels.keys()) == expected_keys
93+
94+
95+
class TestDataProfileScanWithProjectProfileScanSettingAndCron:
    """Data profile scan configured through ``project_config_update`` with a cron.

    With a cron schedule in the settings, the test asserts that the mocked
    Dataplex client receives one ``create_data_scan`` call and no
    ``run_data_scan`` call, and that the built table carries the profile-scan
    label keys plus the configured labels.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return project config with labels and scheduled scan settings."""
        return {
            "models": {
                "+labels": ORIGINAL_LABELS,
                "+data_profile_scan": {
                    "location": SCAN_LOCATION,
                    "scan_id": SCAN_ID,
                    "sampling_percent": 10,
                    "row_filter": "TRUE",
                    # CRON_TZ must name a real IANA time zone; New York lives
                    # under "America/", "Asia/New_York" does not exist.
                    "cron": "CRON_TZ=America/New_York 0 9 * * *",
                },
            },
        }

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model SQL and its properties file, keyed by file name."""
        return {
            f"{MODEL_NAME}.sql": SQL_CONTENT,
            f"{MODEL_NAME}.yml": YAML_CONTENT,
        }

    def test_create_data_profile_scan(self, project):
        """Build the model once; the scan is created but not run on demand."""
        with patch(
            "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"
        ) as MockDataScanClient:
            mock_data_scan_client = MockDataScanClient.return_value

            results = run_dbt()
            assert len(results) == 1

            mock_data_scan_client.create_data_scan.assert_called_once()
            mock_data_scan_client.run_data_scan.assert_not_called()

            relation: BigQueryRelation = relation_from_name(project.adapter, MODEL_NAME)
            with get_connection(project.adapter) as conn:
                table = conn.handle.get_table(
                    project.adapter.connections.get_bq_table(
                        relation.database, relation.schema, relation.table
                    )
                )
                # Only label keys are compared; label values are not checked.
                labels_to_be_created = PROFILE_SCAN_LABELS + list(ORIGINAL_LABELS.keys())
                assert set(table.labels.keys()) == set(labels_to_be_created)
139+
140+
141+
class TestDataProfileScanWithModelProfileScanSetting:
    """Data profile scan configured on the model's own properties file.

    Labels are set in the model's ``config()`` call and the scan settings come
    from ``YAML_CONTENT_WITH_PROFILE_SCAN_SETTING``. The test asserts one
    ``create_data_scan`` call, no ``run_data_scan`` call, and that the table
    carries the profile-scan label keys plus the configured labels.
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return a labelled model SQL file and the scan-enabled properties file."""
        # Assembled line by line; the doubled braces are literal Jinja
        # delimiters and only the labels line interpolates a Python value.
        model_sql = (
            "\n"
            "{{\n"
            "config(\n"
            'materialized="table",\n'
            f"labels={ORIGINAL_LABELS},\n"
            ")\n"
            "}}\n"
            "select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all\n"
            "select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour\n"
        )

        model_files = {}
        model_files[f"{MODEL_NAME}.sql"] = model_sql
        model_files[f"{MODEL_NAME}.yml"] = YAML_CONTENT_WITH_PROFILE_SCAN_SETTING
        return model_files

    def test_create_data_profile_scan(self, project):
        """Build the model once and check scan calls and table labels."""
        client_path = "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"
        with patch(client_path) as client_cls:
            scan_client = client_cls.return_value

            run_results = run_dbt()
            assert len(run_results) == 1

            scan_client.create_data_scan.assert_called_once()
            scan_client.run_data_scan.assert_not_called()

            model_relation: BigQueryRelation = relation_from_name(project.adapter, MODEL_NAME)
            with get_connection(project.adapter) as conn:
                table_ref = project.adapter.connections.get_bq_table(
                    model_relation.database, model_relation.schema, model_relation.table
                )
                bq_table = conn.handle.get_table(table_ref)
                # Only label keys are compared; label values are not checked.
                expected_keys = set(PROFILE_SCAN_LABELS + list(ORIGINAL_LABELS.keys()))
                assert set(bq_table.labels.keys()) == expected_keys
181+
182+
183+
class TestDataProfileScanWithoutProfileScanSetting:
    """Model built with no data profile scan settings in this class.

    The test asserts that neither ``create_data_scan`` nor ``run_data_scan``
    is called on the mocked Dataplex client and that the table has no labels.
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model SQL and its properties file, keyed by file name."""
        model_files = {}
        model_files[f"{MODEL_NAME}.sql"] = SQL_CONTENT
        model_files[f"{MODEL_NAME}.yml"] = YAML_CONTENT
        return model_files

    def test_create_data_profile_scan(self, project):
        """Build the model once and check that no scan or label is created."""
        client_path = "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"
        with patch(client_path) as client_cls:
            scan_client = client_cls.return_value

            run_results = run_dbt()
            assert len(run_results) == 1

            scan_client.create_data_scan.assert_not_called()
            scan_client.run_data_scan.assert_not_called()

            model_relation: BigQueryRelation = relation_from_name(project.adapter, MODEL_NAME)
            with get_connection(project.adapter) as conn:
                table_ref = project.adapter.connections.get_bq_table(
                    model_relation.database, model_relation.schema, model_relation.table
                )
                bq_table = conn.handle.get_table(table_ref)
                # No labels are expected at all, so the key set must be empty.
                assert set(bq_table.labels.keys()) == set()
212+
213+
214+
class TestDataProfileScanDisabledProfileScanSetting:
    """Data profile scan settings present but with ``enabled`` set to False.

    The test asserts that neither ``create_data_scan`` nor ``run_data_scan``
    is called on the mocked Dataplex client and that the table has no labels.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return project config whose scan settings are explicitly disabled."""
        scan_settings = {
            "location": SCAN_LOCATION,
            "scan_id": SCAN_ID,
            "enabled": False,
        }
        return {"models": {"+data_profile_scan": scan_settings}}

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model SQL and its properties file, keyed by file name."""
        model_files = {}
        model_files[f"{MODEL_NAME}.sql"] = SQL_CONTENT
        model_files[f"{MODEL_NAME}.yml"] = YAML_CONTENT
        return model_files

    def test_create_data_profile_scan(self, project):
        """Build the model once and check that no scan or label is created."""
        client_path = "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"
        with patch(client_path) as client_cls:
            scan_client = client_cls.return_value

            run_results = run_dbt()
            assert len(run_results) == 1

            scan_client.create_data_scan.assert_not_called()
            scan_client.run_data_scan.assert_not_called()

            model_relation: BigQueryRelation = relation_from_name(project.adapter, MODEL_NAME)
            with get_connection(project.adapter) as conn:
                table_ref = project.adapter.connections.get_bq_table(
                    model_relation.database, model_relation.schema, model_relation.table
                )
                bq_table = conn.handle.get_table(table_ref)
                # No labels are expected at all, so the key set must be empty.
                assert set(bq_table.labels.keys()) == set()
255+
256+
257+
class TestDataProfileScanUpdatedMidway:
    """Scan settings are changed between two dbt runs.

    The first run is expected to create and run the scan. ``list_data_scans``
    is then stubbed to report a scan named ``SCAN_ID``, ``sampling_percent``
    is set to ``None`` in ``dbt_project.yml``, and the second run is expected
    to call ``update_data_scan`` exactly once.
    """

    project_name = "my-project"

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return project config with database, labels and scan settings."""
        scan_settings = {
            "location": SCAN_LOCATION,
            "scan_id": SCAN_ID,
            "sampling_percent": 10,
            "row_filter": "TRUE",
        }
        model_config = {
            "+database": self.project_name,
            "+labels": ORIGINAL_LABELS,
            "+data_profile_scan": scan_settings,
        }
        return {"models": model_config}

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model SQL and its properties file, keyed by file name."""
        model_files = {}
        model_files[f"{MODEL_NAME}.sql"] = SQL_CONTENT
        model_files[f"{MODEL_NAME}.yml"] = YAML_CONTENT
        return model_files

    def test_create_data_profile_scan(self, project):
        """Run dbt twice around a settings change and check the client calls."""
        client_path = "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"
        with patch(client_path) as client_cls:
            scan_client = client_cls.return_value

            first_run = run_dbt()
            assert len(first_run) == 1

            scan_client.create_data_scan.assert_called_once()
            scan_client.run_data_scan.assert_called_once()

            # From here on the client reports one scan named SCAN_ID.
            def fake_list_data_scans(parent):
                existing_scan = MagicMock()
                existing_scan.name = SCAN_ID
                return [existing_scan]

            scan_client.list_data_scans.side_effect = fake_list_data_scans

            # Rewrite dbt_project.yml with sampling_percent cleared.
            project_yml_path = os.path.join(project.project_root, "dbt_project.yml")
            project_config = yaml.safe_load(read_file(project_yml_path))
            project_config["models"]["+data_profile_scan"]["sampling_percent"] = None
            write_config_file(project_config, project_yml_path)

            second_run = run_dbt()
            assert len(second_run) == 1
            scan_client.update_data_scan.assert_called_once()

            model_relation: BigQueryRelation = relation_from_name(project.adapter, MODEL_NAME)
            with get_connection(project.adapter) as conn:
                table_ref = project.adapter.connections.get_bq_table(
                    model_relation.database, model_relation.schema, model_relation.table
                )
                bq_table = conn.handle.get_table(table_ref)
                # Only label keys are compared; label values are not checked.
                expected_keys = set(PROFILE_SCAN_LABELS + list(ORIGINAL_LABELS.keys()))
                assert set(bq_table.labels.keys()) == expected_keys
317+
318+
319+
class TestDataProfileScanDisabledMidway:
    """The data profile scan is disabled between two dbt runs.

    The first run is expected to create and run the scan. ``enabled`` is then
    set to False in ``dbt_project.yml``; the second run is expected to call
    ``delete_data_scan`` exactly once and to leave only the configured labels
    on the table.
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        """Return project config with labels and scan settings for all models."""
        scan_settings = {
            "location": SCAN_LOCATION,
            "scan_id": SCAN_ID,
            "sampling_percent": 10,
            "row_filter": "TRUE",
        }
        model_config = {
            "+labels": ORIGINAL_LABELS,
            "+data_profile_scan": scan_settings,
        }
        return {"models": model_config}

    @pytest.fixture(scope="class")
    def models(self):
        """Return the model SQL and its properties file, keyed by file name."""
        model_files = {}
        model_files[f"{MODEL_NAME}.sql"] = SQL_CONTENT
        model_files[f"{MODEL_NAME}.yml"] = YAML_CONTENT
        return model_files

    def test_create_data_profile_scan(self, project):
        """Run dbt twice around disabling the scan and check the client calls."""
        client_path = "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"
        with patch(client_path) as client_cls:
            scan_client = client_cls.return_value

            first_run = run_dbt()
            assert len(first_run) == 1

            scan_client.create_data_scan.assert_called_once()
            scan_client.run_data_scan.assert_called_once()

            # Update the project to disable the data profile scan
            project_yml_path = os.path.join(project.project_root, "dbt_project.yml")
            project_config = yaml.safe_load(read_file(project_yml_path))
            project_config["models"]["+data_profile_scan"]["enabled"] = False
            write_config_file(project_config, project_yml_path)

            second_run = run_dbt()
            assert len(second_run) == 1
            scan_client.delete_data_scan.assert_called_once()

            model_relation: BigQueryRelation = relation_from_name(project.adapter, MODEL_NAME)
            with get_connection(project.adapter) as conn:
                table_ref = project.adapter.connections.get_bq_table(
                    model_relation.database, model_relation.schema, model_relation.table
                )
                bq_table = conn.handle.get_table(table_ref)
                # Only the originally configured label keys should remain.
                expected_keys = set(ORIGINAL_LABELS.keys())
                assert set(bq_table.labels.keys()) == expected_keys

0 commit comments

Comments
 (0)