Skip to content

Commit 6dacc88

Browse files
authored
fix: Databricks - quote dataset in metadata query (#2578)
1 parent 3bd798b commit 6dacc88

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

soda-databricks/src/soda_databricks/common/data_sources/databricks_data_source.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,12 @@ def _get_data_type_name_synonyms(self) -> list[list[str]]:
227227
# Builds the `DESCRIBE ...` statement string for the given table.
# The database and schema parts are read from table_namespace; per its annotation,
# database_name may be None.
def build_columns_metadata_query_str(self, table_namespace: DataSourceNamespace, table_name: str) -> str:
228228
database_name: str | None = table_namespace.get_database_for_metadata_query()
229229
schema_name: str = table_namespace.get_schema_for_metadata_query()
230-
# Removed line: the three name parts were interpolated into the statement as-is, joined by dots.
return f"DESCRIBE {database_name}.{schema_name}.{table_name}"
230+
231+
# Added lines: the dotted name is now produced by self.qualify_dataset_name.
# NOTE(review): presumably that helper applies identifier quoting (per the commit title) and
# decides how a None database_name is treated — it is not visible here, confirm there.
fully_qualified_name = self.qualify_dataset_name(
232+
dataset_prefix=[database_name, schema_name], dataset_name=table_name
233+
)
234+
235+
return f"DESCRIBE {fully_qualified_name}"
231236

232237
def build_column_metadatas_from_query_result(self, query_result: QueryResult) -> list[ColumnMetadata]:
233238
# Filter out dataset description rows (first such line starts with #, ignore the rest) or empty

soda-databricks/tests/data_sources/test_databricks.py

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,19 @@
11
import os
22

33
import pytest
4+
from helpers.data_source_test_helper import DataSourceTestHelper
5+
from helpers.mock_soda_cloud import MockResponse
46
from helpers.test_connection import TestConnection
7+
from helpers.test_table import TestTableSpecification
8+
9+
test_table_specification = (
10+
TestTableSpecification.builder()
11+
.table_purpose("1-schema_databricks-special-chars")
12+
.column_varchar("id-1")
13+
.column_integer("2-size")
14+
.column_date("/+created")
15+
.build()
16+
)
517

618
DATABRICKS_HOST = os.getenv("DATABRICKS_HOST")
719
DATABRICKS_HTTP_PATH = os.getenv("DATABRICKS_HTTP_PATH")
@@ -61,3 +73,33 @@
6173
# Runs once per entry in test_connections (defined outside the lines shown here);
# each generated test case is labelled with that entry's test_name.
@pytest.mark.parametrize("test_connection", test_connections, ids=[tc.test_name for tc in test_connections])
6274
def test_databricks_connections(test_connection: TestConnection):
6375
test_connection.test()
76+
77+
78+
# Schema-check test against the special-character table: the contract must pass, and the
# schema diagnostics sent to the mocked Soda Cloud must list exactly the three column names.
def test_databricks_schema_check_special_chars(data_source_test_helper: DataSourceTestHelper):
79+
test_table = data_source_test_helper.ensure_test_table(test_table_specification)
# Queue a single mocked HTTP 200 response whose JSON body carries a fileId.
80+
data_source_test_helper.enable_soda_cloud_mock(
81+
[
82+
MockResponse(status_code=200, json_object={"fileId": "a81bc81b-dead-4e5d-abff-90865d1e13b1"}),
83+
]
84+
)
85+
86+
# The contract below lists all three columns; data_type is given for "id-1" and "2-size" only.
# NOTE(review): the YAML nesting inside the string is not recoverable from this flattened view.
data_source_test_helper.assert_contract_pass(
87+
test_table=test_table,
88+
contract_yaml_str=f"""
89+
checks:
90+
- schema:
91+
columns:
92+
- name: id-1
93+
data_type: {test_table.data_type('id-1')}
94+
- name: 2-size
95+
data_type: {test_table.data_type('2-size')}
96+
- name: /+created
97+
""",
98+
)
99+
100+
# Index 1 selects the second request recorded by the Soda Cloud mock.
# NOTE(review): presumably request 0 is the upload answered by the mocked fileId response — confirm.
soda_core_insert_scan_results_command = data_source_test_helper.soda_cloud.requests[1].json
101+
check_json: dict = soda_core_insert_scan_results_command["checks"][0]
102+
schema_diagnostics: dict = check_json["diagnostics"]["v4"]
103+
assert schema_diagnostics["type"] == "schema"
# Compared as sets, so column order is not asserted — only that the names match exactly.
104+
assert set([c["name"] for c in schema_diagnostics["actual"]]) == {"id-1", "2-size", "/+created"}
105+
assert set([c["name"] for c in schema_diagnostics["expected"]]) == {"id-1", "2-size", "/+created"}

0 commit comments

Comments
 (0)