diff --git a/soda-databricks/src/soda_databricks/common/data_sources/databricks_data_source.py b/soda-databricks/src/soda_databricks/common/data_sources/databricks_data_source.py
index 733052729..7e75c2c62 100644
--- a/soda-databricks/src/soda_databricks/common/data_sources/databricks_data_source.py
+++ b/soda-databricks/src/soda_databricks/common/data_sources/databricks_data_source.py
@@ -227,7 +227,18 @@ def _get_data_type_name_synonyms(self) -> list[list[str]]:
     def build_columns_metadata_query_str(self, table_namespace: DataSourceNamespace, table_name: str) -> str:
+        """Return the ``DESCRIBE`` query string used to fetch column metadata for ``table_name``.
+
+        The database and schema used as the name prefix are taken from ``table_namespace``.
+        """
         database_name: str | None = table_namespace.get_database_for_metadata_query()
         schema_name: str = table_namespace.get_schema_for_metadata_query()
-        return f"DESCRIBE {database_name}.{schema_name}.{table_name}"
+
+        # Build the name through qualify_dataset_name instead of joining the parts with dots by hand.
+        # NOTE(review): presumably that helper quotes each part so special characters survive -- confirm.
+        fully_qualified_name = self.qualify_dataset_name(
+            dataset_prefix=[database_name, schema_name], dataset_name=table_name
+        )
+
+        return f"DESCRIBE {fully_qualified_name}"
 
     def build_column_metadatas_from_query_result(self, query_result: QueryResult) -> list[ColumnMetadata]:
         # Filter out dataset description rows (first such line starts with #, ignore the rest) or empty
diff --git a/soda-databricks/tests/data_sources/test_databricks.py b/soda-databricks/tests/data_sources/test_databricks.py
index 1f3e58ec6..04d81b0e2 100644
--- a/soda-databricks/tests/data_sources/test_databricks.py
+++ b/soda-databricks/tests/data_sources/test_databricks.py
@@ -1,7 +1,20 @@
 import os
 
 import pytest
+from helpers.data_source_test_helper import DataSourceTestHelper
+from helpers.mock_soda_cloud import MockResponse
 from helpers.test_connection import TestConnection
+from helpers.test_table import TestTableSpecification
+
+# Table specification whose purpose and column names contain dashes, a slash, a plus sign and leading digits.
+test_table_specification = (
+    TestTableSpecification.builder()
+    .table_purpose("1-schema_databricks-special-chars")
+    .column_varchar("id-1")
+    .column_integer("2-size")
+    .column_date("/+created")
+    .build()
+)
 
 DATABRICKS_HOST = os.getenv("DATABRICKS_HOST")
 DATABRICKS_HTTP_PATH = os.getenv("DATABRICKS_HTTP_PATH")
@@ -61,3 +74,36 @@
 @pytest.mark.parametrize("test_connection", test_connections, ids=[tc.test_name for tc in test_connections])
 def test_databricks_connections(test_connection: TestConnection):
     test_connection.test()
+
+
+def test_databricks_schema_check_special_chars(data_source_test_helper: DataSourceTestHelper):
+    """Verify a schema check passes when the table and column names contain special characters."""
+    test_table = data_source_test_helper.ensure_test_table(test_table_specification)
+    data_source_test_helper.enable_soda_cloud_mock(
+        [
+            MockResponse(status_code=200, json_object={"fileId": "a81bc81b-dead-4e5d-abff-90865d1e13b1"}),
+        ]
+    )
+
+    data_source_test_helper.assert_contract_pass(
+        test_table=test_table,
+        contract_yaml_str=f"""
+            checks:
+              - schema:
+            columns:
+              - name: id-1
+                data_type: {test_table.data_type('id-1')}
+              - name: 2-size
+                data_type: {test_table.data_type('2-size')}
+              - name: /+created
+        """,
+    )
+
+    # NOTE(review): requests[1] is read as the scan-results upload; presumably requests[0] is the
+    # file upload answered by the mocked "fileId" response above -- confirm against the helper.
+    soda_core_insert_scan_results_command = data_source_test_helper.soda_cloud.requests[1].json
+    check_json: dict = soda_core_insert_scan_results_command["checks"][0]
+    schema_diagnostics: dict = check_json["diagnostics"]["v4"]
+    assert schema_diagnostics["type"] == "schema"
+    assert {c["name"] for c in schema_diagnostics["actual"]} == {"id-1", "2-size", "/+created"}
+    assert {c["name"] for c in schema_diagnostics["expected"]} == {"id-1", "2-size", "/+created"}