Skip to content

Commit ac242b6

Browse files
authored
Hide non-public methods (#1773)
Hide non-public methods of the recon API
1 parent e81531c commit ac242b6

File tree

4 files changed

+10
-100
lines changed

4 files changed

+10
-100
lines changed

src/databricks/labs/ucx/recon/data_comparator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414

1515
class StandardDataComparator(DataComparator):
16-
DATA_COMPARISON_QUERY_TEMPLATE = """
16+
_DATA_COMPARISON_QUERY_TEMPLATE = """
1717
WITH compare_results AS (
1818
SELECT
1919
CASE
@@ -73,7 +73,7 @@ def compare_data(
7373
source_row_count=source_data_profile.row_count,
7474
target_row_count=target_data_profile.row_count,
7575
)
76-
comparison_query = StandardDataComparator.build_data_comparison_query(
76+
comparison_query = self._build_data_comparison_query(
7777
source_data_profile,
7878
target_data_profile,
7979
)
@@ -89,7 +89,7 @@ def compare_data(
8989
)
9090

9191
@classmethod
92-
def build_data_comparison_query(
92+
def _build_data_comparison_query(
9393
cls,
9494
source_data_profile: DataProfilingResult,
9595
target_data_profile: DataProfilingResult,
@@ -98,7 +98,7 @@ def build_data_comparison_query(
9898
target_table = target_data_profile.table_metadata.identifier
9999
source_hash_inputs = _build_data_comparison_hash_inputs(source_data_profile)
100100
target_hash_inputs = _build_data_comparison_hash_inputs(target_data_profile)
101-
comparison_query = StandardDataComparator.DATA_COMPARISON_QUERY_TEMPLATE.format(
101+
comparison_query = cls._DATA_COMPARISON_QUERY_TEMPLATE.format(
102102
source_hash_expr=f"SHA2(CONCAT_WS('|', {', '.join(source_hash_inputs)}), 256)",
103103
target_hash_expr=f"SHA2(CONCAT_WS('|', {', '.join(target_hash_inputs)}), 256)",
104104
source_table_fqn=source_table.fqn_escaped,

src/databricks/labs/ucx/recon/metadata_retriever.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def get_metadata(self, entity: TableIdentifier) -> TableMetadata:
1818
Note: This method does not handle exceptions raised during the execution of the SQL query. These exceptions are
1919
expected to be handled by the caller in a manner appropriate for their context.
2020
"""
21-
schema_query = DatabricksTableMetadataRetriever.build_metadata_query(entity)
21+
schema_query = self._build_metadata_query(entity)
2222
query_result: Iterator[Row] = self._sql_backend.fetch(schema_query)
2323
# The code uses a set comprehension to automatically deduplicate the column metadata entries,
2424
# Partition information is typically prefixed with a # symbol,
@@ -32,7 +32,7 @@ def get_metadata(self, entity: TableIdentifier) -> TableMetadata:
3232
return TableMetadata(entity, sorted(columns, key=lambda x: x.name))
3333

3434
@classmethod
35-
def build_metadata_query(cls, entity: TableIdentifier) -> str:
35+
def _build_metadata_query(cls, entity: TableIdentifier) -> str:
3636
if entity.catalog == "hive_metastore":
3737
return f"DESCRIBE TABLE {entity.fqn_escaped}"
3838

Lines changed: 4 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,8 @@
1-
import re
2-
31
from databricks.labs.lsql.backends import MockBackend
42

53
from databricks.labs.ucx.recon.base import (
64
TableIdentifier,
75
DataComparisonResult,
8-
DataProfilingResult,
9-
TableMetadata,
10-
ColumnMetadata,
116
)
127
from databricks.labs.ucx.recon.data_comparator import StandardDataComparator
138
from databricks.labs.ucx.recon.data_profiler import StandardDataProfiler
@@ -22,10 +17,14 @@ def test_data_comparison(metadata_row_factory, row_count_row_factory, data_comp_
2217
f"{source.catalog}\\.information_schema\\.columns": metadata_row_factory[
2318
("col1", "int"),
2419
("col2", "string"),
20+
("col3", "array<string>"),
21+
("col4", "struct<a:int,b:int,c:array<string>>"),
2522
],
2623
f"{target.catalog}\\.information_schema\\.columns": metadata_row_factory[
2724
("col1", "int"),
2825
("col2", "string"),
26+
("col3", "array<string>"),
27+
("col4", "struct<a:int,b:int,c:array<string>>"),
2928
],
3029
f"SELECT COUNT\\(\\*\\) as row_count FROM {source.fqn_escaped}": row_count_row_factory[100,],
3130
f"SELECT COUNT\\(\\*\\) as row_count FROM {target.fqn_escaped}": row_count_row_factory[2,],
@@ -45,64 +44,3 @@ def test_data_comparison(metadata_row_factory, row_count_row_factory, data_comp_
4544
actual_comparison_result = data_comparator.compare_data(source, target, True)
4645

4746
assert actual_comparison_result == expected_comparison_result
48-
49-
50-
def test_prepare_data_comparison_query():
51-
source = TableIdentifier("hive_metastore", "db1", "table1")
52-
target = TableIdentifier("catalog1", "schema1", "table2")
53-
54-
source_data_profile = DataProfilingResult(
55-
10,
56-
TableMetadata(
57-
source,
58-
[
59-
ColumnMetadata("col1", "string"),
60-
ColumnMetadata("col2", "array<string>"),
61-
ColumnMetadata("col3", "struct<a:int,b:int,c:array<string>>"),
62-
],
63-
),
64-
)
65-
target_data_profile = DataProfilingResult(
66-
10,
67-
TableMetadata(
68-
target,
69-
[
70-
ColumnMetadata("col1", "string"),
71-
ColumnMetadata("col2", "array<string>"),
72-
ColumnMetadata("col3", "struct<a:int,b:int,c:array<string>>"),
73-
],
74-
),
75-
)
76-
77-
actual_query = (
78-
StandardDataComparator.build_data_comparison_query(
79-
source_data_profile,
80-
target_data_profile,
81-
)
82-
.strip()
83-
.lower()
84-
)
85-
86-
source_hash_columns = [
87-
"COALESCE(TRIM(col1), '')",
88-
"COALESCE(TRIM(TO_JSON(SORT_ARRAY(col2))), '')",
89-
"COALESCE(TRIM(TO_JSON(col3)), '')",
90-
]
91-
target_hash_columns = [
92-
"COALESCE(TRIM(col1), '')",
93-
"COALESCE(TRIM(TO_JSON(SORT_ARRAY(col2))), '')",
94-
"COALESCE(TRIM(TO_JSON(col3)), '')",
95-
]
96-
97-
expected_query = (
98-
StandardDataComparator.DATA_COMPARISON_QUERY_TEMPLATE.format(
99-
source_hash_expr=f"SHA2(CONCAT_WS('|', {', '.join(source_hash_columns)}), 256)",
100-
target_hash_expr=f"SHA2(CONCAT_WS('|', {', '.join(target_hash_columns)}), 256)",
101-
source_table_fqn="`hive_metastore`.`db1`.`table1`",
102-
target_table_fqn="`catalog1`.`schema1`.`table2`",
103-
)
104-
.strip()
105-
.lower()
106-
)
107-
108-
assert re.sub(r'\s+', ' ', actual_query) == re.sub(r'\s+', ' ', expected_query)

tests/unit/recon/test_metadata_retriever.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import re
2-
31
from databricks.labs.lsql.backends import MockBackend
42

53
from databricks.labs.ucx.recon.base import TableIdentifier, TableMetadata, ColumnMetadata
@@ -59,29 +57,3 @@ def test_unity_table_metadata_retrieval(metadata_row_factory):
5957
metadata_retriever = DatabricksTableMetadataRetriever(sql_backend)
6058
actual_metadata = metadata_retriever.get_metadata(table_identifier)
6159
assert actual_metadata == expected_metadata
62-
63-
64-
def test_hms_metadata_query():
65-
table_identifier = TableIdentifier("hive_metastore", "db1", "table1")
66-
actual_query = DatabricksTableMetadataRetriever.build_metadata_query(table_identifier).strip().lower()
67-
expected_query = "DESCRIBE TABLE `hive_metastore`.`db1`.`table1`".lower()
68-
assert re.sub(r'\s+', ' ', actual_query) == expected_query
69-
70-
71-
def test_unity_metadata_query():
72-
table_identifier = TableIdentifier("catalog1", "db1", "table1")
73-
actual_query = DatabricksTableMetadataRetriever.build_metadata_query(table_identifier).strip().lower()
74-
expected_query = """
75-
SELECT
76-
LOWER(column_name) AS col_name,
77-
full_data_type AS data_type
78-
FROM
79-
`catalog1`.information_schema.columns
80-
WHERE
81-
LOWER(table_catalog)='catalog1' AND
82-
LOWER(table_schema)='db1' AND
83-
LOWER(table_name) ='table1'
84-
ORDER BY col_name
85-
""".strip().lower()
86-
87-
assert re.sub(r'\s+', ' ', actual_query) == re.sub(r'\s+', ' ', expected_query)

0 commit comments

Comments
 (0)