Commit d97d875

refactor
Signed-off-by: varun-edachali-dbx <[email protected]>
1 parent f5982f0 · commit d97d875

File tree

5 files changed: +201 additions, −113 deletions


src/databricks/sql/backend/sea/result_set.py

Lines changed: 58 additions & 57 deletions
@@ -24,6 +24,15 @@
 logger = logging.getLogger(__name__)
 
 
+# Column-to-column data mapping for metadata queries
+# Maps target column -> source column to get data from
+COLUMN_DATA_MAPPING = {
+    "DATA_TYPE": "TYPE_NAME",  # DATA_TYPE calculated from TYPE_NAME
+    "NULLABLE": "IS_NULLABLE",  # NULLABLE calculated from IS_NULLABLE
+    "BUFFER_LENGTH": "TYPE_NAME",  # BUFFER_LENGTH calculated from TYPE_NAME
+}
+
+
 class SeaResultSet(ResultSet):
     """ResultSet implementation for SEA backend."""
 
@@ -292,6 +301,40 @@ def prepare_metadata_columns(self, metadata_columns: List[ResultColumn]) -> None
         self._metadata_columns = metadata_columns
         self._prepare_column_mapping()
 
+    def _populate_columns_from_others(
+        self, result_column: ResultColumn, row_data: Any
+    ) -> Any:
+        """
+        Helper function to populate column data from other columns based on COLUMN_DATA_MAPPING.
+
+        Args:
+            result_column: The result column that needs data
+            row_data: Row data (list for JSON, PyArrow table for Arrow)
+
+        Returns:
+            The value to use for this column, or None if not found
+        """
+        target_column = result_column.column_name
+        if target_column not in COLUMN_DATA_MAPPING:
+            return None
+
+        source_column = COLUMN_DATA_MAPPING[target_column]
+
+        # Find the source column index
+        for idx, col in enumerate(self._metadata_columns):
+            if col.column_name == source_column:
+                source_idx = self._column_index_mapping.get(idx)
+                if source_idx is not None:
+                    # Handle Arrow table format
+                    if hasattr(row_data, "column"):  # PyArrow table
+                        return row_data.column(source_idx).to_pylist()
+                    # Handle JSON row format
+                    else:
+                        return row_data[source_idx]
+                break
+
+        return None
+
     def _prepare_column_mapping(self) -> None:
         """
         Prepare column index mapping for metadata queries.
@@ -359,44 +402,24 @@ def _transform_arrow_table(self, table: "pyarrow.Table") -> "pyarrow.Table":
 
         for new_idx, result_column in enumerate(self._metadata_columns):
             old_idx = self._column_index_mapping.get(new_idx)
-
-            # Get the source data
+
+            # Get the source data
             if old_idx is not None:
                 column = table.column(old_idx)
                 values = column.to_pylist()
             else:
                 values = None
-
+
             # Special handling for columns that need data from other columns
-            if result_column.column_name == "DATA_TYPE" and result_column.result_set_column_name is None:
-                # Get TYPE_NAME column value for DATA_TYPE calculation
-                for idx, col in enumerate(self._metadata_columns):
-                    if col.column_name == "TYPE_NAME":
-                        type_idx = self._column_index_mapping.get(idx)
-                        if type_idx is not None:
-                            values = table.column(type_idx).to_pylist()
-                        break
-            elif result_column.column_name == "NULLABLE" and result_column.result_set_column_name is None:
-                # Get IS_NULLABLE column value for NULLABLE calculation
-                for idx, col in enumerate(self._metadata_columns):
-                    if col.column_name == "IS_NULLABLE":
-                        nullable_idx = self._column_index_mapping.get(idx)
-                        if nullable_idx is not None:
-                            values = table.column(nullable_idx).to_pylist()
-                        break
-            elif result_column.column_name == "BUFFER_LENGTH" and result_column.result_set_column_name is None:
-                # Get TYPE_NAME column value for BUFFER_LENGTH calculation
-                for idx, col in enumerate(self._metadata_columns):
-                    if col.column_name == "TYPE_NAME":
-                        type_idx = self._column_index_mapping.get(idx)
-                        if type_idx is not None:
-                            values = table.column(type_idx).to_pylist()
-                        break
-
+            if result_column.result_set_column_name is None:
+                values = self._populate_columns_from_others(result_column, table)
+
             # Apply transformation and create column
             if values is not None:
                 if result_column.transform_value:
-                    transformed_values = [result_column.transform_value(v) for v in values]
+                    transformed_values = [
+                        result_column.transform_value(v) for v in values
+                    ]
                     column = pyarrow.array(transformed_values)
                 else:
                     column = pyarrow.array(values)
@@ -409,7 +432,7 @@ def _transform_arrow_table(self, table: "pyarrow.Table") -> "pyarrow.Table":
             else:
                 null_array = pyarrow.nulls(table.num_rows)
                 new_columns.append(null_array)
-
+
             column_names.append(result_column.column_name)
 
         return pyarrow.Table.from_arrays(new_columns, names=column_names)
@@ -428,37 +451,15 @@ def _transform_json_rows(self, rows: List[List[str]]) -> List[List[Any]]:
                 value = row[old_idx]
             else:
                 value = None
-
+
             # Special handling for columns that need data from other columns
-            if result_column.column_name == "DATA_TYPE" and result_column.result_set_column_name is None:
-                # Get TYPE_NAME column value for DATA_TYPE calculation
-                for idx, col in enumerate(self._metadata_columns):
-                    if col.column_name == "TYPE_NAME":
-                        type_idx = self._column_index_mapping.get(idx)
-                        if type_idx is not None and type_idx < len(row):
-                            value = row[type_idx]
-                        break
-            elif result_column.column_name == "NULLABLE" and result_column.result_set_column_name is None:
-                # Get IS_NULLABLE column value for NULLABLE calculation
-                for idx, col in enumerate(self._metadata_columns):
-                    if col.column_name == "IS_NULLABLE":
-                        nullable_idx = self._column_index_mapping.get(idx)
-                        if nullable_idx is not None and nullable_idx < len(row):
-                            value = row[nullable_idx]
-                        break
-            elif result_column.column_name == "BUFFER_LENGTH" and result_column.result_set_column_name is None:
-                # Get TYPE_NAME column value for BUFFER_LENGTH calculation
-                for idx, col in enumerate(self._metadata_columns):
-                    if col.column_name == "TYPE_NAME":
-                        type_idx = self._column_index_mapping.get(idx)
-                        if type_idx is not None and type_idx < len(row):
-                            value = row[type_idx]
-                        break
-
+            if result_column.result_set_column_name is None:
+                value = self._populate_columns_from_others(result_column, row)
+
             # Apply transformation if defined
             if result_column.transform_value:
                 value = result_column.transform_value(value)
-
+
             new_row.append(value)
         transformed_rows.append(new_row)
         return transformed_rows

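The new helper replaces three nearly identical if/elif branches with a single table-driven lookup. Below is a minimal, self-contained sketch of that lookup; the stub ResultColumn and the hand-built index mapping are hypothetical stand-ins, and only the JSON-row case is shown:

from typing import Any, Dict, List, Optional

# Same mapping the commit introduces: target column -> source column.
COLUMN_DATA_MAPPING = {
    "DATA_TYPE": "TYPE_NAME",
    "NULLABLE": "IS_NULLABLE",
    "BUFFER_LENGTH": "TYPE_NAME",
}

class ResultColumn:
    # Stub carrying only the fields the lookup touches.
    def __init__(self, column_name: str, result_set_column_name: Optional[str]):
        self.column_name = column_name
        self.result_set_column_name = result_set_column_name

def populate_from_others(
    target: ResultColumn,
    metadata_columns: List[ResultColumn],
    column_index_mapping: Dict[int, int],
    row: List[Any],
) -> Any:
    # Mirror of _populate_columns_from_others: resolve the source column
    # name, find its physical index, and read the value from the row.
    source_name = COLUMN_DATA_MAPPING.get(target.column_name)
    if source_name is None:
        return None
    for idx, col in enumerate(metadata_columns):
        if col.column_name == source_name:
            source_idx = column_index_mapping.get(idx)
            if source_idx is not None:
                return row[source_idx]
            break
    return None

# NULLABLE has no SEA-provided column, so it borrows IS_NULLABLE's raw value,
# which a transform like transform_nullable_to_int then converts downstream.
columns = [
    ResultColumn("TYPE_NAME", "columnType"),
    ResultColumn("IS_NULLABLE", "isNullable"),
    ResultColumn("NULLABLE", None),
]
index_mapping = {0: 0, 1: 1}  # NULLABLE (index 2) maps to no physical column
print(populate_from_others(columns[2], columns, index_mapping, ["INT", "YES"]))  # YES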
src/databricks/sql/backend/sea/utils/conversion.py

Lines changed: 1 addition & 1 deletion
@@ -150,7 +150,7 @@ def convert_value(
         Returns:
             The converted value in the appropriate Python type
         """
-
+
         # Handle None values directly
         if value is None:
             return None

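The conversion.py hunk is whitespace-only, but the guard it sits beside is worth noting: convert_value returns None before any type-specific work. A small sketch of that guard pattern, where convert_to_int is a hypothetical converter rather than the module's API:

from typing import Any, Callable, Optional

def convert_value_sketch(value: Optional[str], converter: Callable[[str], Any]) -> Any:
    # Handle None values directly, as in the hunk above; only non-None
    # values ever reach the type-specific converter.
    if value is None:
        return None
    return converter(value)

convert_to_int = int  # hypothetical stand-in converter
assert convert_value_sketch(None, convert_to_int) is None
assert convert_value_sketch("42", convert_to_int) == 42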
src/databricks/sql/backend/sea/utils/filters.py

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ def _filter_sea_result_set(
         )
 
         # Preserve metadata columns setup from original result set
-        if hasattr(result_set, '_metadata_columns') and result_set._metadata_columns:
+        if hasattr(result_set, "_metadata_columns") and result_set._metadata_columns:
             filtered_result_set._metadata_columns = result_set._metadata_columns
             filtered_result_set._column_index_mapping = result_set._column_index_mapping
             # Update the description to match the original prepared description

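For context, the guarded copy above keeps a filtered result set behaving like its source for metadata queries: the prepared column specs and index mapping carry over only when the source actually prepared them. A sketch of the pattern with hypothetical stand-in objects, not the driver's classes:

from types import SimpleNamespace

original = SimpleNamespace(
    _metadata_columns=["TABLE_CAT", "TABLE_SCHEM"],  # placeholder column specs
    _column_index_mapping={0: 1, 1: 0},
)
filtered = SimpleNamespace()

# Same guard as the hunk: copy only when the source prepared metadata columns.
if hasattr(original, "_metadata_columns") and original._metadata_columns:
    filtered._metadata_columns = original._metadata_columns
    filtered._column_index_mapping = original._column_index_mapping

assert filtered._column_index_mapping == {0: 1, 1: 0}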
src/databricks/sql/backend/sea/utils/metadata_mappings.py

Lines changed: 103 additions & 37 deletions
@@ -11,7 +11,7 @@
     always_null,
     always_null_int,
     always_null_smallint,
-    identity
+    identity,
 )
 
 
@@ -20,66 +20,132 @@ class MetadataColumnMappings:
 
     # Common columns used across multiple metadata queries
    # FIX 1: Catalog columns - swap the mappings
-    CATALOG_COLUMN = ResultColumn("TABLE_CAT", "catalogName", "string", transform_value=identity)
-    CATALOG_COLUMN_FOR_GET_CATALOGS = ResultColumn("TABLE_CAT", "catalog", "string", transform_value=identity)
+    CATALOG_COLUMN = ResultColumn(
+        "TABLE_CAT", "catalogName", "string", transform_value=identity
+    )
+    CATALOG_COLUMN_FOR_GET_CATALOGS = ResultColumn(
+        "TABLE_CAT", "catalog", "string", transform_value=identity
+    )
     # Remove CATALOG_COLUMN_FOR_TABLES - will use CATALOG_COLUMN instead
-
-    SCHEMA_COLUMN = ResultColumn("TABLE_SCHEM", "namespace", "string", transform_value=identity)
-    SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn("TABLE_SCHEM", "databaseName", "string", transform_value=identity)
-    TABLE_NAME_COLUMN = ResultColumn("TABLE_NAME", "tableName", "string", transform_value=identity)
-    TABLE_TYPE_COLUMN = ResultColumn("TABLE_TYPE", "tableType", "string", transform_value=transform_table_type)
-    REMARKS_COLUMN = ResultColumn("REMARKS", "remarks", "string", transform_value=transform_remarks_default)
+
+    SCHEMA_COLUMN = ResultColumn(
+        "TABLE_SCHEM", "namespace", "string", transform_value=identity
+    )
+    SCHEMA_COLUMN_FOR_GET_SCHEMA = ResultColumn(
+        "TABLE_SCHEM", "databaseName", "string", transform_value=identity
+    )
+    TABLE_NAME_COLUMN = ResultColumn(
+        "TABLE_NAME", "tableName", "string", transform_value=identity
+    )
+    TABLE_TYPE_COLUMN = ResultColumn(
+        "TABLE_TYPE", "tableType", "string", transform_value=transform_table_type
+    )
+    REMARKS_COLUMN = ResultColumn(
+        "REMARKS", "remarks", "string", transform_value=transform_remarks_default
+    )
 
     # Columns specific to getColumns()
-    COLUMN_NAME_COLUMN = ResultColumn("COLUMN_NAME", "col_name", "string", transform_value=identity)
+    COLUMN_NAME_COLUMN = ResultColumn(
+        "COLUMN_NAME", "col_name", "string", transform_value=identity
+    )
     DATA_TYPE_COLUMN = ResultColumn(
         "DATA_TYPE", None, "int", transform_value=calculate_data_type
     )  # Calculated from columnType
-    TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", "columnType", "string", transform_value=identity)
-
+    TYPE_NAME_COLUMN = ResultColumn(
+        "TYPE_NAME", "columnType", "string", transform_value=identity
+    )
+
     # FIX 5: SEA actually provides these columns
-    COLUMN_SIZE_COLUMN = ResultColumn("COLUMN_SIZE", "columnSize", "int", transform_value=identity)
-    DECIMAL_DIGITS_COLUMN = ResultColumn("DECIMAL_DIGITS", "decimalDigits", "int", transform_value=transform_numeric_default_zero)
-    NUM_PREC_RADIX_COLUMN = ResultColumn("NUM_PREC_RADIX", "radix", "int", transform_value=transform_numeric_default_zero)
-    ORDINAL_POSITION_COLUMN = ResultColumn("ORDINAL_POSITION", "ordinalPosition", "int", transform_value=transform_ordinal_position_offset)
-
-    NULLABLE_COLUMN = ResultColumn("NULLABLE", None, "int", transform_value=transform_nullable_to_int)  # Calculated from isNullable
+    COLUMN_SIZE_COLUMN = ResultColumn(
+        "COLUMN_SIZE", "columnSize", "int", transform_value=identity
+    )
+    DECIMAL_DIGITS_COLUMN = ResultColumn(
+        "DECIMAL_DIGITS",
+        "decimalDigits",
+        "int",
+        transform_value=transform_numeric_default_zero,
+    )
+    NUM_PREC_RADIX_COLUMN = ResultColumn(
+        "NUM_PREC_RADIX", "radix", "int", transform_value=transform_numeric_default_zero
+    )
+    ORDINAL_POSITION_COLUMN = ResultColumn(
+        "ORDINAL_POSITION",
+        "ordinalPosition",
+        "int",
+        transform_value=transform_ordinal_position_offset,
+    )
+
+    NULLABLE_COLUMN = ResultColumn(
+        "NULLABLE", None, "int", transform_value=transform_nullable_to_int
+    )  # Calculated from isNullable
     COLUMN_DEF_COLUMN = ResultColumn(
         "COLUMN_DEF", "columnType", "string", transform_value=identity
     )  # Note: duplicate mapping
-    SQL_DATA_TYPE_COLUMN = ResultColumn("SQL_DATA_TYPE", None, "int", transform_value=always_null_int)
-    SQL_DATETIME_SUB_COLUMN = ResultColumn("SQL_DATETIME_SUB", None, "int", transform_value=always_null_int)
-    CHAR_OCTET_LENGTH_COLUMN = ResultColumn("CHAR_OCTET_LENGTH", None, "int", transform_value=always_null_int)
-    IS_NULLABLE_COLUMN = ResultColumn("IS_NULLABLE", "isNullable", "string", transform_value=transform_is_nullable)
+    SQL_DATA_TYPE_COLUMN = ResultColumn(
+        "SQL_DATA_TYPE", None, "int", transform_value=always_null_int
+    )
+    SQL_DATETIME_SUB_COLUMN = ResultColumn(
+        "SQL_DATETIME_SUB", None, "int", transform_value=always_null_int
+    )
+    CHAR_OCTET_LENGTH_COLUMN = ResultColumn(
+        "CHAR_OCTET_LENGTH", None, "int", transform_value=always_null_int
+    )
+    IS_NULLABLE_COLUMN = ResultColumn(
+        "IS_NULLABLE", "isNullable", "string", transform_value=transform_is_nullable
+    )
 
     # Columns for getTables() that don't exist in SEA
-    TYPE_CAT_COLUMN = ResultColumn("TYPE_CAT", None, "string", transform_value=always_null)
-    TYPE_SCHEM_COLUMN = ResultColumn("TYPE_SCHEM", None, "string", transform_value=always_null)
-    TYPE_NAME_COLUMN = ResultColumn("TYPE_NAME", None, "string", transform_value=always_null)
+    TYPE_CAT_COLUMN = ResultColumn(
+        "TYPE_CAT", None, "string", transform_value=always_null
+    )
+    TYPE_SCHEM_COLUMN = ResultColumn(
+        "TYPE_SCHEM", None, "string", transform_value=always_null
+    )
+    TYPE_NAME_COLUMN = ResultColumn(
+        "TYPE_NAME", None, "string", transform_value=always_null
+    )
     SELF_REFERENCING_COL_NAME_COLUMN = ResultColumn(
         "SELF_REFERENCING_COL_NAME", None, "string", transform_value=always_null
     )
-    REF_GENERATION_COLUMN = ResultColumn("REF_GENERATION", None, "string", transform_value=always_null)
-
+    REF_GENERATION_COLUMN = ResultColumn(
+        "REF_GENERATION", None, "string", transform_value=always_null
+    )
+
     # FIX 8: Scope columns (always null per JDBC)
-    SCOPE_CATALOG_COLUMN = ResultColumn("SCOPE_CATALOG", None, "string", transform_value=always_null)
-    SCOPE_SCHEMA_COLUMN = ResultColumn("SCOPE_SCHEMA", None, "string", transform_value=always_null)
-    SCOPE_TABLE_COLUMN = ResultColumn("SCOPE_TABLE", None, "string", transform_value=always_null)
-    SOURCE_DATA_TYPE_COLUMN = ResultColumn("SOURCE_DATA_TYPE", None, "smallint", transform_value=always_null_smallint)
-
+    SCOPE_CATALOG_COLUMN = ResultColumn(
+        "SCOPE_CATALOG", None, "string", transform_value=always_null
+    )
+    SCOPE_SCHEMA_COLUMN = ResultColumn(
+        "SCOPE_SCHEMA", None, "string", transform_value=always_null
+    )
+    SCOPE_TABLE_COLUMN = ResultColumn(
+        "SCOPE_TABLE", None, "string", transform_value=always_null
+    )
+    SOURCE_DATA_TYPE_COLUMN = ResultColumn(
+        "SOURCE_DATA_TYPE", None, "smallint", transform_value=always_null_smallint
+    )
+
     # FIX 9 & 10: Auto increment and generated columns
-    IS_AUTO_INCREMENT_COLUMN = ResultColumn("IS_AUTOINCREMENT", "isAutoIncrement", "string", transform_value=identity)  # No underscore!
-    IS_GENERATED_COLUMN = ResultColumn("IS_GENERATEDCOLUMN", "isGenerated", "string", transform_value=identity)  # SEA provides this
-
+    IS_AUTO_INCREMENT_COLUMN = ResultColumn(
+        "IS_AUTOINCREMENT", "isAutoIncrement", "string", transform_value=identity
+    )  # No underscore!
+    IS_GENERATED_COLUMN = ResultColumn(
+        "IS_GENERATEDCOLUMN", "isGenerated", "string", transform_value=identity
+    )  # SEA provides this
+
     # FIX 11: Buffer length column
-    BUFFER_LENGTH_COLUMN = ResultColumn("BUFFER_LENGTH", None, "int", transform_value=always_null_int)  # Always null per JDBC
+    BUFFER_LENGTH_COLUMN = ResultColumn(
+        "BUFFER_LENGTH", None, "int", transform_value=always_null_int
+    )  # Always null per JDBC
 
     # Column lists for each metadata operation
     CATALOG_COLUMNS = [CATALOG_COLUMN_FOR_GET_CATALOGS]  # Use specific catalog column
 
     SCHEMA_COLUMNS = [
         SCHEMA_COLUMN_FOR_GET_SCHEMA,
-        ResultColumn("TABLE_CATALOG", None, "string", transform_value=always_null),  # Will need special population logic
+        ResultColumn(
+            "TABLE_CATALOG", None, "string", transform_value=always_null
+        ),  # Will need special population logic
     ]
 
     TABLE_COLUMNS = [