Skip to content

Commit 08717be

Browse files
normalise type codes
Signed-off-by: varun-edachali-dbx <[email protected]>
1 parent ad52ed4 commit 08717be

File tree

2 files changed

+82
-17
lines changed

2 files changed

+82
-17
lines changed

src/databricks/sql/backend/sea/backend.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -696,10 +696,10 @@ def get_catalogs(
696696
assert result is not None, "execute_command returned None in synchronous mode"
697697

698698
# Normalize column names and type codes to match JDBC/thrift backend
699-
from .metadata_constants import CATALOG_COLUMNS, normalize_metadata_description
699+
from .metadata_constants import CATALOG_COLUMNS, CATALOG_TYPE_CODES, normalize_metadata_description
700700

701701
result.description = normalize_metadata_description(
702-
result.description, CATALOG_COLUMNS
702+
result.description, CATALOG_COLUMNS, CATALOG_TYPE_CODES
703703
)
704704

705705
return result
@@ -737,10 +737,10 @@ def get_schemas(
737737
assert result is not None, "execute_command returned None in synchronous mode"
738738

739739
# Normalize column names and type codes to match JDBC/thrift backend
740-
from .metadata_constants import SCHEMA_COLUMNS, normalize_metadata_description
740+
from .metadata_constants import SCHEMA_COLUMNS, SCHEMA_TYPE_CODES, normalize_metadata_description
741741

742742
result.description = normalize_metadata_description(
743-
result.description, SCHEMA_COLUMNS
743+
result.description, SCHEMA_COLUMNS, SCHEMA_TYPE_CODES
744744
)
745745

746746
return result
@@ -786,10 +786,10 @@ def get_tables(
786786
assert result is not None, "execute_command returned None in synchronous mode"
787787

788788
# Normalize column names and type codes to match JDBC/thrift backend
789-
from .metadata_constants import TABLE_COLUMNS, normalize_metadata_description
789+
from .metadata_constants import TABLE_COLUMNS, TABLE_TYPE_CODES, normalize_metadata_description
790790

791791
result.description = normalize_metadata_description(
792-
result.description, TABLE_COLUMNS
792+
result.description, TABLE_COLUMNS, TABLE_TYPE_CODES
793793
)
794794

795795
# Apply client-side filtering by table_types

src/databricks/sql/backend/sea/metadata_constants.py

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
to match JDBC DatabaseMetaData standards and thrift backend behavior.
66
"""
77

8-
from typing import List, Tuple, Dict, Any
8+
from typing import List, Tuple, Dict, Any, Optional
99

1010

1111
# Columns for catalogs() - matching JDBC CATALOG_COLUMNS exactly
@@ -70,6 +70,51 @@
7070
# Note: COLUMN_DEF and TYPE_NAME both map to "columnType" - no special handling needed
7171
# since they both reference the same source column in the data
7272

73+
# Type code overrides for metadata columns to match thrift backend behavior
74+
# Format: JDBC column name -> expected type code (e.g. "DATA_TYPE" -> "int")
75+
CATALOG_TYPE_CODES: Dict[str, str] = {
76+
# All columns in catalogs() are strings, so no type code overrides are needed
77+
}
78+
79+
SCHEMA_TYPE_CODES: Dict[str, str] = {
80+
# All columns in schemas() are strings, so no type code overrides are needed
81+
}
82+
83+
TABLE_TYPE_CODES: Dict[str, str] = {
84+
# All columns in tables() are strings, so no type code overrides are needed
85+
}
86+
87+
COLUMN_TYPE_CODES: Dict[str, str] = {
88+
# Integer types
89+
"DATA_TYPE": "int",
90+
"COLUMN_SIZE": "int",
91+
"DECIMAL_DIGITS": "int",
92+
"NUM_PREC_RADIX": "int",
93+
"NULLABLE": "int",
94+
"SQL_DATA_TYPE": "int",
95+
"SQL_DATETIME_SUB": "int",
96+
"CHAR_OCTET_LENGTH": "int",
97+
"ORDINAL_POSITION": "int",
98+
# Small integer types
99+
"SOURCE_DATA_TYPE": "smallint",
100+
# Tiny integer types
101+
"BUFFER_LENGTH": "tinyint",
102+
# String types (listed explicitly for clarity, although they do not need an override)
103+
"TABLE_CAT": "string",
104+
"TABLE_SCHEM": "string",
105+
"TABLE_NAME": "string",
106+
"COLUMN_NAME": "string",
107+
"TYPE_NAME": "string",
108+
"REMARKS": "string",
109+
"COLUMN_DEF": "string",
110+
"IS_NULLABLE": "string",
111+
"SCOPE_CATALOG": "string",
112+
"SCOPE_SCHEMA": "string",
113+
"SCOPE_TABLE": "string",
114+
"IS_AUTOINCREMENT": "string",
115+
"IS_GENERATEDCOLUMN": "string",
116+
}
117+
73118

74119
# Helper functions to work with column definitions
75120
def get_column_names(columns: List[Tuple[str, str]]) -> List[str]:
@@ -85,19 +130,22 @@ def get_column_mapping(columns: List[Tuple[str, str]]) -> Dict[str, str]:
85130

86131

87132
def normalize_metadata_description(
88-
original_description: List[Tuple], column_definitions: List[Tuple[str, str]]
133+
original_description: List[Tuple],
134+
column_definitions: List[Tuple[str, str]],
135+
type_code_overrides: Optional[Dict[str, str]] = None
89136
) -> List[Tuple]:
90137
"""
91-
Transform result set description to use JDBC-standard column names.
138+
Transform result set description to use JDBC-standard column names and type codes.
92139
93140
Args:
94141
original_description: Original PEP-249 description from SEA backend
95142
Format: [(name, type_code, display_size, internal_size,
96143
precision, scale, null_ok), ...]
97144
column_definitions: List of (jdbc_name, sea_source_name) tuples defining mappings
145+
type_code_overrides: Optional dict mapping JDBC column name -> type code; when a column is listed, this type code replaces the one from the original description
98146
99147
Returns:
100-
Normalized description with JDBC column names
148+
Normalized description with JDBC column names and corrected type codes
101149
"""
102150
if not original_description:
103151
return original_description
@@ -115,14 +163,29 @@ def normalize_metadata_description(
115163
# Column exists in original description
116164
orig_idx = sea_col_to_idx[sea_name]
117165
orig_desc = original_description[orig_idx]
118-
# Replace the column name, keep other metadata
119-
new_desc = (jdbc_name,) + orig_desc[1:]
166+
167+
# Check if we need to override the type code
168+
if type_code_overrides and jdbc_name in type_code_overrides:
169+
# Override the type code (second element)
170+
new_desc = (
171+
jdbc_name,
172+
type_code_overrides[jdbc_name],
173+
) + orig_desc[2:]
174+
else:
175+
# Replace the column name, keep other metadata
176+
new_desc = (jdbc_name,) + orig_desc[1:]
177+
120178
normalized_description.append(new_desc)
121179
else:
122180
# Column doesn't exist, add with default metadata
123-
# Use VARCHAR type and nullable=None as defaults
181+
# Check if there's a type override for this column
182+
if type_code_overrides and jdbc_name in type_code_overrides:
183+
type_code = type_code_overrides[jdbc_name]
184+
else:
185+
type_code = "string" # Default type
186+
124187
normalized_description.append(
125-
(jdbc_name, "string", None, None, None, None, None)
188+
(jdbc_name, type_code, None, None, None, None, None)
126189
)
127190

128191
return normalized_description
@@ -132,13 +195,15 @@ def normalize_columns_metadata_description(
132195
original_description: List[Tuple],
133196
) -> List[Tuple]:
134197
"""
135-
Normalization for columns() metadata.
198+
Normalization for columns() metadata with type code overrides.
136199
137200
Args:
138201
original_description: Original description from SEA backend
139202
140203
Returns:
141-
Normalized description matching JDBC COLUMN_COLUMNS
204+
Normalized description matching JDBC COLUMN_COLUMNS with correct type codes
142205
"""
143206
# COLUMN_DEF and TYPE_NAME both map to "columnType" so no special handling needed
144-
return normalize_metadata_description(original_description, COLUMN_COLUMNS)
207+
return normalize_metadata_description(
208+
original_description, COLUMN_COLUMNS, COLUMN_TYPE_CODES
209+
)

0 commit comments

Comments
 (0)