
Commit 5b272e7

Revert "some comparator stuff"
This reverts commit c90875f.
1 parent: c90875f

Showing 3 changed files with 37 additions and 268 deletions.

src/databricks/sql/backend/sea/backend.py

Lines changed: 9 additions & 30 deletions
@@ -736,19 +736,12 @@ def get_schemas(
         )
         assert result is not None, "execute_command returned None in synchronous mode"
 
-        # Normalize column names and transform data to match JDBC/thrift backend
-        from .metadata_constants import SCHEMA_COLUMNS, SCHEMA_TYPE_CODES, normalize_metadata_description, transform_schemas_data_rows
-
-        # Store original description before normalization for data transformation
-        original_description = result.description[:]
-
-        # Normalize the description (column names and types)
+        # Normalize column names to match JDBC/thrift backend
+        from .metadata_constants import SCHEMA_COLUMNS, SCHEMA_TYPE_CODES, normalize_metadata_description
+
         result.description = normalize_metadata_description(
             result.description, SCHEMA_COLUMNS, SCHEMA_TYPE_CODES
         )
-
-        # Transform the actual data rows to match the new column order and format
-        transform_schemas_data_rows(result, catalog_name, original_description)
 
         return result

@@ -792,19 +785,12 @@ def get_tables(
         )
         assert result is not None, "execute_command returned None in synchronous mode"
 
-        # Normalize column names and transform data to match JDBC/thrift backend
-        from .metadata_constants import TABLE_COLUMNS, TABLE_TYPE_CODES, normalize_metadata_description, transform_tables_data_rows
-
-        # Store original description before normalization for data transformation
-        original_description = result.description[:]
-
-        # Normalize the description (column names and types)
+        # Normalize column names to match JDBC/thrift backend
+        from .metadata_constants import TABLE_COLUMNS, TABLE_TYPE_CODES, normalize_metadata_description
+
         result.description = normalize_metadata_description(
             result.description, TABLE_COLUMNS, TABLE_TYPE_CODES
         )
-
-        # Transform the actual data rows to match the new column order and format
-        transform_tables_data_rows(result, catalog_name, original_description)
 
         # Apply client-side filtering by table_types
         from databricks.sql.backend.sea.utils.filters import ResultSetFilter

@@ -853,16 +839,9 @@ def get_columns(
         )
         assert result is not None, "execute_command returned None in synchronous mode"
 
-        # Normalize column names and transform data to match JDBC/thrift backend
-        from .metadata_constants import normalize_columns_metadata_description, transform_columns_data_rows
-
-        # Store original description before normalization for data transformation
-        original_description = result.description[:]
-
-        # Normalize the description (column names and types)
+        # Normalize column names to match JDBC/thrift backend
+        from .metadata_constants import normalize_columns_metadata_description
+
         result.description = normalize_columns_metadata_description(result.description)
-
-        # Transform the actual data rows to match the new column order and format
-        transform_columns_data_rows(result, original_description)
 
         return result
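
Note: normalize_metadata_description itself is untouched by this revert and does not appear in the diff. For review context, here is a minimal sketch of the behavior its call sites imply, assuming the standard 7-tuple cursor description format seen in the tests below; the actual implementation in metadata_constants.py may differ:

from typing import Dict, List, Optional, Tuple

def normalize_metadata_description(
    original_description: List[Tuple],
    column_mapping: List[Tuple[str, Optional[str]]],
    type_codes: Dict[str, str],
) -> List[Tuple]:
    """Sketch: rename/retype description entries to their JDBC equivalents."""
    # Index the SEA description by its original column name.
    by_sea_name = {desc[0]: desc for desc in original_description}
    normalized = []
    for jdbc_name, sea_name in column_mapping:
        source = by_sea_name.get(sea_name) if sea_name else None
        # Reuse the source column's trailing metadata when the SEA
        # description has it; otherwise pad out a standard 7-tuple.
        tail = tuple(source[2:]) if source else (None,) * 5
        normalized.append((jdbc_name, type_codes.get(jdbc_name, "string")) + tail)
    return normalized

Under this reading, the revert keeps the description rename/retype in place and only drops the row rewriting.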

src/databricks/sql/backend/sea/metadata_constants.py

Lines changed: 28 additions & 235 deletions
@@ -39,34 +39,32 @@
     ), # REF_GENERATION_COLUMN (likely None in data)
 ]
 
-# Columns for columns() - mapping JDBC columns to actual SEA SHOW COLUMNS output
-# Based on actual SEA output: col_name, catalogName, namespace, tableName, columnType,
-# columnSize, decimalDigits, radix, isNullable, remarks, ordinalPosition, isAutoIncrement, isGenerated
+# Columns for columns() - matching JDBC COLUMN_COLUMNS exactly
 COLUMN_COLUMNS: List[Tuple[str, str]] = [
-    ("TABLE_CAT", "catalogName"),  # Maps to existing SEA column
-    ("TABLE_SCHEM", "namespace"),  # Maps to existing SEA column
-    ("TABLE_NAME", "tableName"),  # Maps to existing SEA column
-    ("COLUMN_NAME", "col_name"),  # Maps to existing SEA column
-    ("DATA_TYPE", None),  # Calculated from columnType
-    ("TYPE_NAME", "columnType"),  # Maps to existing SEA column
-    ("COLUMN_SIZE", "columnSize"),  # Maps to existing SEA column
-    ("BUFFER_LENGTH", None),  # Not available in SEA - default to None
-    ("DECIMAL_DIGITS", "decimalDigits"),  # Maps to existing SEA column
-    ("NUM_PREC_RADIX", "radix"),  # Maps to existing SEA column
-    ("NULLABLE", None),  # Calculated from isNullable
-    ("REMARKS", "remarks"),  # Maps to existing SEA column
-    ("COLUMN_DEF", None),  # Not available in SEA - default to None
-    ("SQL_DATA_TYPE", None),  # Not available in SEA - default to None
-    ("SQL_DATETIME_SUB", None),  # Not available in SEA - default to None
-    ("CHAR_OCTET_LENGTH", None),  # Not available in SEA - default to None
-    ("ORDINAL_POSITION", "ordinalPosition"),  # Maps to existing SEA column
-    ("IS_NULLABLE", "isNullable"),  # Maps to existing SEA column
-    ("SCOPE_CATALOG", None),  # Not available in SEA - default to None
-    ("SCOPE_SCHEMA", None),  # Not available in SEA - default to None
-    ("SCOPE_TABLE", None),  # Not available in SEA - default to None
-    ("SOURCE_DATA_TYPE", None),  # Not available in SEA - default to None
-    ("IS_AUTO_INCREMENT", "isAutoIncrement"),  # Maps to existing SEA column (renamed from IS_AUTOINCREMENT)
-    # Note: Removing IS_GENERATEDCOLUMN to match Thrift's 23 columns exactly
+    ("TABLE_CAT", "catalogName"),  # CATALOG_COLUMN
+    ("TABLE_SCHEM", "namespace"),  # SCHEMA_COLUMN
+    ("TABLE_NAME", "tableName"),  # TABLE_NAME_COLUMN
+    ("COLUMN_NAME", "col_name"),  # COL_NAME_COLUMN
+    ("DATA_TYPE", "dataType"),  # DATA_TYPE_COLUMN
+    ("TYPE_NAME", "columnType"),  # COLUMN_TYPE_COLUMN
+    ("COLUMN_SIZE", "columnSize"),  # COLUMN_SIZE_COLUMN
+    ("BUFFER_LENGTH", "bufferLength"),  # BUFFER_LENGTH_COLUMN
+    ("DECIMAL_DIGITS", "decimalDigits"),  # DECIMAL_DIGITS_COLUMN
+    ("NUM_PREC_RADIX", "radix"),  # NUM_PREC_RADIX_COLUMN
+    ("NULLABLE", "Nullable"),  # NULLABLE_COLUMN
+    ("REMARKS", "remarks"),  # REMARKS_COLUMN
+    ("COLUMN_DEF", "columnType"),  # COLUMN_DEF_COLUMN (same source as TYPE_NAME)
+    ("SQL_DATA_TYPE", "SQLDataType"),  # SQL_DATA_TYPE_COLUMN
+    ("SQL_DATETIME_SUB", "SQLDateTimeSub"),  # SQL_DATETIME_SUB_COLUMN
+    ("CHAR_OCTET_LENGTH", "CharOctetLength"),  # CHAR_OCTET_LENGTH_COLUMN
+    ("ORDINAL_POSITION", "ordinalPosition"),  # ORDINAL_POSITION_COLUMN
+    ("IS_NULLABLE", "isNullable"),  # IS_NULLABLE_COLUMN
+    ("SCOPE_CATALOG", "ScopeCatalog"),  # SCOPE_CATALOG_COLUMN
+    ("SCOPE_SCHEMA", "ScopeSchema"),  # SCOPE_SCHEMA_COLUMN
+    ("SCOPE_TABLE", "ScopeTable"),  # SCOPE_TABLE_COLUMN
+    ("SOURCE_DATA_TYPE", "SourceDataType"),  # SOURCE_DATA_TYPE_COLUMN
+    ("IS_AUTOINCREMENT", "isAutoIncrement"),  # IS_AUTO_INCREMENT_COLUMN
+    ("IS_GENERATEDCOLUMN", "isGenerated"),  # IS_GENERATED_COLUMN
 ]
 
 # Note: COLUMN_DEF and TYPE_NAME both map to "columnType" - no special handling needed

@@ -113,7 +111,8 @@
     "SCOPE_CATALOG": "string",
     "SCOPE_SCHEMA": "string",
     "SCOPE_TABLE": "string",
-    "IS_AUTO_INCREMENT": "string",
+    "IS_AUTOINCREMENT": "string",
+    "IS_GENERATEDCOLUMN": "string",
 }
 
 

@@ -204,213 +203,7 @@ def normalize_columns_metadata_description(
     Returns:
         Normalized description matching JDBC COLUMN_COLUMNS with correct type codes
     """
+    # COLUMN_DEF and TYPE_NAME both map to "columnType" so no special handling needed
     return normalize_metadata_description(
         original_description, COLUMN_COLUMNS, COLUMN_TYPE_CODES
     )
-
-
-def transform_schemas_data_rows(result_set, catalog_name: str, original_description: List[Tuple]) -> None:
-    """
-    Transform SEA schemas() data rows to match JDBC format.
-
-    Args:
-        result_set: The SEA result set to modify
-        catalog_name: The catalog name to add as TABLE_CATALOG
-        original_description: Original column descriptions before normalization
-    """
-    if not hasattr(result_set, 'rows') or not result_set.rows:
-        return
-
-    # Build mapping from original column names to their indices
-    original_col_to_idx = {}
-    for idx, col_desc in enumerate(original_description):
-        original_col_to_idx[col_desc[0]] = idx
-
-    # Transform each row to JDBC format: (TABLE_SCHEM, TABLE_CATALOG)
-    new_rows = []
-    for row in result_set.rows:
-        # Convert row to list for easier manipulation
-        if hasattr(row, '_asdict'):
-            row_dict = row._asdict()
-            row_data = [row_dict.get(col_desc[0]) for col_desc in original_description]
-        else:
-            row_data = list(row)
-
-        # Extract schema name from databaseName field
-        schema_name = None
-        if 'databaseName' in original_col_to_idx:
-            idx = original_col_to_idx['databaseName']
-            schema_name = row_data[idx] if idx < len(row_data) else None
-            # Remove quotes if present
-            if schema_name and schema_name.startswith("'") and schema_name.endswith("'"):
-                schema_name = schema_name[1:-1]
-
-        # Create new row: (TABLE_SCHEM, TABLE_CATALOG)
-        new_row_data = (schema_name, catalog_name)
-        new_rows.append(new_row_data)
-
-    # Replace the rows in the result set
-    result_set.rows = new_rows
-
-
-def transform_tables_data_rows(result_set, catalog_name: str, original_description: List[Tuple]) -> None:
-    """
-    Transform SEA tables() data rows to match JDBC format.
-
-    Args:
-        result_set: The SEA result set to modify
-        catalog_name: The catalog name to add as TABLE_CAT
-        original_description: Original column descriptions before normalization
-    """
-    if not hasattr(result_set, 'rows') or not result_set.rows:
-        return
-
-    # Build mapping from original column names to their indices
-    original_col_to_idx = {}
-    for idx, col_desc in enumerate(original_description):
-        original_col_to_idx[col_desc[0]] = idx
-
-    # Transform each row to JDBC format
-    new_rows = []
-    for row in result_set.rows:
-        # Convert row to list for easier manipulation
-        if hasattr(row, '_asdict'):
-            row_dict = row._asdict()
-            row_data = [row_dict.get(col_desc[0]) for col_desc in original_description]
-        else:
-            row_data = list(row)
-
-        # Extract values from original SHOW TABLES output
-        table_schema = None
-        table_name = None
-        is_temporary = None
-
-        if 'database' in original_col_to_idx:
-            idx = original_col_to_idx['database']
-            table_schema = row_data[idx] if idx < len(row_data) else None
-
-        if 'tableName' in original_col_to_idx:
-            idx = original_col_to_idx['tableName']
-            table_name = row_data[idx] if idx < len(row_data) else None
-
-        if 'isTemporary' in original_col_to_idx:
-            idx = original_col_to_idx['isTemporary']
-            is_temporary = row_data[idx] if idx < len(row_data) else None
-
-        # Determine table type based on isTemporary flag
-        table_type = "TEMPORARY TABLE" if is_temporary else "TABLE"
-
-        # Create new row with JDBC format:
-        # (TABLE_CAT, TABLE_SCHEM, TABLE_NAME, TABLE_TYPE, REMARKS, TYPE_CAT, TYPE_SCHEM, TYPE_NAME, SELF_REFERENCING_COL_NAME, REF_GENERATION)
-        new_row_data = (
-            catalog_name,  # TABLE_CAT
-            table_schema,  # TABLE_SCHEM
-            table_name,  # TABLE_NAME
-            table_type,  # TABLE_TYPE
-            "",  # REMARKS (empty string)
-            None,  # TYPE_CAT
-            None,  # TYPE_SCHEM
-            None,  # TYPE_NAME
-            None,  # SELF_REFERENCING_COL_NAME
-            None,  # REF_GENERATION
-        )
-        new_rows.append(new_row_data)
-
-    # Replace the rows in the result set
-    result_set.rows = new_rows
-
-
-def transform_columns_data_rows(result_set, original_description: List[Tuple]) -> None:
-    """
-    Transform SEA columns() data rows to match JDBC format and column order.
-
-    This function modifies the result_set.rows in place to:
-    1. Reorder columns to match JDBC standard
-    2. Transform data types (e.g., string to int for DATA_TYPE)
-    3. Add missing columns with appropriate defaults
-    4. Remove extra columns not in JDBC standard
-
-    Args:
-        result_set: The SEA result set to modify
-        original_description: Original column descriptions before normalization
-    """
-    if not hasattr(result_set, 'rows') or not result_set.rows:
-        return
-
-    # Build mapping from original column names to their indices
-    original_col_to_idx = {}
-    for idx, col_desc in enumerate(original_description):
-        original_col_to_idx[col_desc[0]] = idx
-
-    # SQL type code mapping for DATA_TYPE field
-    TYPE_CODE_MAP = {
-        'INT': 4, 'INTEGER': 4,
-        'BIGINT': -5,
-        'SMALLINT': 5,
-        'TINYINT': -6,
-        'FLOAT': 6,
-        'DOUBLE': 8,
-        'DECIMAL': 3, 'NUMERIC': 3,
-        'STRING': 12, 'VARCHAR': 12,
-        'BOOLEAN': 16,
-        'DATE': 91,
-        'TIMESTAMP': 93,
-        'BINARY': -2,
-        'ARRAY': 2003,
-        'STRUCT': 2002,
-        'MAP': 2003,
-    }
-
-    # Special handling for DECIMAL types with precision/scale
-    def parse_decimal_type(type_str):
-        """Parse DECIMAL(precision,scale) to extract base type."""
-        if type_str and type_str.upper().startswith('DECIMAL'):
-            return 'DECIMAL'
-        return type_str
-
-    # Transform each row
-    new_rows = []
-    for row in result_set.rows:
-        # Convert row to list for easier manipulation
-        if hasattr(row, '_asdict'):
-            row_dict = row._asdict()
-            row_data = [row_dict.get(col_desc[0]) for col_desc in original_description]
-        else:
-            row_data = list(row)
-
-        # Build new row according to JDBC column order
-        new_row_data = []
-
-        for jdbc_col, sea_col in COLUMN_COLUMNS:
-            if sea_col and sea_col in original_col_to_idx:
-                # Column exists in original data
-                original_idx = original_col_to_idx[sea_col]
-                value = row_data[original_idx] if original_idx < len(row_data) else None
-
-                # Special transformations
-                if jdbc_col == "DATA_TYPE" and value:
-                    # Convert type name to SQL type code
-                    base_type = parse_decimal_type(str(value))
-                    value = TYPE_CODE_MAP.get(str(base_type).upper(), 12)  # Default to VARCHAR
-                elif jdbc_col == "NULLABLE" and sea_col == "isNullable":
-                    # Convert boolean string to int (1=nullable, 0=not nullable)
-                    value = 1 if str(value).lower() == 'true' else 0
-
-                new_row_data.append(value)
-            else:
-                # Column doesn't exist in SEA, use appropriate default
-                if jdbc_col == "DATA_TYPE":
-                    new_row_data.append(12)  # Default to VARCHAR
-                elif jdbc_col == "NULLABLE":
-                    new_row_data.append(1)  # Default to nullable
-                elif jdbc_col in ["BUFFER_LENGTH", "SQL_DATA_TYPE", "SQL_DATETIME_SUB",
-                                  "CHAR_OCTET_LENGTH", "COLUMN_DEF", "SCOPE_CATALOG",
-                                  "SCOPE_SCHEMA", "SCOPE_TABLE", "SOURCE_DATA_TYPE"]:
-                    new_row_data.append(None)
-                else:
-                    new_row_data.append(None)
-
-        new_rows.append(tuple(new_row_data))
-
-    # Replace the rows in the result set
-    result_set.rows = new_rows
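
Behaviorally, the main thing this file loses is the DATA_TYPE conversion from Spark type names to java.sql.Types codes; after the revert, DATA_TYPE passes through the raw dataType column instead. For reference, here is a distilled, self-contained version of the removed conversion (TYPE_CODE_MAP excerpted from the deleted code; this snippet is not part of the surviving module):

# Excerpt of the removed TYPE_CODE_MAP (java.sql.Types codes):
TYPE_CODE_MAP = {"INT": 4, "BIGINT": -5, "DECIMAL": 3, "STRING": 12, "BOOLEAN": 16}

def jdbc_type_code(column_type: str) -> int:
    # Collapse parameterized types such as DECIMAL(10,2) to their base
    # name, as the removed parse_decimal_type helper did.
    base = "DECIMAL" if column_type.upper().startswith("DECIMAL") else column_type
    # Unmapped types fall back to 12 (VARCHAR), matching the removed default.
    return TYPE_CODE_MAP.get(base.upper(), 12)

print(jdbc_type_code("DECIMAL(10,2)"))  # 3
print(jdbc_type_code("string"))         # 12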

tests/unit/test_sea_backend.py

Lines changed: 0 additions & 3 deletions
@@ -683,7 +683,6 @@ def test_get_schemas(self, sea_client, sea_session_id, mock_cursor):
             ("databaseName", "string", None, None, None, None, None),
             ("catalogName", "string", None, None, None, None, None),
         ]
-        mock_result_set.rows = []  # Add empty rows for the transformation function
         with patch.object(
             sea_client, "execute_command", return_value=mock_result_set
         ) as mock_execute:

@@ -755,7 +754,6 @@ def test_get_tables(self, sea_client, sea_session_id, mock_cursor):
             ("tableType", "string", None, None, None, None, None),
             ("remarks", "string", None, None, None, None, None),
         ]
-        mock_result_set.rows = []  # Add empty rows for the transformation function
 
         with patch.object(
             sea_client, "execute_command", return_value=mock_result_set

@@ -849,7 +847,6 @@ def test_get_columns(self, sea_client, sea_session_id, mock_cursor):
             ("dataType", "int", None, None, None, None, None),
             ("columnType", "string", None, None, None, None, None),
         ]
-        mock_result_set.rows = []  # Add empty rows for the transformation function
         with patch.object(
             sea_client, "execute_command", return_value=mock_result_set
         ) as mock_execute:
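
The surviving test shape, for reference, as a sketch: the fixture names come from the test signatures above, but the get_schemas argument order and the catalog value are guesses, not taken from this diff:

from unittest.mock import MagicMock, patch

def run_get_schemas_check(sea_client, sea_session_id, mock_cursor):
    # Mirrors the test_get_schemas setup above, minus the deleted line.
    mock_result_set = MagicMock()
    mock_result_set.description = [
        ("databaseName", "string", None, None, None, None, None),
        ("catalogName", "string", None, None, None, None, None),
    ]
    # mock_result_set.rows is no longer set: the reverted get_schemas
    # rewrites result.description only and never reads the data rows.
    with patch.object(
        sea_client, "execute_command", return_value=mock_result_set
    ) as mock_execute:
        result = sea_client.get_schemas(
            sea_session_id, mock_cursor, catalog_name="hive_metastore"  # hypothetical arguments
        )
    mock_execute.assert_called_once()
    return result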
