
Commit ee8cc29

Added better parsing

1 parent 7b4cfe0 · commit ee8cc29

File tree

3 files changed: +103 -51 lines changed

src/databricks/sqlalchemy/_types.py
tests/test_local/test_ddl.py
tests/test_local/test_parsing.py

src/databricks/sqlalchemy/_types.py

Lines changed: 10 additions & 41 deletions
@@ -337,26 +337,12 @@ class DatabricksArray(UserDefinedType):
     def __init__(self, item_type):
         self.item_type = item_type() if isinstance(item_type, type) else item_type
 
-    def get_col_spec(self, **kw):
-        if isinstance(self.item_type, UserDefinedType):
-            # If it's a UserDefinedType, call its get_col_spec directly
-            inner_type = self.item_type.get_col_spec(**kw)
-        elif isinstance(self.item_type, TypeDecorator):
-            # If it's a TypeDecorator, we need to get its dialect implementation
-            dialect = kw.get("type_expression", None)
-            if dialect:
-                dialect = dialect.dialect
-                impl = self.item_type.load_dialect_impl(dialect)
-                # Compile the implementation type
-                inner_type = impl.compile(dialect=dialect)
-            else:
-                # Fallback if no dialect available
-                inner_type = self.item_type.impl.__class__.__name__.upper()
-        else:
-            # For basic SQLAlchemy types, use class name
-            inner_type = self.item_type.__class__.__name__.upper()
 
-        return f"ARRAY<{inner_type}>"
+@compiles(DatabricksArray, "databricks")
+def compile_databricks_array(type_, compiler, **kw):
+    inner = compiler.process(type_.item_type, **kw)
+
+    return f"ARRAY<{inner}>"
 
 
 class DatabricksMap(UserDefinedType):
@@ -373,26 +359,9 @@ def __init__(self, key_type, value_type):
         self.key_type = key_type() if isinstance(key_type, type) else key_type
         self.value_type = value_type() if isinstance(value_type, type) else value_type
 
-    def get_col_spec(self, **kw):
-        def process_type(type_obj):
-            if isinstance(type_obj, UserDefinedType):
-                # If it's a UserDefinedType, call its get_col_spec directly
-                return type_obj.get_col_spec(**kw)
-            elif isinstance(type_obj, TypeDecorator):
-                # If it's a TypeDecorator, we need to get its dialect implementation
-                dialect = kw.get("type_expression", None)
-                if dialect:
-                    dialect = dialect.dialect
-                    impl = type_obj.load_dialect_impl(dialect)
-                    # Compile the implementation type
-                    return impl.compile(dialect=dialect)
-                else:
-                    # Fallback if no dialect available
-                    return type_obj.impl.__class__.__name__.upper()
-            else:
-                # For basic SQLAlchemy types, use class name
-                return type_obj.__class__.__name__.upper()
 
-        key_type = process_type(self.key_type)
-        value_type = process_type(self.value_type)
-        return f"MAP<{key_type},{value_type}>"
+@compiles(DatabricksMap, "databricks")
+def compile_databricks_map(type_, compiler, **kw):
+    key_type = compiler.process(type_.key_type, **kw)
+    value_type = compiler.process(type_.value_type, **kw)
+    return f"MAP<{key_type},{value_type}>"

tests/test_local/test_ddl.py

Lines changed: 9 additions & 10 deletions
@@ -1,13 +1,14 @@
 import pytest
-from sqlalchemy import Column, MetaData, String, Table, Numeric, create_engine
+from sqlalchemy import Column, MetaData, String, Table, Numeric, Integer, create_engine
 from sqlalchemy.schema import (
     CreateTable,
     DropColumnComment,
     DropTableComment,
     SetColumnComment,
     SetTableComment,
 )
-from databricks.sqlalchemy import DatabricksArray,DatabricksMap
+from databricks.sqlalchemy import DatabricksArray, DatabricksMap
+
 
 class DDLTestBase:
     engine = create_engine(
@@ -95,21 +96,19 @@ def test_alter_table_drop_comment(self, table_with_comment):
         output = self.compile(stmt)
         assert output == "COMMENT ON TABLE martin IS NULL"
 
+
 class TestTableComplexTypeDDL(DDLTestBase):
-    @pytest.fixture
+    @pytest.fixture(scope="class")
     def metadata(self) -> MetaData:
         metadata = MetaData()
-        col1 = Column("array_array_string",DatabricksArray(DatabricksArray(String)))
-        col2 = Column("map_string_string",DatabricksMap(String,String))
-        col3 = Column("array_array_decimal",DatabricksArray(DatabricksArray(Numeric(10,2))))
-        table = Table("complex_type", metadata, col1,col2,col3)
+        col1 = Column("array_array_string", DatabricksArray(DatabricksArray(String)))
+        col2 = Column("map_string_string", DatabricksMap(String, String))
+        table = Table("complex_type", metadata, col1, col2)
         return metadata
-
+
     def test_create_table_with_complex_type(self, metadata):
         stmt = CreateTable(metadata.tables["complex_type"])
         output = self.compile(stmt)
 
-        print(output)
         assert "array_array_string ARRAY<ARRAY<STRING>>" in output
         assert "map_string_string MAP<STRING,STRING>" in output
-        assert "array_array_decimal ARRAY<ARRAY<DECIMAL(10,2)>>" in output

tests/test_local/test_parsing.py

Lines changed: 84 additions & 0 deletions
@@ -9,7 +9,29 @@
     get_comment_from_dte_output,
     DatabricksSqlAlchemyParseException,
 )
+from sqlalchemy import (
+    BigInteger,
+    Boolean,
+    Column,
+    Date,
+    DateTime,
+    Integer,
+    Numeric,
+    String,
+    Time,
+    Uuid,
+)
+
+from databricks.sqlalchemy import (
+    DatabricksArray,
+    TIMESTAMP,
+    TINYINT,
+    DatabricksMap,
+    TIMESTAMP_NTZ,
+)
+from databricks.sqlalchemy import DatabricksDialect
 
+dialect = DatabricksDialect()
 
 # These are outputs from DESCRIBE TABLE EXTENDED
 @pytest.mark.parametrize(
@@ -158,3 +180,65 @@ def test_filter_dict_by_value(match, output):
 
 def test_get_comment_from_dte_output():
     assert get_comment_from_dte_output(FMT_SAMPLE_DT_OUTPUT) == "some comment"
+
+
+def get_databricks_non_compound_types():
+    return [
+        Integer,
+        String,
+        Boolean,
+        Date,
+        DateTime,
+        Time,
+        Uuid,
+        Numeric,
+        TINYINT,
+        TIMESTAMP,
+        TIMESTAMP_NTZ,
+    ]
+
+
+@pytest.mark.parametrize("internal_type", get_databricks_non_compound_types())
+def test_array_parsing(internal_type):
+    array_type = DatabricksArray(internal_type())
+
+    actual_parsed = array_type.compile(dialect=dialect)
+    expected_parsed = "ARRAY<{}>".format(internal_type().compile(dialect=dialect))
+    assert actual_parsed == expected_parsed
+
+
+@pytest.mark.parametrize("internal_type_1", get_databricks_non_compound_types())
+@pytest.mark.parametrize("internal_type_2", get_databricks_non_compound_types())
+def test_map_parsing(internal_type_1, internal_type_2):
+    map_type = DatabricksMap(internal_type_1(), internal_type_2())
+
+    actual_parsed = map_type.compile(dialect=dialect)
+    expected_parsed = "MAP<{},{}>".format(
+        internal_type_1().compile(dialect=dialect),
+        internal_type_2().compile(dialect=dialect),
+    )
+    assert actual_parsed == expected_parsed
+
+
+@pytest.mark.parametrize("internal_type", get_databricks_non_compound_types())
+def test_multilevel_array_type_parsing(internal_type):
+    array_type = DatabricksArray(DatabricksArray(DatabricksArray(internal_type())))
+
+    actual_parsed = array_type.compile(dialect=dialect)
+    expected_parsed = "ARRAY<ARRAY<ARRAY<{}>>>".format(
+        internal_type().compile(dialect=dialect)
+    )
+    assert actual_parsed == expected_parsed
+
+
+@pytest.mark.parametrize("internal_type", get_databricks_non_compound_types())
+def test_multilevel_map_type_parsing(internal_type):
+    map_type = DatabricksMap(
+        String, DatabricksMap(String, DatabricksMap(String, internal_type()))
+    )
+
+    actual_parsed = map_type.compile(dialect=dialect)
+    expected_parsed = "MAP<STRING,MAP<STRING,MAP<STRING,{}>>>".format(
+        internal_type().compile(dialect=dialect)
+    )
+    assert actual_parsed == expected_parsed
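A note on coverage: with the 11 non-compound types above, test_map_parsing expands to 121 key/value combinations. One quick interactive check of a TypeDecorator-backed case (a sketch, not part of the commit; it assumes TINYINT renders as TINYINT under this dialect):

# Sketch only: TypeDecorator-backed types now flow through compiler.process
# rather than the removed load_dialect_impl fallback in get_col_spec.
from databricks.sqlalchemy import DatabricksArray, DatabricksDialect, TINYINT

print(DatabricksArray(TINYINT()).compile(dialect=DatabricksDialect()))
# -> ARRAY<TINYINT> (assumed rendering)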
