snowflakedb
diff --git a/‎DESCRIPTION.md
Lines changed: 2 additions & 0 deletions b/‎DESCRIPTION.md
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/snowflake/sqlalchemy/name_utils.py
Lines changed: 36 additions & 0 deletions b/‎src/snowflake/sqlalchemy/name_utils.py
Lines changed: 36 additions & 0 deletions
diff --git a/‎src/snowflake/sqlalchemy/parser/custom_type_parser.py
Lines changed: 3 additions & 3 deletions b/‎src/snowflake/sqlalchemy/parser/custom_type_parser.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/snowflake/sqlalchemy/snowdialect.py
Lines changed: 64 additions & 150 deletions b/‎src/snowflake/sqlalchemy/snowdialect.py
Lines changed: 64 additions & 150 deletions
@@ -7,6 +7,8 @@ Snowflake Documentation is available at:
 Source code is also available at:
 <https://github.com/snowflakedb/snowflake-sqlalchemy>
 
+# Unreleased Notes
+- Fix exception for structured type columns dropped while collecting meetadata
 # Release Notes
 - v1.8.0(TBD)
   - Drop support for python 3.8
 
@@ -0,0 +1,36 @@
+#
+# Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
+
+from sqlalchemy.sql.compiler import IdentifierPreparer
+from sqlalchemy.sql.elements import quoted_name
+
+
+class _NameUtils:
+
+    def __init__(self, identifier_preparer: IdentifierPreparer) -> None:
+        self.identifier_preparer = identifier_preparer
+
+    def normalize_name(self, name):
+        if name is None:
+            return None
+        if name == "":
+            return ""
+        if name.upper() == name and not self.identifier_preparer._requires_quotes(
+            name.lower()
+        ):
+            return name.lower()
+        elif name.lower() == name:
+            return quoted_name(name, quote=True)
+        else:
+            return name
+
+    def denormalize_name(self, name):
+        if name is None:
+            return None
+        if name == "":
+            return ""
+        elif name.lower() == name and not self.identifier_preparer._requires_quotes(
+            name.lower()
+        ):
+            name = name.upper()
+        return name
@@ -2,9 +2,8 @@
 # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
 from typing import List
 
-import sqlalchemy.types as sqltypes
-from sqlalchemy.sql.type_api import TypeEngine
-from sqlalchemy.types import (
+import sqlalchemy.sql.sqltypes as sqltypes
+from sqlalchemy.sql.sqltypes import (
     BIGINT,
     BINARY,
     BOOLEAN,
@@ -21,6 +20,7 @@
     VARCHAR,
     NullType,
 )
+from sqlalchemy.sql.type_api import TypeEngine
 
 from ..custom_types import (
     _CUSTOM_DECIMAL,
 
@@ -2,7 +2,6 @@
 # Copyright (c) 2012-2023 Snowflake Computing Inc. All rights reserved.
 #
 import operator
-import re
 from collections import defaultdict
 from enum import Enum
 from functools import reduce
@@ -16,14 +15,15 @@
 from sqlalchemy.engine import URL, default, reflection
 from sqlalchemy.schema import Table
 from sqlalchemy.sql import text
-from sqlalchemy.sql.elements import quoted_name
 from sqlalchemy.sql.sqltypes import NullType
 from sqlalchemy.types import FLOAT, Date, DateTime, Float, Time
 
 from snowflake.connector import errors as sf_errors
 from snowflake.connector.connection import DEFAULT_CONFIGURATION
 from snowflake.connector.constants import UTF8
 from snowflake.sqlalchemy.compat import returns_unicode
+from snowflake.sqlalchemy.name_utils import _NameUtils
+from snowflake.sqlalchemy.structured_type_info_manager import _StructuredTypeInfoManager
 
 from ._constants import DIALECT_NAME
 from .base import (
@@ -42,7 +42,7 @@
 )
 from .parser.custom_type_parser import *  # noqa
 from .parser.custom_type_parser import _CUSTOM_DECIMAL  # noqa
-from .parser.custom_type_parser import ischema_names, parse_index_columns, parse_type
+from .parser.custom_type_parser import ischema_names, parse_index_columns
 from .sql.custom_schema.custom_table_prefix import CustomTablePrefix
 from .util import (
     _update_connection_application_name,
@@ -157,6 +157,7 @@ def __init__(
         super().__init__(isolation_level=isolation_level, **kwargs)
         self.force_div_is_floordiv = force_div_is_floordiv
         self.div_is_floordiv = force_div_is_floordiv
+        self.name_utils = _NameUtils(self.identifier_preparer)
 
     def initialize(self, connection):
         super().initialize(connection)
@@ -282,29 +283,10 @@ def _has_object(self, connection, object_type, object_name, schema=None):
             raise
 
     def normalize_name(self, name):
-        if name is None:
-            return None
-        if name == "":
-            return ""
-        if name.upper() == name and not self.identifier_preparer._requires_quotes(
-            name.lower()
-        ):
-            return name.lower()
-        elif name.lower() == name:
-            return quoted_name(name, quote=True)
-        else:
-            return name
+        return self.name_utils.normalize_name(name)
 
     def denormalize_name(self, name):
-        if name is None:
-            return None
-        if name == "":
-            return ""
-        elif name.lower() == name and not self.identifier_preparer._requires_quotes(
-            name.lower()
-        ):
-            name = name.upper()
-        return name
+        return self.name_utils.denormalize_name(name)
 
     def _denormalize_quote_join(self, *idents):
         ip = self.identifier_preparer
@@ -491,53 +473,31 @@ def get_foreign_keys(self, connection, table_name, schema=None, **kw):
         )
         return foreign_key_map.get(table_name, [])
 
-    def table_columns_as_dict(self, columns):
-        result = {}
-        for column in columns:
-            result[column["name"]] = column
-        return result
-
     @reflection.cache
     def _get_schema_columns(self, connection, schema, **kw):
         """Get all columns in the schema, if we hit 'Information schema query returned too much data' problem return
         None, as it is cacheable and is an unexpected return type for this function"""
         ans = {}
-        current_database, _ = self._current_database_schema(connection, **kw)
+
+        schema_name = self.denormalize_name(schema)
+
+        result = self._query_all_columns_info(connection, schema_name, **kw)
+        if result is None:
+            return None
+
+        current_database, default_schema = self._current_database_schema(
+            connection, **kw
+        )
         full_schema_name = self._denormalize_quote_join(current_database, schema)
-        full_columns_descriptions = {}
-        try:
-            schema_primary_keys = self._get_schema_primary_keys(
-                connection, full_schema_name, **kw
-            )
-            schema_name = self.denormalize_name(schema)
 
-            result = connection.execute(
-                text(
-                    """
-            SELECT /* sqlalchemy:_get_schema_columns */
-                   ic.table_name,
-                   ic.column_name,
-                   ic.data_type,
-                   ic.character_maximum_length,
-                   ic.numeric_precision,
-                   ic.numeric_scale,
-                   ic.is_nullable,
-                   ic.column_default,
-                   ic.is_identity,
-                   ic.comment,
-                   ic.identity_start,
-                   ic.identity_increment
-              FROM information_schema.columns ic
-             WHERE ic.table_schema=:table_schema
-             ORDER BY ic.ordinal_position"""
-                ),
-                {"table_schema": schema_name},
-            )
-        except sa_exc.ProgrammingError as pe:
-            if pe.orig.errno == 90030:
-                # This means that there are too many tables in the schema, we need to go more granular
-                return None  # None triggers _get_table_columns while staying cacheable
-            raise
+        schema_primary_keys = self._get_schema_primary_keys(
+            connection, full_schema_name, **kw
+        )
+
+        structured_type_info_manager = _StructuredTypeInfoManager(
+            connection, self.name_utils, default_schema
+        )
+
         for (
             table_name,
             column_name,
@@ -572,25 +532,11 @@ def _get_schema_columns(self, connection, schema, **kw):
                 elif issubclass(col_type, (sqltypes.String, sqltypes.BINARY)):
                     col_type_kw["length"] = character_maximum_length
                 elif issubclass(col_type, StructuredType):
-                    if (schema_name, table_name) not in full_columns_descriptions:
-                        full_columns_descriptions[(schema_name, table_name)] = (
-                            self.table_columns_as_dict(
-                                self._get_table_columns(
-                                    connection, table_name, schema_name
-                                )
-                            )
-                        )
-
-                    if (
-                        (schema_name, table_name) in full_columns_descriptions
-                        and column_name
-                        in full_columns_descriptions[(schema_name, table_name)]
-                    ):
-                        ans[table_name].append(
-                            full_columns_descriptions[(schema_name, table_name)][
-                                column_name
-                            ]
-                        )
+                    column_info = structured_type_info_manager.get_column_info(
+                        schema_name, table_name, column_name, **kw
+                    )
+                    if column_info:
+                        ans[table_name].append(column_info)
                         continue
                     else:
                         col_type = NullType
@@ -628,72 +574,6 @@ def _get_schema_columns(self, connection, schema, **kw):
                 }
         return ans
 
-    @reflection.cache
-    def _get_table_columns(self, connection, table_name, schema=None, **kw):
-        """Get all columns in a table in a schema"""
-        ans = []
-        current_database, default_schema = self._current_database_schema(
-            connection, **kw
-        )
-        schema = schema if schema else default_schema
-        table_schema = self.denormalize_name(schema)
-        table_name = self.denormalize_name(table_name)
-        result = connection.execute(
-            text(
-                "DESC /* sqlalchemy:_get_schema_columns */"
-                f" TABLE {table_schema}.{table_name} TYPE = COLUMNS"
-            )
-        )
-        for desc_data in result:
-            column_name = desc_data[0]
-            coltype = desc_data[1]
-            is_nullable = desc_data[3]
-            column_default = desc_data[4]
-            primary_key = desc_data[5]
-            comment = desc_data[9]
-
-            column_name = self.normalize_name(column_name)
-            if column_name.startswith("sys_clustering_column"):
-                continue  # ignoring clustering column
-            type_instance = parse_type(coltype)
-            if isinstance(type_instance, NullType):
-                sa_util.warn(
-                    f"Did not recognize type '{coltype}' of column '{column_name}'"
-                )
-
-            identity = None
-            match = re.match(
-                r"IDENTITY START (?P<start>\d+) INCREMENT (?P<increment>\d+) (?P<order_type>ORDER|NOORDER)",
-                column_default if column_default else "",
-            )
-            if match:
-                identity = {
-                    "start": int(match.group("start")),
-                    "increment": int(match.group("increment")),
-                    "order_type": match.group("order_type"),
-                }
-            is_identity = identity is not None
-
-            ans.append(
-                {
-                    "name": column_name,
-                    "type": type_instance,
-                    "nullable": is_nullable == "Y",
-                    "default": None if is_identity else column_default,
-                    "autoincrement": is_identity,
-                    "comment": comment if comment != "" else None,
-                    "primary_key": primary_key == "Y",
-                }
-            )
-
-            if is_identity:
-                ans[-1]["identity"] = identity
-
-        # If we didn't find any columns for the table, the table doesn't exist.
-        if len(ans) == 0:
-            raise sa_exc.NoSuchTableError()
-        return ans
-
     def get_columns(self, connection, table_name, schema=None, **kw):
         """
         Gets all column info given the table info
@@ -704,8 +584,11 @@ def get_columns(self, connection, table_name, schema=None, **kw):
 
         schema_columns = self._get_schema_columns(connection, schema, **kw)
         if schema_columns is None:
+            column_info_manager = _StructuredTypeInfoManager(
+                connection, self.name_utils, self.default_schema_name
+            )
             # Too many results, fall back to only query about single table
-            return self._get_table_columns(connection, table_name, schema, **kw)
+            return column_info_manager.get_table_columns(table_name, schema)
         normalized_table_name = self.normalize_name(table_name)
         if normalized_table_name not in schema_columns:
             raise sa_exc.NoSuchTableError()
@@ -719,6 +602,37 @@ def get_prefixes_from_data(self, name_to_index_map, row, **kw):
                 prefixes_found.append(valid_prefix.name)
         return prefixes_found
 
+    @reflection.cache
+    def _query_all_columns_info(self, connection, schema_name, **kw):
+        try:
+            return connection.execute(
+                text(
+                    """
+            SELECT /* sqlalchemy:_get_schema_columns */
+                   ic.table_name,
+                   ic.column_name,
+                   ic.data_type,
+                   ic.character_maximum_length,
+                   ic.numeric_precision,
+                   ic.numeric_scale,
+                   ic.is_nullable,
+                   ic.column_default,
+                   ic.is_identity,
+                   ic.comment,
+                   ic.identity_start,
+                   ic.identity_increment
+              FROM information_schema.columns ic
+             WHERE ic.table_schema=:table_schema
+             ORDER BY ic.ordinal_position"""
+                ),
+                {"table_schema": schema_name},
+            )
+        except sa_exc.ProgrammingError as pe:
+            if pe.orig.errno == 90030:
+                # This means that there are too many tables in the schema, we need to go more granular
+                return None  # None triggers get_table_columns while staying cacheable
+            raise
+
     @reflection.cache
     def _get_schema_tables_info(self, connection, schema=None, **kw):
         """