
Commit 61221f4

override default varchar datatype when altering iceberg table columns (#1257)
1 parent acd3177

3 files changed, +185 -1 lines changed
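
For context, this is roughly the DDL the change affects; the database, schema, and column names in this sketch are hypothetical. Before this commit, adding a string column to an incremental Iceberg model rendered the first statement below, which Snowflake Iceberg tables reject; with the fix, the same column renders as the second:

-- Before the fix: dbt's default maximum VARCHAR size, rejected by Iceberg tables
alter iceberg table analytics.dbt_schema.test_iceberg_base add column
    "LAST_NAME" VARCHAR(16777216);

-- After the fix: the VARCHAR specification is rewritten to STRING
alter iceberg table analytics.dbt_schema.test_iceberg_base add column
    "LAST_NAME" STRING;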
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+kind: Fixes
+body: fix issue where incorrect varchar size is applied for iceberg table columns
+time: 2025-08-12T10:19:59.653331-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "1257"

dbt-snowflake/src/dbt/include/snowflake/macros/adapters.sql

Lines changed: 30 additions & 1 deletion
@@ -162,6 +162,35 @@
 {% endmacro %}


+{% macro snowflake__get_column_data_type_for_alter(relation, column) %}
+    {#
+        Helper macro to get the correct data type for ALTER TABLE operations.
+        For Iceberg tables, we need to handle VARCHAR constraints differently because
+        Snowflake Iceberg tables only support max length (134,217,728) or STRING directly.
+
+        This fixes the bug where dbt generates VARCHAR(16777216) for new columns which
+        is not supported by Snowflake Iceberg tables.
+    #}
+    {% if relation.is_iceberg_format and column.is_string() %}
+        {% set data_type = column.data_type.upper() %}
+        {% if data_type.startswith('CHARACTER VARYING') or data_type.startswith('VARCHAR') %}
+            {#
+                For Iceberg tables, convert any VARCHAR specification to STRING.
+                This handles cases where:
+                - dbt auto-generates VARCHAR(16777216) for columns without explicit size
+                - users specify VARCHAR with any size (even the max 134217728)
+                Using STRING is more compatible and avoids size-related errors.
+            #}
+            STRING
+        {% else %}
+            {# Keep other string types like TEXT as-is #}
+            {{ column.data_type }}
+        {% endif %}
+    {% else %}
+        {{ column.data_type }}
+    {% endif %}
+{% endmacro %}
+
 {% macro snowflake__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}

 {% if relation.is_dynamic_table -%}

@@ -175,7 +204,7 @@
   {% set sql -%}
      alter {{ relation.get_ddl_prefix_for_alter() }} {{ relation_type }} {{ relation.render() }} add column
      {% for column in add_columns %}
-        {{ adapter.quote(column.name) }} {{ column.data_type }}{{ ',' if not loop.last }}
+        {{ adapter.quote(column.name) }} {{ snowflake__get_column_data_type_for_alter(relation, column) }}{{ ',' if not loop.last }}
      {% endfor %}
   {%- endset -%}

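To make the new macro's effect concrete, here is a sketch of what the ALTER template renders with it in place (relation and column names are illustrative; the type mapping follows the macro's branches above):

-- varchar, character varying, and varchar(n) at any size all render as STRING
alter iceberg table analytics.dbt_schema.my_model add column "LAST_NAME" STRING;

-- other data types fall through the macro's else branches and render unchanged,
-- e.g. a column declared TEXT keeps its declared type
alter iceberg table analytics.dbt_schema.my_model add column "NOTES" TEXT;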
Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
+import pytest
+from dbt.tests.util import run_dbt, write_file
+
+
+_MODEL_ICEBERG_BASE = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="s3_iceberg_snow",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name
+"""
+
+_MODEL_ICEBERG_ADDED_COLUMN = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="s3_iceberg_snow",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as varchar) as last_name
+"""
+
+_MODEL_ICEBERG_ADDED_STRING_COLUMN = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="s3_iceberg_snow",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as string) as last_name
+"""
+
+_MODEL_ICEBERG_ADDED_SIZED_VARCHAR_COLUMN = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="ICEBERG_SANDBOX",
+        catalog="SNOWFLAKE",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as varchar(134217728)) as last_name
+"""
+
+
+class TestIcebergSchemaChange:
+    """
+    Test schema changes with Iceberg tables to ensure VARCHAR columns work correctly.
+
+    This tests the fix for the bug where adding VARCHAR columns to Iceberg tables
+    fails because dbt generates VARCHAR(16777216), which is not supported by Snowflake
+    Iceberg tables. The fix should use STRING instead for Iceberg tables.
+    """
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "test_iceberg_base.sql": _MODEL_ICEBERG_BASE,
+        }
+
+    def test_iceberg_varchar_column_addition(self, project):
+        """Test that adding VARCHAR columns to Iceberg tables works correctly."""
+
+        # First, create the initial table
+        run_dbt(["run", "--select", "test_iceberg_base"])
+
+        # Verify the table was created successfully
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+
+        # Now add a VARCHAR column by updating the model
+        write_file(_MODEL_ICEBERG_ADDED_COLUMN, "models", "test_iceberg_base.sql")
+
+        # This should not fail with the varchar size error
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+    def test_iceberg_string_column_addition(self, project):
+        """Test that adding STRING columns to Iceberg tables works correctly."""
+
+        # First, create the initial table
+        run_dbt(["run", "--select", "test_iceberg_base"])
+
+        # Now add a STRING column by updating the model
+        write_file(_MODEL_ICEBERG_ADDED_STRING_COLUMN, "models", "test_iceberg_base.sql")
+
+        # This should work fine
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+    def test_iceberg_max_varchar_column_addition(self, project):
+        """Test that adding VARCHAR with max size to Iceberg tables works correctly."""
+
+        # First, create the initial table
+        run_dbt(["run", "--select", "test_iceberg_base"])
+
+        # Now add a VARCHAR column with max size by updating the model
+        write_file(_MODEL_ICEBERG_ADDED_SIZED_VARCHAR_COLUMN, "models", "test_iceberg_base.sql")
+
+        # This should work fine
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+
+class TestIcebergSchemaChangeIntegration:
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "test_iceberg.sql": _MODEL_ICEBERG_BASE,
+        }
+
+    def test_reproduce_and_fix_bug(self, project):
+
+        # Step 1: Create the initial incremental iceberg table
+        results = run_dbt(["run"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+        # Step 2: Modify the model to add a new column (this used to fail)
+        write_file(_MODEL_ICEBERG_ADDED_COLUMN, "models", "test_iceberg.sql")
+
+        # Step 3: Run dbt again - this should now work with the fix
+        results = run_dbt(["run"])
+        assert len(results) == 1
+        assert results[0].status == "success"