From a9e54a0fd08272b304ea4aaef3784cb23aa001c0 Mon Sep 17 00:00:00 2001
From: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Fri, 15 Aug 2025 11:08:38 -0700
Subject: [PATCH] override default varchar datatype when altering iceberg
 table columns (#1257)

(cherry picked from commit 61221f455f5960daf80024febfae6d6fb4b46251)
---
 .../unreleased/Fixes-20250812-101959.yaml     |   6 +
 .../dbt/include/snowflake/macros/adapters.sql |  31 +++-
 .../iceberg/test_iceberg_schema_change.py     | 148 ++++++++++++++++++
 3 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 dbt-snowflake/.changes/unreleased/Fixes-20250812-101959.yaml
 create mode 100644 dbt-snowflake/tests/functional/iceberg/test_iceberg_schema_change.py

diff --git a/dbt-snowflake/.changes/unreleased/Fixes-20250812-101959.yaml b/dbt-snowflake/.changes/unreleased/Fixes-20250812-101959.yaml
new file mode 100644
index 000000000..b26a878db
--- /dev/null
+++ b/dbt-snowflake/.changes/unreleased/Fixes-20250812-101959.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: fix issue where an incorrect varchar size is applied to iceberg table columns
+time: 2025-08-12T10:19:59.653331-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "1257"
diff --git a/dbt-snowflake/src/dbt/include/snowflake/macros/adapters.sql b/dbt-snowflake/src/dbt/include/snowflake/macros/adapters.sql
index 5d22259c2..f6e5abe54 100644
--- a/dbt-snowflake/src/dbt/include/snowflake/macros/adapters.sql
+++ b/dbt-snowflake/src/dbt/include/snowflake/macros/adapters.sql
@@ -157,6 +157,35 @@
 {% endmacro %}
 
+{% macro snowflake__get_column_data_type_for_alter(relation, column) %}
+  {#
+    Helper macro to get the correct data type for ALTER TABLE operations.
+    For Iceberg tables, VARCHAR constraints need special handling because
+    Snowflake Iceberg tables only accept VARCHAR at the maximum length (134,217,728) or STRING.
+
+    This fixes the bug where dbt generates VARCHAR(16777216) for new columns,
+    a size that Snowflake Iceberg tables do not support.
+  #}
+  {% if relation.is_iceberg_format and column.is_string() %}
+    {% set data_type = column.data_type.upper() %}
+    {% if data_type.startswith('CHARACTER VARYING') or data_type.startswith('VARCHAR') %}
+      {#
+        For Iceberg tables, convert any VARCHAR specification to STRING.
+        This handles cases where:
+        - dbt auto-generates VARCHAR(16777216) for columns without an explicit size
+        - users specify VARCHAR with any size (even the max, 134217728)
+        Using STRING is more compatible and avoids size-related errors.
+      #}
+      STRING
+    {% else %}
+      {# Keep other string types like TEXT as-is #}
+      {{ column.data_type }}
+    {% endif %}
+  {% else %}
+    {{ column.data_type }}
+  {% endif %}
+{% endmacro %}
+
 
 {% macro snowflake__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}
 
     {% if relation.is_dynamic_table -%}
@@ -170,7 +199,7 @@
 
         {% set sql -%}
            alter {{ relation.get_ddl_prefix_for_alter() }} {{ relation_type }} {{ relation.render() }} add column
               {% for column in add_columns %}
-                 {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
+                 {{ adapter.quote(column.name) }} {{ snowflake__get_column_data_type_for_alter(relation, column) }}{{ ',' if not loop.last }}
               {% endfor %}
         {%- endset -%}
diff --git a/dbt-snowflake/tests/functional/iceberg/test_iceberg_schema_change.py b/dbt-snowflake/tests/functional/iceberg/test_iceberg_schema_change.py
new file mode 100644
index 000000000..01ecd0660
--- /dev/null
+++ b/dbt-snowflake/tests/functional/iceberg/test_iceberg_schema_change.py
@@ -0,0 +1,148 @@
+import pytest
+from dbt.tests.util import run_dbt, write_file
+
+
+_MODEL_ICEBERG_BASE = """
+{{
+  config(
+    materialized="incremental",
+    table_format="iceberg",
+    external_volume="s3_iceberg_snow",
+    on_schema_change="append_new_columns"
+  )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name
+"""
+
+_MODEL_ICEBERG_ADDED_COLUMN = """
+{{
+  config(
+    materialized="incremental",
+    table_format="iceberg",
+    external_volume="s3_iceberg_snow",
+    on_schema_change="append_new_columns"
+  )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as varchar) as last_name
+"""
+
+_MODEL_ICEBERG_ADDED_STRING_COLUMN = """
+{{
+  config(
+    materialized="incremental",
+    table_format="iceberg",
+    external_volume="s3_iceberg_snow",
+    on_schema_change="append_new_columns"
+  )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as string) as last_name
+"""
+
+_MODEL_ICEBERG_ADDED_SIZED_VARCHAR_COLUMN = """
+{{
+  config(
+    materialized="incremental",
+    table_format="iceberg",
+    external_volume="s3_iceberg_snow",
+    on_schema_change="append_new_columns"
+  )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as varchar(134217728)) as last_name
+"""
+
+
+class TestIcebergSchemaChange:
+    """
+    Test schema changes with Iceberg tables to ensure VARCHAR columns work correctly.
+
+    This tests the fix for the bug where adding VARCHAR columns to Iceberg tables
+    failed because dbt generated VARCHAR(16777216), which Snowflake Iceberg tables
+    do not support. The fix uses STRING for Iceberg tables instead.
+ """ + + @pytest.fixture(scope="class") + def models(self): + return { + "test_iceberg_base.sql": _MODEL_ICEBERG_BASE, + } + + def test_iceberg_varchar_column_addition(self, project): + """Test that adding VARCHAR columns to Iceberg tables works correctly.""" + + # First, create the initial table + run_dbt(["run", "--select", "test_iceberg_base"]) + + # Verify the table was created successfully + results = run_dbt(["run", "--select", "test_iceberg_base"]) + assert len(results) == 1 + + # Now add a VARCHAR column by updating the model + write_file(_MODEL_ICEBERG_ADDED_COLUMN, "models", "test_iceberg_base.sql") + + # This should not fail with the varchar size error + results = run_dbt(["run", "--select", "test_iceberg_base"]) + assert len(results) == 1 + assert results[0].status == "success" + + def test_iceberg_string_column_addition(self, project): + """Test that adding STRING columns to Iceberg tables works correctly.""" + + # First, create the initial table + run_dbt(["run", "--select", "test_iceberg_base"]) + + # Now add a STRING column by updating the model + write_file(_MODEL_ICEBERG_ADDED_STRING_COLUMN, "models", "test_iceberg_base.sql") + + # This should work fine + results = run_dbt(["run", "--select", "test_iceberg_base"]) + assert len(results) == 1 + assert results[0].status == "success" + + def test_iceberg_max_varchar_column_addition(self, project): + """Test that adding VARCHAR with max size to Iceberg tables works correctly.""" + + # First, create the initial table + run_dbt(["run", "--select", "test_iceberg_base"]) + + # Now add a VARCHAR column with max size by updating the model + write_file(_MODEL_ICEBERG_ADDED_SIZED_VARCHAR_COLUMN, "models", "test_iceberg_base.sql") + + # This should work fine + results = run_dbt(["run", "--select", "test_iceberg_base"]) + assert len(results) == 1 + assert results[0].status == "success" + + +class TestIcebergSchemaChangeIntegration: + + @pytest.fixture(scope="class") + def models(self): + return { + "test_iceberg.sql": _MODEL_ICEBERG_BASE, + } + + def test_reproduce_and_fix_bug(self, project): + + # Step 1: Create the initial incremental iceberg table + results = run_dbt(["run"]) + assert len(results) == 1 + assert results[0].status == "success" + + # Step 2: Modify the model to add new column (this used to fail) + write_file(_MODEL_ICEBERG_ADDED_COLUMN, "models", "test_iceberg.sql") + + # Step 3: Run dbt build again - this should now work with our fix + results = run_dbt(["run"]) + assert len(results) == 1 + assert results[0].status == "success"