
Commit 61221f4

override default varchar datatype when altering iceberg table columns (#1257)
1 parent acd3177

3 files changed, +185 -1 lines changed
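
For context, this is roughly the DDL the change affects; the database, schema, and column names in this sketch are hypothetical. Before this commit, adding a string column to an incremental Iceberg model rendered the first statement below, which Snowflake Iceberg tables reject; with the fix, the same column renders as the second:

-- Before the fix: dbt's default maximum VARCHAR size, rejected by Iceberg tables
alter iceberg table analytics.dbt_schema.test_iceberg_base add column
    "LAST_NAME" VARCHAR(16777216);

-- After the fix: the VARCHAR specification is rewritten to STRING
alter iceberg table analytics.dbt_schema.test_iceberg_base add column
    "LAST_NAME" STRING;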
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+kind: Fixes
+body: fix issue where incorrect varchar size is applied for iceberg table columns
+time: 2025-08-12T10:19:59.653331-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "1257"

dbt-snowflake/src/dbt/include/snowflake/macros/adapters.sql

Lines changed: 30 additions & 1 deletion
@@ -162,6 +162,35 @@
 {% endmacro %}


+{% macro snowflake__get_column_data_type_for_alter(relation, column) %}
+    {#
+        Helper macro to get the correct data type for ALTER TABLE operations.
+        For Iceberg tables, we need to handle VARCHAR constraints differently because
+        Snowflake Iceberg tables only support max length (134,217,728) or STRING directly.
+
+        This fixes the bug where dbt generates VARCHAR(16777216) for new columns which
+        is not supported by Snowflake Iceberg tables.
+    #}
+    {% if relation.is_iceberg_format and column.is_string() %}
+        {% set data_type = column.data_type.upper() %}
+        {% if data_type.startswith('CHARACTER VARYING') or data_type.startswith('VARCHAR') %}
+            {#
+                For Iceberg tables, convert any VARCHAR specification to STRING.
+                This handles cases where:
+                - dbt auto-generates VARCHAR(16777216) for columns without explicit size
+                - users specify VARCHAR with any size (even the max 134217728)
+                Using STRING is more compatible and avoids size-related errors.
+            #}
+            STRING
+        {% else %}
+            {# Keep other string types like TEXT as-is #}
+            {{ column.data_type }}
+        {% endif %}
+    {% else %}
+        {{ column.data_type }}
+    {% endif %}
+{% endmacro %}
+
 {% macro snowflake__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %}

 {% if relation.is_dynamic_table -%}

@@ -175,7 +204,7 @@
   {% set sql -%}
      alter {{ relation.get_ddl_prefix_for_alter() }} {{ relation_type }} {{ relation.render() }} add column
      {% for column in add_columns %}
-        {{ adapter.quote(column.name) }} {{ column.data_type }}{{ ',' if not loop.last }}
+        {{ adapter.quote(column.name) }} {{ snowflake__get_column_data_type_for_alter(relation, column) }}{{ ',' if not loop.last }}
      {% endfor %}
   {%- endset -%}

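To make the new macro's effect concrete, here is a sketch of what the ALTER template renders with it in place (relation and column names are illustrative; the type mapping follows the macro's branches above):

-- varchar, character varying, and varchar(n) at any size all render as STRING
alter iceberg table analytics.dbt_schema.my_model add column "LAST_NAME" STRING;

-- other data types fall through the macro's else branches and render unchanged,
-- e.g. a column declared TEXT keeps its declared type
alter iceberg table analytics.dbt_schema.my_model add column "NOTES" TEXT;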
Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
+import pytest
+from dbt.tests.util import run_dbt, write_file
+
+
+_MODEL_ICEBERG_BASE = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="s3_iceberg_snow",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name
+"""
+
+_MODEL_ICEBERG_ADDED_COLUMN = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="s3_iceberg_snow",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as varchar) as last_name
+"""
+
+_MODEL_ICEBERG_ADDED_STRING_COLUMN = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="s3_iceberg_snow",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as string) as last_name
+"""
+
+_MODEL_ICEBERG_ADDED_SIZED_VARCHAR_COLUMN = """
+{{
+    config(
+        materialized="incremental",
+        table_format="iceberg",
+        external_volume="ICEBERG_SANDBOX",
+        catalog="SNOWFLAKE",
+        on_schema_change="append_new_columns"
+    )
+}}
+
+select 1 as id,
+cast('John' as varchar) as first_name,
+cast('Smith' as varchar(134217728)) as last_name
+"""
+
+
+class TestIcebergSchemaChange:
+    """
+    Test schema changes with Iceberg tables to ensure VARCHAR columns work correctly.
+
+    This tests the fix for the bug where adding VARCHAR columns to Iceberg tables
+    fails because dbt generates VARCHAR(16777216), which is not supported by Snowflake
+    Iceberg tables. The fix should use STRING instead for Iceberg tables.
+    """
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "test_iceberg_base.sql": _MODEL_ICEBERG_BASE,
+        }
+
+    def test_iceberg_varchar_column_addition(self, project):
+        """Test that adding VARCHAR columns to Iceberg tables works correctly."""
+
+        # First, create the initial table
+        run_dbt(["run", "--select", "test_iceberg_base"])
+
+        # Verify the table was created successfully
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+
+        # Now add a VARCHAR column by updating the model
+        write_file(_MODEL_ICEBERG_ADDED_COLUMN, "models", "test_iceberg_base.sql")
+
+        # This should not fail with the varchar size error
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+    def test_iceberg_string_column_addition(self, project):
+        """Test that adding STRING columns to Iceberg tables works correctly."""
+
+        # First, create the initial table
+        run_dbt(["run", "--select", "test_iceberg_base"])
+
+        # Now add a STRING column by updating the model
+        write_file(_MODEL_ICEBERG_ADDED_STRING_COLUMN, "models", "test_iceberg_base.sql")
+
+        # This should work fine
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+    def test_iceberg_max_varchar_column_addition(self, project):
+        """Test that adding VARCHAR with max size to Iceberg tables works correctly."""
+
+        # First, create the initial table
+        run_dbt(["run", "--select", "test_iceberg_base"])
+
+        # Now add a VARCHAR column with max size by updating the model
+        write_file(_MODEL_ICEBERG_ADDED_SIZED_VARCHAR_COLUMN, "models", "test_iceberg_base.sql")
+
+        # This should work fine
+        results = run_dbt(["run", "--select", "test_iceberg_base"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+
+class TestIcebergSchemaChangeIntegration:
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "test_iceberg.sql": _MODEL_ICEBERG_BASE,
+        }
+
+    def test_reproduce_and_fix_bug(self, project):
+
+        # Step 1: Create the initial incremental iceberg table
+        results = run_dbt(["run"])
+        assert len(results) == 1
+        assert results[0].status == "success"
+
+        # Step 2: Modify the model to add a new column (this used to fail)
+        write_file(_MODEL_ICEBERG_ADDED_COLUMN, "models", "test_iceberg.sql")
+
+        # Step 3: Run dbt again - this should now work with the fix
+        results = run_dbt(["run"])
+        assert len(results) == 1
+        assert results[0].status == "success"