Commit 7bf4755

benc-db and Claude authored

fix: Respect varchar and char when using describe extended as json (#1220)

### Description

Respect varchar/char, rather than casting to string, when information is available.

### Checklist

- [x] I have run this code in development and it appears to resolve the stated issue
- [x] This PR includes tests, or tests are not required/relevant for this PR
- [x] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-databricks next" section.

---------

Co-authored-by: Claude <[email protected]>

1 parent 53cd1a2 commit 7bf4755
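For context, a minimal sketch of the payload this PR parses, assuming the same shape as the unit-test fixtures further down (a `columns` list whose `type` objects carry a `name` and an optional `length`); the column names and values here are invented, and this is an illustration rather than adapter code:

```python
# Illustration only, not adapter code: a DESCRIBE ... AS JSON-style payload
# shaped like the unit-test fixtures below. All values are invented.
import json

payload = json.dumps(
    {
        "columns": [
            {"name": "code", "type": {"name": "char", "length": 5}},
            {"name": "description", "type": {"name": "varchar", "length": 100}},
            {"name": "notes", "type": {"name": "string"}},
        ]
    }
)

for col in json.loads(payload)["columns"]:
    type_info = col["type"]
    # Before this fix, varchar/char were mapped straight to "string" here,
    # discarding the length reported by the engine.
    print(col["name"], type_info["name"], type_info.get("length"))
```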

5 files changed (+127, -12 lines)

CHANGELOG.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -5,6 +5,7 @@
 - Fix issue causing MV/STs to always trigger as having their config changed ([1181](http://github.com/databricks/dbt-databricks/pull/1181))
 - Fix pydantic v2 deprecation warning "Valid config keys have changed in V2" (thanks @Korijn!) ([1194](https://github.com/databricks/dbt-databricks/pull/1194))
 - Fix snapshots not applying databricks_tags config ([1192](https://github.com/databricks/dbt-databricks/pull/1192))
+- Fix to respect varchar and char when using describe extended as json ([1220](https://github.com/databricks/dbt-databricks/pull/1220))

 ## dbt-databricks 1.10.12 (September 8, 2025)

```

dbt/adapters/databricks/column.py

Lines changed: 10 additions & 6 deletions

```diff
@@ -64,10 +64,8 @@ def _parse_type_from_json(cls, type_info: Any) -> str:
         - map: nested types handled
         - decimal: precision, scale handled
         - string: collation handled
-        - varchar: Handled just in case, but the JSON should never contain a varchar type as
-          these are just STRING types under the hood in Databricks.
-        - char: Handled just in case, but the JSON should never contain a char type as these are
-          just STRING types under the hood in Databricks.
+        - varchar: length handled - preserves varchar(n) in DDL
+        - char: length handled - preserves char(n) in DDL

         Complex types can have other properties in the JSON schema such as nullable, defaults, etc.
         but those are ignored as they are not part of data type DDL
@@ -122,10 +120,16 @@ def _parse_type_from_json(cls, type_info: Any) -> str:
             return "timestamp"

         elif type_name == "varchar":
-            return "string"
+            length = type_info.get("length")
+            if length is not None:
+                return f"varchar({length})"
+            return "varchar"

         elif type_name == "char":
-            return "string"
+            length = type_info.get("length")
+            if length is not None:
+                return f"char({length})"
+            return "char"

         else:
             # Handle primitive types and any other types
```
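To make the new branch concrete, here is a standalone sketch of the mapping rule it implements; this re-implements the logic for illustration and is not the adapter's `_parse_type_from_json`:

```python
# Standalone sketch of the mapping rule added above; not dbt-databricks code.
from typing import Any


def to_ddl_type(type_info: dict[str, Any]) -> str:
    """Map a JSON type object to a DDL type string, keeping any length."""
    name = type_info["name"]
    if name in ("varchar", "char"):
        length = type_info.get("length")
        # With a length, emit the parameterized type; without one, fall back
        # to the bare type name instead of collapsing to "string".
        return f"{name}({length})" if length is not None else name
    return name


assert to_ddl_type({"name": "varchar", "length": 10}) == "varchar(10)"
assert to_ddl_type({"name": "char", "length": 5}) == "char(5)"
assert to_ddl_type({"name": "varchar"}) == "varchar"
assert to_ddl_type({"name": "string"}) == "string"
```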

tests/functional/adapter/columns/fixtures.py

Lines changed: 29 additions & 0 deletions

```diff
@@ -23,3 +23,32 @@
       - name: struct_col
       - name: str_col
 """
+
+varchar_char_model = """
+{{
+    config(
+        materialized='table'
+    )
+}}
+select
+    'hello' as varchar_col,
+    'world' as char_col,
+    'regular string' as string_col
+"""
+
+varchar_char_schema = """
+version: 2
+models:
+  - name: varchar_char_model
+    config:
+      materialized: table
+      contract:
+        enforced: true
+    columns:
+      - name: varchar_col
+        data_type: varchar(50)
+      - name: char_col
+        data_type: char(10)
+      - name: string_col
+        data_type: string
+"""
```

tests/functional/adapter/columns/test_get_columns.py

Lines changed: 37 additions & 0 deletions

```diff
@@ -4,6 +4,7 @@
 from dbt.adapters.databricks.relation import DatabricksRelation
 from dbt.tests import util
 from tests.functional.adapter.columns import fixtures
+from tests.functional.adapter.fixtures import MaterializationV2Mixin


 class ColumnsInRelation:
@@ -76,3 +77,39 @@ def test_columns_in_relation(self, project, expected_columns):
         with project.adapter.connection_named("_test"):
             actual_columns = project.adapter.get_columns_in_relation(my_relation)
         assert actual_columns == expected_columns
+
+
+class TestVarcharCharTypePreservation(MaterializationV2Mixin):
+    """Test that varchar and char types preserve their length constraints with mat v2."""
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "varchar_char_model.sql": fixtures.varchar_char_model,
+            "schema.yml": fixtures.varchar_char_schema,
+        }
+
+    @pytest.fixture(scope="class", autouse=True)
+    def setup(self, project):
+        util.run_dbt(["debug", "--connection"])
+        util.run_dbt(["run"])
+
+    @pytest.fixture(scope="class")
+    def expected_columns(self):
+        return [
+            DatabricksColumn(column="varchar_col", dtype="varchar(50)"),
+            DatabricksColumn(column="char_col", dtype="char(10)"),
+            DatabricksColumn(column="string_col", dtype="string"),
+        ]
+
+    def test_varchar_char_columns(self, project, expected_columns):
+        my_relation = DatabricksRelation.create(
+            database=project.database,
+            schema=project.test_schema,
+            identifier="varchar_char_model",
+            type=DatabricksRelation.Table,
+        )
+
+        with project.adapter.connection_named("_test"):
+            actual_columns = project.adapter.get_columns_in_relation(my_relation)
+        assert actual_columns == expected_columns
```

tests/unit/test_column.py

Lines changed: 50 additions & 6 deletions

```diff
@@ -130,6 +130,40 @@ def test_from_json_metadata_happy_path(self):
         assert result[2].dtype == "struct<field1:string,field2:int>"
         assert result[2].comment is None

+    def test_from_json_metadata_with_varchar_and_char(self):
+        """Test from_json_metadata properly handles varchar and char with length"""
+        json_metadata = json.dumps(
+            {
+                "columns": [
+                    {"name": "id", "type": {"name": "bigint"}, "comment": "Primary key"},
+                    {
+                        "name": "code",
+                        "type": {"name": "char", "length": 5},
+                        "comment": "Fixed code",
+                    },
+                    {
+                        "name": "description",
+                        "type": {"name": "varchar", "length": 100},
+                        "comment": "Variable description",
+                    },
+                ]
+            }
+        )
+
+        result = DatabricksColumn.from_json_metadata(json_metadata)
+
+        assert len(result) == 3
+        assert result[0].column == "id"
+        assert result[0].dtype == "bigint"
+
+        assert result[1].column == "code"
+        assert result[1].dtype == "char(5)"
+        assert result[1].comment == "Fixed code"
+
+        assert result[2].column == "description"
+        assert result[2].dtype == "varchar(100)"
+        assert result[2].comment == "Variable description"
+
     def test_from_json_metadata_empty_columns(self):
         """Test from_json_metadata with empty columns list"""
         json_metadata = json.dumps({"columns": []})
@@ -221,19 +255,29 @@ def test_parse_type_from_json_string_without_collation(self):
         result = DatabricksColumn._parse_type_from_json(type_info)
         assert result == "string"

-    def test_parse_type_from_json_varchar(self):
+    def test_parse_type_from_json_varchar_with_length(self):
         type_info = {"name": "varchar", "length": 10}

         result = DatabricksColumn._parse_type_from_json(type_info)
-        # varchar is just a string in Databricks
-        assert result == "string"
+        assert result == "varchar(10)"

-    def test_parse_type_from_json_char(self):
+    def test_parse_type_from_json_varchar_without_length(self):
+        type_info = {"name": "varchar"}
+
+        result = DatabricksColumn._parse_type_from_json(type_info)
+        assert result == "varchar"
+
+    def test_parse_type_from_json_char_with_length(self):
         type_info = {"name": "char", "length": 10}

         result = DatabricksColumn._parse_type_from_json(type_info)
-        # char is just a string in Databricks
-        assert result == "string"
+        assert result == "char(10)"
+
+    def test_parse_type_from_json_char_without_length(self):
+        type_info = {"name": "char"}
+
+        result = DatabricksColumn._parse_type_from_json(type_info)
+        assert result == "char"

     def test_parse_type_from_json_primitive_types(self):
         """Test _parse_type_from_json with various primitive types"""
```
