Skip to content

Commit 06b6938

Browse files
authored
[Feature] add automatic clustering support for databricks_sql_table (#4607)
## Changes

- Allow setting `cluster_keys` to `auto` for automatic clustering

## Tests

<!-- How is this tested? Please see the checklist below and also describe any other relevant tests -->

- [x] `make test` run locally
- [x] relevant change in `docs/` folder
- [x] covered with integration tests in `internal/acceptance`
- [x] using Go SDK
1 parent 9aa72ad commit 06b6938

File tree

5 files changed

+86
-11
lines changed

5 files changed

+86
-11
lines changed

NEXT_CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
### New Features and Improvements
66

7+
* Add automatic clustering support for `databricks_sql_table` ([#4607](https://github.com/databricks/terraform-provider-databricks/pull/4607))
8+
79
### Bug Fixes
810

911
* Suppress diff in `databricks_mlflow_experiment` name ([#4606](https://github.com/databricks/terraform-provider-databricks/pull/4606))

catalog/resource_sql_table.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ func (ti *SqlTableInfo) buildTableCreateStatement() string {
304304
}
305305

306306
if len(ti.ClusterKeys) > 0 {
307-
statements = append(statements, fmt.Sprintf("\nCLUSTER BY (%s)", ti.getWrappedClusterKeys())) // CLUSTER BY (`university`, `major`)
307+
statements = append(statements, fmt.Sprintf("\nCLUSTER BY %s", ti.getWrappedClusterKeys())) // CLUSTER BY (`university`, `major`)
308308
}
309309

310310
if ti.Comment != "" {
@@ -339,7 +339,14 @@ func (ci SqlColumnInfo) getWrappedColumnName() string {
339339

340340
// Wraps each cluster key in backticks so that special characters in column names cannot break the statement; a lone AUTO or NONE keyword is returned unquoted.
341341
func (ti *SqlTableInfo) getWrappedClusterKeys() string {
342-
return "`" + strings.Join(ti.ClusterKeys, "`,`") + "`"
342+
if len(ti.ClusterKeys) == 1 {
343+
clusterKey := strings.ToUpper(ti.ClusterKeys[0])
344+
// If the cluster key is AUTO or NONE, we don't need to wrap it with backticks.
345+
if slices.Contains([]string{"AUTO", "NONE"}, clusterKey) {
346+
return clusterKey
347+
}
348+
}
349+
return "(`" + strings.Join(ti.ClusterKeys, "`,`") + "`)"
343350
}
344351

345352
func (ti *SqlTableInfo) getStatementsForColumnDiffs(oldti *SqlTableInfo, statements []string, typestring string) []string {
@@ -429,7 +436,7 @@ func (ti *SqlTableInfo) diff(oldti *SqlTableInfo) ([]string, error) {
429436
}
430437
equal := slices.Equal(ti.ClusterKeys, oldti.ClusterKeys)
431438
if !equal {
432-
statements = append(statements, fmt.Sprintf("ALTER TABLE %s CLUSTER BY (%s)", ti.SQLFullName(), ti.getWrappedClusterKeys()))
439+
statements = append(statements, fmt.Sprintf("ALTER TABLE %s CLUSTER BY %s", ti.SQLFullName(), ti.getWrappedClusterKeys()))
433440
}
434441
}
435442

catalog/resource_sql_table_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,26 @@ func TestResourceSqlTableCreateStatement_Liquid(t *testing.T) {
161161
assert.Contains(t, stmt, "CLUSTER BY (`baz`,`bazz`)")
162162
}
163163

164+
func TestResourceSqlTableCreateStatement_AutoLiquid(t *testing.T) {
165+
ti := &SqlTableInfo{
166+
Name: "bar",
167+
CatalogName: "main",
168+
SchemaName: "foo",
169+
TableType: "EXTERNAL",
170+
DataSourceFormat: "DELTA",
171+
StorageLocation: "s3://ext-main/foo/bar1",
172+
StorageCredentialName: "somecred",
173+
Comment: "terraform managed",
174+
ClusterKeys: []string{"auto"},
175+
}
176+
stmt := ti.buildTableCreateStatement()
177+
assert.Contains(t, stmt, "CREATE EXTERNAL TABLE `main`.`foo`.`bar`")
178+
assert.Contains(t, stmt, "USING DELTA")
179+
assert.Contains(t, stmt, "LOCATION 's3://ext-main/foo/bar1' WITH (CREDENTIAL `somecred`)")
180+
assert.Contains(t, stmt, "COMMENT 'terraform managed'")
181+
assert.Contains(t, stmt, "CLUSTER BY AUTO")
182+
}
183+
164184
func TestResourceSqlTableSerializeProperties(t *testing.T) {
165185
ti := &SqlTableInfo{
166186
Properties: map[string]string{

docs/resources/sql_table.md

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ resource "databricks_sql_table" "thing" {
3737
catalog_name = databricks_catalog.sandbox.name
3838
schema_name = databricks_schema.things.name
3939
table_type = "MANAGED"
40-
data_source_format = "DELTA"
41-
storage_location = ""
4240
4341
column {
4442
name = "id"
@@ -81,8 +79,6 @@ resource "databricks_sql_table" "thing" {
8179
catalog_name = databricks_catalog.sandbox.name
8280
schema_name = databricks_schema.things.name
8381
table_type = "MANAGED"
84-
data_source_format = "DELTA"
85-
storage_location = ""
8682
warehouse_id = databricks_sql_endpoint.this.id
8783
8884
column {
@@ -131,12 +127,10 @@ resource "databricks_schema" "things" {
131127
}
132128
resource "databricks_sql_table" "thing" {
133129
provider = databricks.workspace
134-
name = "quickstart_table"
130+
name = "identity_table"
135131
catalog_name = databricks_catalog.sandbox.name
136132
schema_name = databricks_schema.things.name
137133
table_type = "MANAGED"
138-
data_source_format = "DELTA"
139-
storage_location = ""
140134
column {
141135
name = "id"
142136
type = "bigint"
@@ -151,6 +145,26 @@ resource "databricks_sql_table" "thing" {
151145
}
152146
```
153147

148+
## Enable automatic clustering
149+
150+
```hcl
151+
resource "databricks_sql_table" "thing" {
152+
provider = databricks.workspace
153+
name = "auto_cluster_table"
154+
catalog_name = databricks_catalog.sandbox.name
155+
schema_name = databricks_schema.things.name
156+
table_type = "MANAGED"
157+
cluster_keys = ["AUTO"]
158+
159+
column {
160+
name = "name"
161+
type = "string"
162+
comment = "name of thing"
163+
}
164+
comment = "this table is managed by terraform"
165+
}
166+
```
167+
154168
## Argument Reference
155169

156170
The following arguments are supported:
@@ -164,7 +178,7 @@ The following arguments are supported:
164178
* `view_definition` - (Optional) SQL text defining the view (for `table_type == "VIEW"`). Not supported for `MANAGED` or `EXTERNAL` table_type.
165179
* `cluster_id` - (Optional) All table CRUD operations must be executed on a running cluster or SQL warehouse. If a cluster_id is specified, it will be used to execute SQL commands to manage this table. If empty, a cluster will be created automatically with the name `terraform-sql-table`. Conflicts with `warehouse_id`.
166180
* `warehouse_id` - (Optional) All table CRUD operations must be executed on a running cluster or SQL warehouse. If a `warehouse_id` is specified, that SQL warehouse will be used to execute SQL commands to manage this table. Conflicts with `cluster_id`.
167-
* `cluster_keys` - (Optional) a subset of columns to liquid cluster the table by. Conflicts with `partitions`.
181+
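* `cluster_keys` - (Optional) a subset of columns to liquid cluster the table by. For automatic clustering, set `cluster_keys` to `["AUTO"]`. To turn off clustering, set it to `["NONE"]`. Both keywords are matched case-insensitively and are only recognized when they are the sole element of the list. Conflicts with `partitions`.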
168182
* `partitions` - (Optional) a subset of columns to partition the table by. Change forces the creation of a new resource. Conflicts with `cluster_keys`.
169183
* `storage_credential_name` - (Optional) For EXTERNAL Tables only: the name of storage credential to use. Change forces the creation of a new resource.
170184
* `owner` - (Optional) User name/group name/sp application_id of the table owner.

sql/sql_table_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,38 @@ func TestUcAccResourceSqlTable_Liquid(t *testing.T) {
326326
cluster_keys = ["id", "name"]
327327
comment = "this table is managed by terraform..."
328328
}`,
329+
}, acceptance.Step{
330+
Template: `
331+
resource "databricks_schema" "this" {
332+
name = "{var.STICKY_RANDOM}"
333+
catalog_name = "main"
334+
}
335+
336+
resource "databricks_sql_table" "this" {
337+
name = "bar"
338+
catalog_name = "main"
339+
schema_name = databricks_schema.this.name
340+
table_type = "MANAGED"
341+
warehouse_id = "{env.TEST_DEFAULT_WAREHOUSE_ID}"
342+
properties = {
343+
them = "that"
344+
something = "else"
345+
}
346+
options = {
347+
this = "blue"
348+
that = "green"
349+
}
350+
column {
351+
name = "id"
352+
type = "int"
353+
}
354+
column {
355+
name = "name"
356+
type = "varchar(64)"
357+
}
358+
cluster_keys = ["auto"]
359+
comment = "this table is managed by terraform..."
360+
}`,
329361
})
330362
}
331363

0 commit comments

Comments
 (0)