
Commit 2b4bb48

Fixed mount read failing if cluster no longer exists (#2634)
* Fix mount read failing if cluster no longer exists
* fmt
* add acceptance test
* Add pinned cluster test
* lint
* a comment
* -
* put closure inside func
* fix bug in func check
* address comment
* Add docs
* Address comments
1 parent 4b0f2ff commit 2b4bb48

6 files changed: +267, −19 lines

common/client.go

Lines changed: 1 addition & 1 deletion

@@ -42,7 +42,7 @@ func (a *cachedMe) Me(ctx context.Context) (*iam.User, error) {
 type DatabricksClient struct {
 	*client.DatabricksClient
 
-	// callback used to create API1.2 call wrapper, which simplifies unit tessting
+	// callback used to create API1.2 call wrapper, which simplifies unit testing
 	commandFactory func(context.Context, *DatabricksClient) CommandExecutor
 	cachedWorkspaceClient *databricks.WorkspaceClient
 	cachedAccountClient *databricks.AccountClient

docs/resources/mount.md

Lines changed: 2 additions & 0 deletions

@@ -7,6 +7,8 @@ This resource will [mount your cloud storage](https://docs.databricks.com/data/d
 
 **Note** When `cluster_id` is not specified, it will create the smallest possible cluster in the default availability zone with name equal to or starting with `terraform-mount` for the shortest possible amount of time. To avoid mount failure due to potentially quota or capacity issues with the default cluster, we recommend specifying a cluster to use for mounting.
 
+**Note** CRUD operations on a databricks mount require a running cluster. Due to limitations of terraform and the databricks mounts APIs, if the cluster the mount was most recently created / updated using no longer exists AND the mount is destroyed as a part of a terraform apply, we mark it as deleted without cleaning it up from the workspace.
+
 This resource provides two ways of mounting a storage account:
 
 1. Use a storage-specific configuration block - this could be used for the most cases, as it will fill most of the necessary details. Currently we support following configuration blocks:

internal/acceptance/init_test.go

Lines changed: 21 additions & 16 deletions

@@ -83,9 +83,17 @@ func unityAccountLevel(t *testing.T, steps ...step) {
 	run(t, steps)
 }
 
+// A step in a terraform acceptance test
 type step struct {
+	// Terraform HCL for resources to materialize in this test step.
 	Template string
-	Check    func(*terraform.State) error
+
+	// This function is called after the template is applied. Useful for making assertions
+	// or doing cleanup.
+	Check func(*terraform.State) error
+
+	// Setup function called before the template is materialized.
+	PreConfig func()
 
 	Destroy            bool
 	ExpectNonEmptyPlan bool
@@ -148,7 +156,8 @@ func environmentTemplate(t *testing.T, template string, otherVars ...map[string]
 	return commands.TrimLeadingWhitespace(template)
 }
 
-// Test wrapper over terraform testing framework
+// Test wrapper over terraform testing framework. Multiple steps share the same
+// terraform state context.
 func run(t *testing.T, steps []step) {
 	cloudEnv := os.Getenv("CLOUD_ENV")
 	if cloudEnv == "" {
@@ -170,13 +179,7 @@ func run(t *testing.T, steps []step) {
 	}
 	ts := []resource.TestStep{}
 	ctx := context.Background()
-	type testResource struct {
-		ID       string
-		Name     string
-		Resource *schema.Resource
-	}
 
-	resourcesEverCreated := map[testResource]bool{}
 	stepConfig := ""
 	for i, s := range steps {
 		if s.Template != "" {
@@ -185,6 +188,7 @@ func run(t *testing.T, steps []step) {
 			stepNum := i
 			thisStep := s
 			stepCheck := thisStep.Check
+			stepPreConfig := s.PreConfig
 			ts = append(ts, resource.TestStep{
 				PreConfig: func() {
 					if stepConfig == "" {
@@ -193,6 +197,10 @@ func run(t *testing.T, steps []step) {
 					logger.Infof(ctx, "Test %s (%s) step %d config is:\n%s",
 						t.Name(), cloudEnv, stepNum,
 						commands.TrimLeadingWhitespace(stepConfig))
+
+					if stepPreConfig != nil {
+						stepPreConfig()
+					}
 				},
 				Config:  stepConfig,
 				Destroy: s.Destroy,
@@ -205,20 +213,17 @@ func run(t *testing.T, steps []step) {
 			Check: func(state *terraform.State) error {
 				// get configured client from provider
 				client := provider.Meta().(*common.DatabricksClient)
+
+				// Default check for all runs. Asserts that the read operation succeeds.
 				for n, is := range state.RootModule().Resources {
 					p := strings.Split(n, ".")
+
+					// Skip data resources.
 					if p[0] == "data" {
 						continue
 					}
 					r := provider.ResourcesMap[p[0]]
-					resourcesEverCreated[testResource{
-						ID:       is.Primary.ID,
-						Name:     p[1],
-						Resource: r,
-					}] = true
-					dia := r.ReadContext(ctx, r.Data(&terraform.InstanceState{
-						ID: is.Primary.ID,
-					}), client)
+					dia := r.ReadContext(ctx, r.Data(is.Primary), client)
 					if dia != nil {
 						return fmt.Errorf("%v", dia)
 					}
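The new `PreConfig` hook lets a later step mutate the workspace out-of-band before its template is applied, and the default check now seeds each read with the full `is.Primary` instance state instead of an `InstanceState` carrying only the ID, so attributes such as `cluster_id` are available during the refresh. A minimal sketch of a test using the new field, assuming the `step` struct and `workspaceLevel` helper above; `exampleHcl` and `deleteSomethingOutOfBand` are hypothetical stand-ins:

func TestAccExampleWithPreConfig(t *testing.T) {
	workspaceLevel(t,
		// First apply materializes the resources.
		step{Template: exampleHcl},
		step{
			// Runs after the first step completes, before the second apply.
			PreConfig: func() {
				deleteSomethingOutOfBand(t) // hypothetical out-of-band change
			},
			Template: exampleHcl,
			Check: func(s *terraform.State) error {
				// Assert the provider reconciled the out-of-band change.
				return nil
			},
		},
	)
}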

internal/acceptance/mounts_test.go

Lines changed: 103 additions & 0 deletions (new file)

@@ -0,0 +1,103 @@
+package acceptance
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	"golang.org/x/exp/maps"
+
+	"github.com/databricks/databricks-sdk-go"
+	"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+var mountHcl = `
+data "databricks_spark_version" "latest" {}
+
+# Test cluster to create the mount using.
+resource "databricks_cluster" "this" {
+  cluster_name     = "acc-test-mounts-{var.STICKY_RANDOM}"
+  spark_version    = data.databricks_spark_version.latest.id
+  instance_pool_id = "{env.TEST_INSTANCE_POOL_ID}"
+  num_workers      = 1
+
+  aws_attributes {
+    instance_profile_arn = "{env.TEST_INSTANCE_PROFILE_ARN}"
+  }
+}
+
+resource "databricks_mount" "my_mount" {
+  name       = "test-mount-{var.STICKY_RANDOM}"
+  cluster_id = databricks_cluster.this.id
+
+  s3 {
+    bucket_name = "{env.TEST_S3_BUCKET_NAME}"
+  }
+}`
+
+func TestAccCreateDatabricksMount(t *testing.T) {
+	workspaceLevel(t,
+		step{
+			Template: mountHcl,
+		})
+}
+
+func TestAccCreateDatabricksMountIsFineOnClusterRecreate(t *testing.T) {
+	clusterId1 := ""
+	clusterId2 := ""
+
+	workspaceLevel(t,
+		// Step 1 creates the cluster and mount.
+		step{
+			Template: mountHcl,
+			Check: func(s *terraform.State) error {
+				resources := s.RootModule().Resources
+				cluster := resources["databricks_cluster.this"]
+				if cluster == nil {
+					return fmt.Errorf("expected to find databricks_cluster.this in resources keys: %v", maps.Keys(resources))
+				}
+				clusterId1 = cluster.Primary.ID
+
+				// Assert cluster id is not empty. This is later used to ensure
+				// the cluster has been recreated.
+				assert.NotEmpty(t, clusterId1)
+
+				// Assert the mount points to the created cluster.
+				mount := resources["databricks_mount.my_mount"]
+				assert.Equal(t, clusterId1, mount.Primary.Attributes["cluster_id"])
+				return nil
+			},
+		},
+		// Step 2: Manually delete the cluster, and then reapply the config. The mount
+		// will be recreated in this case.
+		step{
+			PreConfig: func() {
+				w, err := databricks.NewWorkspaceClient(&databricks.Config{})
+				require.NoError(t, err)
+				err = w.Clusters.PermanentDeleteByClusterId(context.Background(), clusterId1)
+				assert.NoError(t, err, "failed to delete the cluster, id: "+clusterId1)
+			},
+			Template: mountHcl,
+			Check: func(s *terraform.State) error {
+				resources := s.RootModule().Resources
+				cluster := resources["databricks_cluster.this"]
+				if cluster == nil {
+					return fmt.Errorf("expected to find databricks_cluster.this in resources keys: %v", maps.Keys(resources))
+				}
+				clusterId2 = cluster.Primary.ID
+
+				// Assert cluster was indeed recreated
+				assert.NotEmpty(t, clusterId1)
+				assert.NotEmpty(t, clusterId2)
+				assert.NotEqual(t, clusterId1, clusterId2)
+
+				// Assert the mount points to the newly recreated cluster.
+				mount := resources["databricks_mount.my_mount"]
+				assert.Equal(t, clusterId2, mount.Primary.Attributes["cluster_id"])
+				return nil
+			},
+		},
+	)
+}
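A note on the second step's `PreConfig`: constructing `databricks.NewWorkspaceClient(&databricks.Config{})` with an empty config makes the Go SDK resolve host and credentials from the environment, so the out-of-band `PermanentDeleteByClusterId` call targets the same workspace the provider under test is configured against.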

storage/resource_mount.go

Lines changed: 40 additions & 2 deletions

@@ -4,6 +4,8 @@ import (
 	"context"
 	"time"
 
+	"github.com/databricks/databricks-sdk-go/apierr"
+	"github.com/databricks/terraform-provider-databricks/clusters"
 	"github.com/databricks/terraform-provider-databricks/common"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
@@ -37,6 +39,42 @@ func (cb mountCallback) preProcess(r *schema.Resource) func(
 	}
 }
 
+// If a cluster_id is specified for the mount, this function validates that the
+// cluster actually exists. If the cluster does not exist, we assume (incorrectly)
+// that the mount also does not exist and remove it from the in-memory representation
+// of the terraform state.
+//
+// In cases of update requests this would lead to an attempted recreation of the mount.
+//
+// In cases of delete, this could orphan the mount, i.e. the mount still exists
+// but is not tracked by the terraform state.
+//
+// This is needed to resolve issue: https://github.com/databricks/terraform-provider-databricks/issues/1864
+// because without a running cluster we do not have a way to do CRUD on mounts.
+// The argument being that possibly orphaning mounts is a better experience than
+// requiring manual edits to tfstate.
+//
+// Why do we need a valid cluster_id to be set here?
+// Answer: Downstream read and delete code relies on a valid cluster_id being set
+// in the terraform state. If the cluster_id is invalid then there is no way to update
+// it using native terraform apply workflows. The only workaround is to manually edit the
+// tfstate file, replacing the existing invalid cluster_id with a new valid one.
+func removeFromStateIfClusterDoesNotExist(cb func(context.Context, *schema.ResourceData, any) diag.Diagnostics) func(context.Context, *schema.ResourceData, any) diag.Diagnostics {
+	return func(ctx context.Context, d *schema.ResourceData, m any) diag.Diagnostics {
+		clusterId := d.Get("cluster_id").(string)
+		if clusterId != "" {
+			clustersAPI := clusters.NewClustersAPI(ctx, m)
+			_, err := clustersAPI.Get(clusterId)
+			if apierr.IsMissing(err) {
+				// Return with empty ID to indicate the resource no longer exists.
+				d.SetId("")
+				return nil
+			}
+		}
+		return cb(ctx, d, m)
+	}
+}
+
 func ResourceDatabricksMountSchema() map[string]*schema.Schema {
 	tpl := GenericMount{}
 	scm := common.StructToSchema(tpl, func(s map[string]*schema.Schema) map[string]*schema.Schema {
@@ -62,8 +100,8 @@ func ResourceMount() *schema.Resource {
 	tpl := GenericMount{}
 	r := commonMountResource(tpl, ResourceDatabricksMountSchema())
 	r.CreateContext = mountCallback(mountCreate).preProcess(r)
-	r.ReadContext = mountCallback(mountRead).preProcess(r)
-	r.DeleteContext = mountCallback(mountDelete).preProcess(r)
+	r.ReadContext = removeFromStateIfClusterDoesNotExist(mountCallback(mountRead).preProcess(r))
+	r.DeleteContext = removeFromStateIfClusterDoesNotExist(mountCallback(mountDelete).preProcess(r))
 	r.Importer = nil
 	r.Timeouts = &schema.ResourceTimeout{
 		Default: schema.DefaultTimeout(20 * time.Minute),
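Here `apierr.IsMissing(err)` is true when the API reports the resource as not found, which is what distinguishes a permanently deleted cluster from transient errors that should still fail the operation. The wrapper itself is a decorator over schema CRUD callbacks; a minimal standalone sketch of the same pattern, assuming this file's imports, with `withExistenceCheck` as a hypothetical generalization (not part of this commit):

// withExistenceCheck short-circuits a CRUD callback when the precondition
// reports the underlying resource is gone, clearing the ID so terraform
// drops the resource from state instead of failing the operation.
func withExistenceCheck(
	exists func(ctx context.Context, d *schema.ResourceData, m any) bool,
	cb func(context.Context, *schema.ResourceData, any) diag.Diagnostics,
) func(context.Context, *schema.ResourceData, any) diag.Diagnostics {
	return func(ctx context.Context, d *schema.ResourceData, m any) diag.Diagnostics {
		if !exists(ctx, d, m) {
			d.SetId("") // an empty ID marks the resource as deleted in state
			return nil
		}
		return cb(ctx, d, m)
	}
}

Clearing the ID inside the delete path is what allows a destroy to succeed even though the mount may be orphaned in the workspace, as the doc note above warns.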
