Commit d3a6204

Exporter: add support for databricks_file resource (#3301)
* Exporter: add support for `databricks_file` resource. Only files explicitly referenced as init scripts & libraries are exported.
* Added tests plus some refactoring.
1 parent 48e157b commit d3a6204

File tree

4 files changed: +164 additions, -21 deletions

docs/guides/experimental-exporter.md

Lines changed: 2 additions & 1 deletion
@@ -79,7 +79,7 @@ Services are just logical groups of resources used for filtering and organizatio
 * `sql-dashboards` - **listing** [databricks_sql_dashboard](../resources/sql_dashboard.md) along with associated [databricks_sql_widget](../resources/sql_widget.md) and [databricks_sql_visualization](../resources/sql_visualization.md).
 * `sql-endpoints` - **listing** [databricks_sql_endpoint](../resources/sql_endpoint.md) along with [databricks_sql_global_config](../resources/sql_global_config.md).
 * `sql-queries` - **listing** [databricks_sql_query](../resources/sql_query.md).
-* `storage` - only [databricks_dbfs_file](../resources/dbfs_file.md) referenced in other resources (libraries, init scripts, ...) will be downloaded locally and properly arranged into terraform state.
+* `storage` - only [databricks_dbfs_file](../resources/dbfs_file.md) and [databricks_file](../resources/file.md) referenced in other resources (libraries, init scripts, ...) will be downloaded locally and properly arranged into terraform state.
 * `uc-artifact-allowlist` - **listing** exports [databricks_artifact_allowlist](../resources/artifact_allowlist.md) resources for Unity Catalog Allow Lists attached to the current metastore.
 * `uc-catalogs` - **listing** [databricks_catalog](../resources/catalog.md) and [databricks_catalog_workspace_binding](../resources/catalog_workspace_binding.md)
 * `uc-connections` - **listing** [databricks_connection](../resources/connection.md). *Please note that because API doesn't return sensitive fields, such as, passwords, tokens, ..., the generated `options` block could be incomplete!*
@@ -124,6 +124,7 @@ Exporter aims to generate HCL code for most of the resources within the Databric
 | [databricks_connection](../resources/connection.md) | Yes | Yes | Yes | No |
 | [databricks_dbfs_file](../resources/dbfs_file.md) | Yes | No | Yes | No |
 | [databricks_external_location](../resources/external_location.md) | Yes | Yes | Yes | No |
+| [databricks_file](../resources/file.md) | Yes | No | Yes | No |
 | [databricks_global_init_script](../resources/global_init_script.md) | Yes | Yes | Yes | No |
 | [databricks_grants](../resources/grants.md) | Yes | No | Yes | No |
 | [databricks_group](../resources/group.md) | Yes | No | Yes | Yes |

exporter/importables.go

Lines changed: 80 additions & 4 deletions
@@ -5,6 +5,7 @@ import (
   "encoding/base64"
   "encoding/json"
   "fmt"
+  "io"
   "log"
   "reflect"
   "regexp"
@@ -175,7 +176,7 @@ var resourcesMap map[string]importable = map[string]importable{
         return err
       }
       name := ic.Importables["databricks_dbfs_file"].Name(ic, r.Data)
-      fileName, err := ic.createFile(name, content)
+      fileName, err := ic.saveFileIn("dbfs_files", name, content)
       log.Printf("Creating %s for %s", fileName, r)
       if err != nil {
         return err
@@ -276,10 +277,13 @@ var resourcesMap map[string]importable = map[string]importable{
       {Path: "instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "driver_instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "init_scripts.volumes.destination", Resource: "databricks_file"},
       {Path: "init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
       {Path: "library.jar", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "library.jar", Resource: "databricks_file"},
       {Path: "library.jar", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "library.whl", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "library.whl", Resource: "databricks_file"},
       {Path: "library.whl", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "library.egg", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
       {Path: "library.egg", Resource: "databricks_workspace_file", Match: "workspace_path"},
@@ -362,6 +366,7 @@ var resourcesMap map[string]importable = map[string]importable{
       {Path: "job_cluster.new_cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"},
       {Path: "job_cluster.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "job_cluster.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "job_cluster.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"},
       {Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
       {Path: "job_cluster.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "job_cluster.new_cluster.policy_id", Resource: "databricks_cluster_policy"},
@@ -371,20 +376,25 @@ var resourcesMap map[string]importable = map[string]importable{
       {Path: "task.library.egg", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
       {Path: "task.library.egg", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.library.jar", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "task.library.jar", Resource: "databricks_file"},
       {Path: "task.library.jar", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.library.whl", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "task.library.whl", Resource: "databricks_file"},
       {Path: "task.library.whl", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.new_cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"},
       {Path: "task.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "task.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "task.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"},
       {Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
       {Path: "task.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "task.new_cluster.policy_id", Resource: "databricks_cluster_policy"},
       {Path: "task.notebook_task.base_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "task.notebook_task.base_parameters", Resource: "databricks_file"},
       {Path: "task.notebook_task.base_parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.notebook_task.notebook_path", Resource: "databricks_notebook"},
       {Path: "task.pipeline_task.pipeline_id", Resource: "databricks_pipeline"},
       {Path: "task.python_wheel_task.named_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "task.python_wheel_task.named_parameters", Resource: "databricks_file"},
       {Path: "task.python_wheel_task.named_parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.python_wheel_task.parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
       {Path: "task.python_wheel_task.parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
@@ -393,11 +403,13 @@ var resourcesMap map[string]importable = map[string]importable{
       {Path: "task.run_job_task.job_parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.spark_jar_task.jar_uri", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
       {Path: "task.spark_jar_task.parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "task.spark_jar_task.parameters", Resource: "databricks_file"},
       {Path: "task.spark_jar_task.parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.spark_python_task.parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
       {Path: "task.spark_python_task.python_file", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
       {Path: "task.spark_python_task.python_file", Resource: "databricks_workspace_file", Match: "path"},
       {Path: "task.spark_submit_task.parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "task.spark_submit_task.parameters", Resource: "databricks_file"},
       {Path: "task.spark_submit_task.parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "task.sql_task.file.path", Resource: "databricks_workspace_file", Match: "path"},
       {Path: "task.dbt_task.project_directory", Resource: "databricks_directory", Match: "path"},
@@ -738,6 +750,10 @@ var resourcesMap map[string]importable = map[string]importable{
           strings.HasSuffix(k, ".dbfs.destination") {
           ic.emitIfDbfsFile(eitherString(value, defaultValue))
         }
+        if typ == "fixed" && strings.HasPrefix(k, "init_scripts.") &&
+          strings.HasSuffix(k, ".volumes.destination") {
+          ic.emitIfVolumeFile(eitherString(value, defaultValue))
+        }
         if typ == "fixed" && strings.HasPrefix(k, "init_scripts.") &&
           strings.HasSuffix(k, ".workspace.destination") {
           ic.emitWorkspaceFileOrRepo(eitherString(value, defaultValue))
@@ -760,6 +776,8 @@ var resourcesMap map[string]importable = map[string]importable{
         ic.emitIfWsfsFile(lib.Whl)
         ic.emitIfWsfsFile(lib.Jar)
         ic.emitIfWsfsFile(lib.Egg)
+        ic.emitIfVolumeFile(lib.Whl)
+        ic.emitIfVolumeFile(lib.Jar)
       }

       policyFamilyId := clusterPolicy.PolicyFamilyId
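Both hunks above call `ic.emitIfVolumeFile`, which is defined in the one changed file whose diff is not reproduced on this page (the commit touches four files, but only three diffs are shown). By analogy with the existing `emitIfDbfsFile`/`emitIfWsfsFile` helpers and the `ic.Emit` calls visible elsewhere in this diff, a minimal sketch of what such a helper presumably does is shown below; the committed implementation may differ in its details.

// Hypothetical sketch; the real helper lives in a changed file not shown in this excerpt.
// It emits a databricks_file resource only when the path points into a Unity Catalog volume.
func (ic *importContext) emitIfVolumeFile(path string) {
  if strings.HasPrefix(path, "/Volumes/") {
    ic.Emit(&resource{
      Resource: "databricks_file",
      ID:       path,
    })
  }
}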
@@ -784,8 +802,10 @@ var resourcesMap map[string]importable = map[string]importable{
     },
     Depends: []reference{
       {Path: "libraries.jar", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "libraries.jar", Resource: "databricks_file"},
       {Path: "libraries.jar", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "libraries.whl", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "libraries.whl", Resource: "databricks_file"},
       {Path: "libraries.whl", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "libraries.egg", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
       {Path: "libraries.egg", Resource: "databricks_workspace_file", Match: "workspace_path"},
@@ -1280,7 +1300,7 @@ var resourcesMap map[string]importable = map[string]importable{
       if err != nil {
         return err
       }
-      fileName, err := ic.createFile(fmt.Sprintf("%s.sh", r.Name), content)
+      fileName, err := ic.saveFileIn("global_init_scripts", fmt.Sprintf("%s.sh", ic.ResourceName(r)), content)
       if err != nil {
         return err
       }
@@ -1479,7 +1499,7 @@ var resourcesMap map[string]importable = map[string]importable{
       objectId := r.Data.Get("object_id").(int)
       name := fileNameNormalizationRegex.ReplaceAllString(r.ID[1:], "_") + "_" + strconv.Itoa(objectId) + fileExtension
       content, _ := base64.StdEncoding.DecodeString(contentB64)
-      fileName, err := ic.createFileIn("notebooks", name, []byte(content))
+      fileName, err := ic.saveFileIn("notebooks", name, []byte(content))
       if err != nil {
         return err
       }
@@ -1540,7 +1560,7 @@ var resourcesMap map[string]importable = map[string]importable{
       }
       name := fileNameNormalizationRegex.ReplaceAllString(strings.Join(parts, "/")[1:], "_")
       content, _ := base64.StdEncoding.DecodeString(contentB64)
-      fileName, err := ic.createFileIn("workspace_files", name, []byte(content))
+      fileName, err := ic.saveFileIn("workspace_files", name, []byte(content))
       if err != nil {
         return err
       }
@@ -1994,11 +2014,13 @@ var resourcesMap map[string]importable = map[string]importable{
     Depends: []reference{
       {Path: "cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"},
       {Path: "cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "cluster.init_scripts.volumes.destination", Resource: "databricks_file"},
       {Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
       {Path: "cluster.instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
       {Path: "cluster.policy_id", Resource: "databricks_cluster_policy"},
       {Path: "configuration", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
+      {Path: "configuration", Resource: "databricks_file"},
       {Path: "configuration", Resource: "databricks_workspace_file", Match: "workspace_path"},
       {Path: "library.notebook.path", Resource: "databricks_notebook"},
       {Path: "library.file.path", Resource: "databricks_workspace_file"},
@@ -2906,4 +2928,58 @@ var resourcesMap map[string]importable = map[string]importable{
       {Path: "securable_name", Resource: "databricks_catalog", Match: "name"},
     },
   },
+  "databricks_file": {
+    WorkspaceLevel: true,
+    Service: "storage",
+    // TODO: can we implement incremental mode?
+    Name: func(ic *importContext, d *schema.ResourceData) string {
+      name := strings.TrimPrefix(d.Id(), "/Volumes/")
+      fileNameMd5 := fmt.Sprintf("%x", md5.Sum([]byte(name)))
+      return strings.ToLower(name) + "_" + fileNameMd5[:8]
+    },
+    Import: func(ic *importContext, r *resource) error {
+      parts := strings.Split(r.ID, "/")
+      // Converting /Volumes/<catalog>/<schema>/<table>/<file> to <catalog>.<schema>.<table>
+      if len(parts) > 5 {
+        volumeId := strings.Join(parts[2:5], ".")
+        ic.Emit(&resource{
+          Resource: "databricks_volume",
+          ID: volumeId,
+        })
+      }
+
+      // download & store file
+      resp, err := ic.workspaceClient.Files.DownloadByFilePath(ic.Context, r.ID)
+      if err != nil {
+        return err
+      }
+      // write file
+      fileName := ic.prefix + fileNameNormalizationRegex.ReplaceAllString(strings.TrimPrefix(r.ID, "/Volumes/"), "_")
+      local, relativeName, err := ic.createFileIn("uc_files", fileName)
+      if err != nil {
+        return err
+      }
+      defer local.Close()
+      defer resp.Contents.Close()
+      _, err = io.Copy(local, resp.Contents)
+      if err != nil {
+        return err
+      }
+      r.Data.Set("source", relativeName)
+      r.Data.Set("path", r.ID)
+
+      return nil
+    },
+    ShouldOmitField: func(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool {
+      switch pathString {
+      case "md5", "remote_file_modified", "modification_time", "file_size":
+        return true
+      }
+      return defaultShouldOmitFieldFunc(ic, pathString, as, d)
+    },
+    Depends: []reference{
+      {Path: "source", File: true},
+      {Path: "path", Resource: "databricks_volume", Match: "volume_path", MatchType: MatchLongestPrefix},
+    },
+  },
 }
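Part of the "some refactoring" mentioned in the commit message replaces the old `createFile`/`createFileIn` helpers with `saveFileIn` (write a byte slice and return the module-relative path) plus a two-argument `createFileIn` that returns an open file handle together with that relative path, which is what lets the volume file above be streamed to disk via `io.Copy` instead of being buffered in memory. The helpers themselves sit in the changed file that is not shown on this page; inferred purely from the call sites in this diff, they plausibly look like the following sketch (an assumption, not the committed code; it expects "fmt", "os" and "path/filepath" to be imported).

// Sketch inferred from call sites only; directory handling and error paths are assumptions.
func (ic *importContext) createFileIn(dir, name string) (*os.File, string, error) {
  localFileName := fmt.Sprintf("%s/%s/%s", ic.Directory, dir, name)
  if err := os.MkdirAll(filepath.Dir(localFileName), 0755); err != nil {
    return nil, "", err
  }
  local, err := os.Create(localFileName)
  if err != nil {
    return nil, "", err
  }
  // The relative name is what later ends up in the generated `source` attribute,
  // e.g. "uc_files/main/default/wheels/some.whl".
  return local, fmt.Sprintf("%s/%s", dir, name), nil
}

func (ic *importContext) saveFileIn(dir, name string, content []byte) (string, error) {
  local, relativeName, err := ic.createFileIn(dir, name)
  if err != nil {
    return "", err
  }
  defer local.Close()
  _, err = local.Write(content)
  return relativeName, err
}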

exporter/importables_test.go

Lines changed: 40 additions & 2 deletions
@@ -1231,7 +1231,7 @@ func TestGlobalInitScriptGeneration(t *testing.T) {
   ic.generateAndWriteResources(nil)
   assert.Equal(t, commands.TrimLeadingWhitespace(`
   resource "databricks_global_init_script" "new_importing_things" {
-    source = "${path.module}/files/new_importing_things.sh"
+    source = "${path.module}/global_init_scripts/new_importing_things.sh"
     name = "New: Importing ^ Things"
     enabled = true
   }`), getGeneratedFile(ic, "workspace"))
@@ -1298,7 +1298,7 @@ func TestDbfsFileGeneration(t *testing.T) {
   ic.generateAndWriteResources(nil)
   assert.Equal(t, commands.TrimLeadingWhitespace(`
   resource "databricks_dbfs_file" "_0cc175b9c0f1b6a831c399e269772661_a" {
-    source = "${path.module}/files/_0cc175b9c0f1b6a831c399e269772661_a"
+    source = "${path.module}/dbfs_files/_0cc175b9c0f1b6a831c399e269772661_a"
     path = "a"
   }`), getGeneratedFile(ic, "storage"))
 })
@@ -2128,3 +2128,41 @@ func TestAuxUcFunctions(t *testing.T) {
   assert.False(t, shouldOmitFunc(nil, "isolation_mode", scm["isolation_mode"], d))
   assert.False(t, shouldOmitFunc(nil, "name", scm["name"], d))
 }
+
+func TestImportUcVolumeFile(t *testing.T) {
+  qa.HTTPFixturesApply(t, []qa.HTTPFixture{
+    {
+      ReuseRequest: true,
+      Method: "GET",
+      Resource: "/api/2.0/fs/files/Volumes/main/default/wheels/some.whl?",
+      Response: "test",
+    },
+  }, func(ctx context.Context, client *common.DatabricksClient) {
+    ic := importContextForTestWithClient(ctx, client)
+    tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName())
+    defer os.RemoveAll(tmpDir)
+    os.Mkdir(tmpDir, 0700)
+    ic.Directory = tmpDir
+    ic.enableServices("storage")
+    ic.currentMetastore = currentMetastoreResponse
+
+    file_path := "/Volumes/main/default/wheels/some.whl"
+    d := storage.ResourceFile().ToResource().TestResourceData()
+    d.SetId(file_path)
+    err := resourcesMap["databricks_file"].Import(ic, &resource{
+      ID: file_path,
+      Data: d,
+    })
+    assert.NoError(t, err)
+    assert.Equal(t, file_path, d.Get("path"))
+    assert.Equal(t, "uc_files/main/default/wheels/some.whl", d.Get("source"))
+    // Testing auxiliary functions
+    shouldOmitFunc := resourcesMap["databricks_file"].ShouldOmitField
+    require.NotNil(t, shouldOmitFunc)
+    scm := storage.ResourceFile().Schema
+    assert.True(t, shouldOmitFunc(ic, "md5", scm["md5"], d))
+    assert.False(t, shouldOmitFunc(ic, "path", scm["path"], d))
+
+    assert.Equal(t, "main/default/wheels/some.whl_f27badf8", resourcesMap["databricks_file"].Name(nil, d))
+  })
+}
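The final assertion in this test follows directly from the `Name` function of the new `databricks_file` importable: strip the `/Volumes/` prefix, lower-case the remainder, and append the first eight hex characters of the MD5 of the stripped path. A small self-contained illustration of the same rule (it prints the exact string asserted above):

package main

import (
  "crypto/md5"
  "fmt"
  "strings"
)

func main() {
  // Same naming rule as the databricks_file importable's Name function.
  id := "/Volumes/main/default/wheels/some.whl"
  name := strings.TrimPrefix(id, "/Volumes/")
  sum := fmt.Sprintf("%x", md5.Sum([]byte(name)))
  fmt.Println(strings.ToLower(name) + "_" + sum[:8]) // main/default/wheels/some.whl_f27badf8
}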
