Skip to content

Commit dacf048

Browse files
authored
Exporter: Correctly handle not allowed characters in the notebooks/directories names (#2831)
Our UI & API allows to create directories and notebooks with characters like `\` and `"` in names, but they were incorrectly handled in exporter. This PR fixes that by doing following: * Correctly handle special characters when generating references * Sanitizing file names for exported notebooks & workspace files It also should fix #2661 because we'll remove bad characters from file names
1 parent 98f5174 commit dacf048

File tree

3 files changed

+93
-16
lines changed

3 files changed

+93
-16
lines changed

exporter/context.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,12 @@ func (ic *importContext) Emit(r *resource) {
829829
}
830830
}
831831

832+
func maybeAddQuoteCharacter(s string) string {
833+
s = strings.ReplaceAll(s, "\\", "\\\\")
834+
s = strings.ReplaceAll(s, "\"", "\\\"")
835+
return s
836+
}
837+
832838
func (ic *importContext) getTraversalTokens(ref reference, value string) hclwrite.Tokens {
833839
matchType := ref.MatchTypeValue()
834840
attr := ref.MatchAttribute()
@@ -849,18 +855,18 @@ func (ic *importContext) getTraversalTokens(ref reference, value string) hclwrit
849855
tokens := hclwrite.Tokens{&hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"', '$', '{'}}}
850856
tokens = append(tokens, hclwrite.TokensForTraversal(traversal)...)
851857
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'}'}})
852-
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(rest)})
858+
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(rest))})
853859
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}})
854860
return tokens
855861
case MatchRegexp:
856862
indices := ref.Regexp.FindStringSubmatchIndex(value)
857863
if len(indices) == 4 {
858864
tokens := hclwrite.Tokens{&hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"'}}}
859-
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(value[0:indices[2]])})
865+
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(value[0:indices[2]]))})
860866
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'$', '{'}})
861867
tokens = append(tokens, hclwrite.TokensForTraversal(traversal)...)
862868
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'}'}})
863-
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(value[indices[3]:])})
869+
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(value[indices[3]:]))})
864870
tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}})
865871
return tokens
866872
}

exporter/importables.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ var (
4141
gsRegex = regexp.MustCompile(`^gs://([^/]+)(/.*)?$`)
4242
globalWorkspaceConfName = "global_workspace_conf"
4343
nameNormalizationRegex = regexp.MustCompile(`\W+`)
44+
fileNameNormalizationRegex = regexp.MustCompile(`[^-_\w/.@]`)
4445
jobClustersRegex = regexp.MustCompile(`^((job_cluster|task)\.[0-9]+\.new_cluster\.[0-9]+\.)`)
4546
dltClusterRegex = regexp.MustCompile(`^(cluster\.[0-9]+\.)`)
4647
predefinedClusterPolicies = []string{"Personal Compute", "Job Compute", "Power User Compute", "Shared Compute"}
@@ -1313,7 +1314,8 @@ var resourcesMap map[string]importable = map[string]importable{
13131314
fileExtension = fileExtensionFormatMapping[ic.notebooksFormat]
13141315
}
13151316
r.Data.Set("format", ic.notebooksFormat)
1316-
name := r.ID[1:] + fileExtension // todo: replace non-alphanum+/ with _
1317+
objectId := r.Data.Get("object_id").(int)
1318+
name := fileNameNormalizationRegex.ReplaceAllString(r.ID[1:], "_") + "_" + strconv.Itoa(objectId) + fileExtension
13171319
content, _ := base64.StdEncoding.DecodeString(contentB64)
13181320
fileName, err := ic.createFileIn("notebooks", name, []byte(content))
13191321
if err != nil {
@@ -1322,7 +1324,7 @@ var resourcesMap map[string]importable = map[string]importable{
13221324
if ic.meAdmin {
13231325
ic.Emit(&resource{
13241326
Resource: "databricks_permissions",
1325-
ID: fmt.Sprintf("/notebooks/%d", r.Data.Get("object_id").(int)),
1327+
ID: fmt.Sprintf("/notebooks/%d", objectId),
13261328
Name: "notebook_" + ic.Importables["databricks_notebook"].Name(ic, r.Data),
13271329
})
13281330
}
@@ -1359,7 +1361,15 @@ var resourcesMap map[string]importable = map[string]importable{
13591361
if err != nil {
13601362
return err
13611363
}
1362-
name := r.ID[1:]
1364+
objectId := r.Data.Get("object_id").(int)
1365+
parts := strings.Split(r.ID, "/")
1366+
plen := len(parts)
1367+
if idx := strings.Index(parts[plen-1], "."); idx != -1 {
1368+
parts[plen-1] = parts[plen-1][:idx] + "_" + strconv.Itoa(objectId) + parts[plen-1][idx:]
1369+
} else {
1370+
parts[plen-1] = parts[plen-1] + "_" + strconv.Itoa(objectId)
1371+
}
1372+
name := fileNameNormalizationRegex.ReplaceAllString(strings.Join(parts, "/")[1:], "_")
13631373
content, _ := base64.StdEncoding.DecodeString(contentB64)
13641374
fileName, err := ic.createFileIn("workspace_files", name, []byte(content))
13651375
if err != nil {
@@ -1369,7 +1379,7 @@ var resourcesMap map[string]importable = map[string]importable{
13691379
if ic.meAdmin {
13701380
ic.Emit(&resource{
13711381
Resource: "databricks_permissions",
1372-
ID: fmt.Sprintf("/files/%d", r.Data.Get("object_id").(int)),
1382+
ID: fmt.Sprintf("/files/%d", objectId),
13731383
Name: "ws_file_" + ic.Importables["databricks_workspace_file"].Name(ic, r.Data),
13741384
})
13751385
}

exporter/importables_test.go

Lines changed: 70 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -799,14 +799,15 @@ func TestRepoListFails(t *testing.T) {
799799
})
800800
}
801801

802-
func testGenerate(t *testing.T, fixtures []qa.HTTPFixture, services string, cb func(*importContext)) {
802+
func testGenerate(t *testing.T, fixtures []qa.HTTPFixture, services string, asAdmin bool, cb func(*importContext)) {
803803
qa.HTTPFixturesApply(t, fixtures, func(ctx context.Context, client *common.DatabricksClient) {
804804
ic := importContextForTest()
805805
ic.Directory = fmt.Sprintf("/tmp/tf-%s", qa.RandomName())
806806
defer os.RemoveAll(ic.Directory)
807807
ic.Client = client
808808
ic.Context = ctx
809809
ic.testEmits = nil
810+
ic.meAdmin = asAdmin
810811
ic.importing = map[string]bool{}
811812
ic.variables = map[string]string{}
812813
ic.services = services
@@ -850,7 +851,7 @@ func TestNotebookGeneration(t *testing.T) {
850851
Content: "YWJj",
851852
},
852853
},
853-
}, "notebooks", func(ic *importContext) {
854+
}, "notebooks", false, func(ic *importContext) {
854855
ic.notebooksFormat = "SOURCE"
855856
err := resourcesMap["databricks_notebook"].List(ic)
856857
assert.NoError(t, err)
@@ -859,7 +860,7 @@ func TestNotebookGeneration(t *testing.T) {
859860
ic.generateHclForResources(nil)
860861
assert.Equal(t, commands.TrimLeadingWhitespace(`
861862
resource "databricks_notebook" "first_second_123" {
862-
source = "${path.module}/notebooks/First/Second.py"
863+
source = "${path.module}/notebooks/First/Second_123.py"
863864
path = "/First/Second"
864865
}`), string(ic.Files["notebooks"].Bytes()))
865866
})
@@ -900,7 +901,7 @@ func TestNotebookGenerationJupyter(t *testing.T) {
900901
Content: "YWJj",
901902
},
902903
},
903-
}, "notebooks", func(ic *importContext) {
904+
}, "notebooks", false, func(ic *importContext) {
904905
ic.notebooksFormat = "JUPYTER"
905906
err := resourcesMap["databricks_notebook"].List(ic)
906907
assert.NoError(t, err)
@@ -909,14 +910,74 @@ func TestNotebookGenerationJupyter(t *testing.T) {
909910
ic.generateHclForResources(nil)
910911
assert.Equal(t, commands.TrimLeadingWhitespace(`
911912
resource "databricks_notebook" "first_second_123" {
912-
source = "${path.module}/notebooks/First/Second.ipynb"
913+
source = "${path.module}/notebooks/First/Second_123.ipynb"
913914
path = "/First/Second"
914915
language = "PYTHON"
915916
format = "JUPYTER"
916917
}`), string(ic.Files["notebooks"].Bytes()))
917918
})
918919
}
919920

921+
func TestNotebookGenerationBadCharacters(t *testing.T) {
922+
testGenerate(t, []qa.HTTPFixture{
923+
{
924+
Method: "GET",
925+
Resource: "/api/2.0/workspace/list?path=%2F",
926+
Response: workspace.ObjectList{
927+
Objects: []workspace.ObjectStatus{
928+
{
929+
Path: "/Repos/Foo/Bar",
930+
ObjectType: "NOTEBOOK",
931+
},
932+
{
933+
Path: "/Fir\"st\\/Second",
934+
ObjectType: "NOTEBOOK",
935+
},
936+
},
937+
},
938+
},
939+
{
940+
Method: "GET",
941+
Resource: "/api/2.0/workspace/get-status?path=%2FFir%22st%5C",
942+
943+
Response: workspace.ObjectStatus{
944+
ObjectID: 124,
945+
ObjectType: "DIRECTORY",
946+
Path: "/Fir\"st\\",
947+
},
948+
},
949+
{
950+
Method: "GET",
951+
Resource: "/api/2.0/workspace/get-status?path=%2FFir%22st%5C%2FSecond",
952+
Response: workspace.ObjectStatus{
953+
ObjectID: 123,
954+
ObjectType: "NOTEBOOK",
955+
Path: "/Fir\"st\\/Second",
956+
Language: "PYTHON",
957+
},
958+
},
959+
{
960+
Method: "GET",
961+
Resource: "/api/2.0/workspace/export?format=SOURCE&path=%2FFir%22st%5C%2FSecond",
962+
Response: workspace.ExportPath{
963+
Content: "YWJj",
964+
},
965+
},
966+
}, "notebooks,directories", true, func(ic *importContext) {
967+
ic.notebooksFormat = "SOURCE"
968+
err := resourcesMap["databricks_notebook"].List(ic)
969+
assert.NoError(t, err)
970+
ic.waitGroup.Wait()
971+
ic.closeImportChannels()
972+
ic.generateHclForResources(nil)
973+
assert.Equal(t, commands.TrimLeadingWhitespace(`
974+
resource "databricks_notebook" "fir_st_second_123" {
975+
source = "${path.module}/notebooks/Fir_st_/Second_123.py"
976+
path = "/Fir\"st\\/Second"
977+
}`), string(ic.Files["notebooks"].Bytes()))
978+
})
979+
}
980+
920981
func TestDirectoryGeneration(t *testing.T) {
921982
testGenerate(t, []qa.HTTPFixture{
922983
{
@@ -950,7 +1011,7 @@ func TestDirectoryGeneration(t *testing.T) {
9501011
Path: "/first",
9511012
},
9521013
},
953-
}, "directories", func(ic *importContext) {
1014+
}, "directories", false, func(ic *importContext) {
9541015
err := resourcesMap["databricks_directory"].List(ic)
9551016
assert.NoError(t, err)
9561017

@@ -976,7 +1037,7 @@ func TestGlobalInitScriptGen(t *testing.T) {
9761037
ContentBase64: "YWJj",
9771038
},
9781039
},
979-
}, "workspace", func(ic *importContext) {
1040+
}, "workspace", false, func(ic *importContext) {
9801041
ic.Emit(&resource{
9811042
Resource: "databricks_global_init_script",
9821043
ID: "a",
@@ -1008,7 +1069,7 @@ func TestSecretGen(t *testing.T) {
10081069
},
10091070
},
10101071
},
1011-
}, "secrets", func(ic *importContext) {
1072+
}, "secrets", false, func(ic *importContext) {
10121073
ic.Emit(&resource{
10131074
Resource: "databricks_secret",
10141075
ID: "a|||b",
@@ -1043,7 +1104,7 @@ func TestDbfsFileGen(t *testing.T) {
10431104
BytesRead: 3,
10441105
},
10451106
},
1046-
}, "storage", func(ic *importContext) {
1107+
}, "storage", false, func(ic *importContext) {
10471108
ic.Emit(&resource{
10481109
Resource: "databricks_dbfs_file",
10491110
ID: "a",

0 commit comments

Comments
 (0)