Skip to content

Commit 01a758b

Browse files
authored
[Exporter] Emit files installed with %pip install in Python notebooks (#4664)
## Changes <!-- Summary of your changes that are easy to understand --> This is quite a naive implementation, but it should help a bit for extracting dependencies, especially for DLT pipelines ## Tests <!-- How is this tested? Please see the checklist below and also describe any other relevant tests --> - [x] `make test` run locally - [ ] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] using Go SDK - [ ] using TF Plugin Framework
1 parent 9993f3d commit 01a758b

File tree

4 files changed

+107
-35
lines changed

4 files changed

+107
-35
lines changed

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
### Exporter
2121

22+
* Emit files installed with `%pip install` in Python notebooks ([#4664](https://github.com/databricks/terraform-provider-databricks/pull/4664))
2223
* Correctly handle account-level identities when generating the code ([#4650](https://github.com/databricks/terraform-provider-databricks/pull/4650))
2324
* Add export of dashboard tasks in `databricks_job` ([#4665](https://github.com/databricks/terraform-provider-databricks/pull/4665))
2425
* Add export of PowerBI tasks in `databricks_job` ([#4668](https://github.com/databricks/terraform-provider-databricks/pull/4668))

exporter/impl_workspace.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package exporter
2+
3+
import (
4+
"encoding/base64"
5+
"fmt"
6+
"strconv"
7+
"strings"
8+
9+
"github.com/databricks/databricks-sdk-go/apierr"
10+
sdk_workspace "github.com/databricks/databricks-sdk-go/service/workspace"
11+
)
12+
13+
func ImportNotebook(ic *importContext, r *resource) error {
14+
ic.emitUserOrServicePrincipalForPath(r.ID, "/Users")
15+
resp, err := ic.workspaceClient.Workspace.Export(ic.Context, sdk_workspace.ExportRequest{
16+
Path: r.ID,
17+
Format: sdk_workspace.ExportFormat(ic.notebooksFormat),
18+
})
19+
if err != nil {
20+
if apierr.IsMissing(err) {
21+
ic.addIgnoredResource(fmt.Sprintf("databricks_notebook. path=%s", r.ID))
22+
}
23+
return err
24+
}
25+
var fileExtension string
26+
if ic.notebooksFormat == "SOURCE" {
27+
language := r.Data.Get("language").(string)
28+
fileExtension = fileExtensionLanguageMapping[language]
29+
r.Data.Set("language", "")
30+
} else {
31+
fileExtension = fileExtensionFormatMapping[ic.notebooksFormat]
32+
}
33+
r.Data.Set("format", ic.notebooksFormat)
34+
objectId := r.Data.Get("object_id").(int)
35+
name := fileNameNormalizationRegex.ReplaceAllString(r.ID[1:], "_") + "_" + strconv.Itoa(objectId) + fileExtension
36+
content, _ := base64.StdEncoding.DecodeString(resp.Content)
37+
fileName, err := ic.saveFileIn("notebooks", name, []byte(content))
38+
if err != nil {
39+
return err
40+
}
41+
if ic.notebooksFormat == "SOURCE" && r.Data.Get("language").(string) == "PYTHON" {
42+
analyzeNotebook(ic, string(content))
43+
}
44+
ic.emitPermissionsIfNotIgnored(r, fmt.Sprintf("/notebooks/%d", objectId),
45+
"notebook_"+ic.Importables["databricks_notebook"].Name(ic, r.Data))
46+
// TODO: it's not completely correct condition - we need to make emit smarter -
47+
// emit only if permissions are different from their parent's permission.
48+
ic.emitWorkspaceObjectParentDirectory(r)
49+
return r.Data.Set("source", fileName)
50+
}
51+
52+
func analyzeNotebook(ic *importContext, content string) {
53+
lines := strings.Split(content, "\n")
54+
for _, line := range lines {
55+
if strings.HasPrefix(strings.TrimSpace(line), "#") {
56+
continue
57+
}
58+
59+
if strings.HasPrefix(strings.TrimSpace(line), "%pip") && strings.Contains(line, " install ") {
60+
parts := strings.Fields(line)
61+
if len(parts) < 3 {
62+
continue
63+
}
64+
for _, part := range parts[2:] {
65+
if strings.HasPrefix(part, "-") || strings.Contains(part, "==") {
66+
continue
67+
}
68+
if strings.HasPrefix(part, "/dbfs/") {
69+
ic.Emit(&resource{
70+
Resource: "databricks_dbfs_file",
71+
ID: strings.TrimPrefix(part, "/dbfs"),
72+
})
73+
} else if strings.HasPrefix(part, "/Workspace/") {
74+
ic.Emit(&resource{
75+
Resource: "databricks_workspace_file",
76+
ID: strings.TrimPrefix(part, "/Workspace"),
77+
})
78+
} else if strings.HasPrefix(part, "/Volumes/") {
79+
ic.Emit(&resource{
80+
Resource: "databricks_file",
81+
ID: part,
82+
})
83+
}
84+
}
85+
}
86+
}
87+
88+
}

exporter/importables.go

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,41 +1004,7 @@ var resourcesMap map[string]importable = map[string]importable{
10041004
WorkspaceLevel: true,
10051005
Service: "notebooks",
10061006
Name: workspaceObjectResouceName,
1007-
Import: func(ic *importContext, r *resource) error {
1008-
ic.emitUserOrServicePrincipalForPath(r.ID, "/Users")
1009-
resp, err := ic.workspaceClient.Workspace.Export(ic.Context, sdk_workspace.ExportRequest{
1010-
Path: r.ID,
1011-
Format: sdk_workspace.ExportFormat(ic.notebooksFormat),
1012-
})
1013-
if err != nil {
1014-
if apierr.IsMissing(err) {
1015-
ic.addIgnoredResource(fmt.Sprintf("databricks_notebook. path=%s", r.ID))
1016-
}
1017-
return err
1018-
}
1019-
var fileExtension string
1020-
if ic.notebooksFormat == "SOURCE" {
1021-
language := r.Data.Get("language").(string)
1022-
fileExtension = fileExtensionLanguageMapping[language]
1023-
r.Data.Set("language", "")
1024-
} else {
1025-
fileExtension = fileExtensionFormatMapping[ic.notebooksFormat]
1026-
}
1027-
r.Data.Set("format", ic.notebooksFormat)
1028-
objectId := r.Data.Get("object_id").(int)
1029-
name := fileNameNormalizationRegex.ReplaceAllString(r.ID[1:], "_") + "_" + strconv.Itoa(objectId) + fileExtension
1030-
content, _ := base64.StdEncoding.DecodeString(resp.Content)
1031-
fileName, err := ic.saveFileIn("notebooks", name, []byte(content))
1032-
if err != nil {
1033-
return err
1034-
}
1035-
ic.emitPermissionsIfNotIgnored(r, fmt.Sprintf("/notebooks/%d", objectId),
1036-
"notebook_"+ic.Importables["databricks_notebook"].Name(ic, r.Data))
1037-
// TODO: it's not completely correct condition - we need to make emit smarter -
1038-
// emit only if permissions are different from their parent's permission.
1039-
ic.emitWorkspaceObjectParentDirectory(r)
1040-
return r.Data.Set("source", fileName)
1041-
},
1007+
Import: ImportNotebook,
10421008
ShouldOmitField: func(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool {
10431009
switch pathString {
10441010
case "language":

exporter/util_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,3 +497,20 @@ func TestParsingServices(t *testing.T) {
497497
assert.ElementsMatch(t, expectedServices, specificServices)
498498

499499
}
500+
501+
func TestAnalyzeNotebook(t *testing.T) {
502+
ic := importContextForTest()
503+
ic.enableServices("notebooks,wsfiles,volumes,storage")
504+
content := `
505+
%pip install -U pandas /dbfs/tmp/mypkg.whl numpy==1.23.5
506+
# %pip install -U pandas /dbfs/tmp/mypkg2.whl numpy==1.23.5
507+
%pip install -U pandas /Volumes/default/main/tmp/mypkg3.whl numpy==1.23.5
508+
%pip install -U pandas /Workspace/Shared/tmp/mypkg4.whl numpy==1.23.5
509+
510+
`
511+
analyzeNotebook(ic, content)
512+
assert.Equal(t, 3, len(ic.testEmits))
513+
assert.True(t, ic.testEmits["databricks_dbfs_file[<unknown>] (id: /tmp/mypkg.whl)"])
514+
assert.True(t, ic.testEmits["databricks_file[<unknown>] (id: /Volumes/default/main/tmp/mypkg3.whl)"])
515+
assert.True(t, ic.testEmits["databricks_workspace_file[<unknown>] (id: /Shared/tmp/mypkg4.whl)"])
516+
}

0 commit comments

Comments
 (0)