Skip to content

Commit 29bb830

Browse files
authored
[Exporter] **Breaking change**: Move databricks_workspace_file to a separate service (#4118)
## Changes
<!-- Summary of your changes that are easy to understand -->
Move `databricks_workspace_file` to a separate service, `wsfiles`, so that workspace files can be listed and exported separately from notebooks. If you used `notebooks` in the `-listing` or `-services` options, you now need to append `wsfiles` to those options.

## Tests
<!-- How is this tested? Please see the checklist below and also describe any other relevant tests -->
- [x] `make test` run locally
- [x] relevant change in `docs/` folder
- [ ] covered with integration tests in `internal/acceptance`
- [ ] relevant acceptance tests are passing
- [ ] using Go SDK
1 parent b9fb47c commit 29bb830

File tree

8 files changed

+53
-47
lines changed

8 files changed

+53
-47
lines changed

docs/guides/experimental-exporter.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ Services are just logical groups of resources used for filtering and organizatio
120120
* `mlflow-webhooks` - **listing** [databricks_mlflow_webhook](../resources/mlflow_webhook.md).
121121
* `model-serving` - **listing** [databricks_model_serving](../resources/model_serving.md).
122122
* `mounts` - **listing** works only in combination with `-mounts` command-line option.
123-
* `notebooks` - **listing** [databricks_notebook](../resources/notebook.md) and [databricks_workspace_file](../resources/workspace_file.md).
123+
* `notebooks` - **listing** [databricks_notebook](../resources/notebook.md).
124124
* `policies` - **listing** [databricks_cluster_policy](../resources/cluster_policy.md).
125125
* `pools` - **listing** [instance pools](../resources/instance_pool.md).
126126
* `repos` - **listing** [databricks_repo](../resources/repo.md)
@@ -148,6 +148,7 @@ Services are just logical groups of resources used for filtering and organizatio
148148
* `users` - [databricks_user](../resources/user.md) and [databricks_service_principal](../resources/service_principal.md) are written to their own file, simply because there are so many of them. If you use SCIM provisioning, migrating workspaces is the only use case for importing the `users` service.
149149
* `vector-search` - **listing** exports [databricks_vector_search_endpoint](../resources/vector_search_endpoint.md) and [databricks_vector_search_index](../resources/vector_search_index.md)
150150
* `workspace` - **listing** [databricks_workspace_conf](../resources/workspace_conf.md) and [databricks_global_init_script](../resources/global_init_script.md)
151+
* `wsfiles` - **listing** [databricks_workspace_file](../resources/workspace_file.md).
151152

152153
## Secrets
153154

exporter/command.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ func (ic *importContext) allServicesAndListing() (string, string) {
4141
listing[ir.Service] = struct{}{}
4242
}
4343
}
44-
// We need this to specify default listings of UC objects...
45-
for _, ir := range []string{"uc-schemas", "uc-models", "uc-tables", "uc-volumes"} {
44+
// We need this to specify default listings of UC & Workspace objects...
45+
for _, ir := range []string{"uc-schemas", "uc-models", "uc-tables", "uc-volumes",
46+
"notebooks", "directories", "wsfiles"} {
4647
listing[ir] = struct{}{}
4748
}
4849
return strings.Join(maps.Keys(services), ","), strings.Join(maps.Keys(listing), ",")

exporter/context.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,14 +374,30 @@ func (ic *importContext) Run() error {
374374
ic.startImportChannels()
375375

376376
// Start listing of objects
377+
listWorkspaceObjectsAlreadyRunning := false
377378
for rnLoop, irLoop := range ic.Importables {
378379
resourceName := rnLoop
379380
ir := irLoop
381+
// TODO: extend this to other services? Like, Git Folders
382+
if !ic.accountLevel && (ir.Service == "notebooks" || ir.Service == "wsfiles" || (ir.Service == "directories" && !ic.incremental)) {
383+
if _, exists := ic.listing[ir.Service]; exists && !listWorkspaceObjectsAlreadyRunning {
384+
ic.waitGroup.Add(1)
385+
log.Printf("[DEBUG] Starting listing of workspace objects")
386+
go func() {
387+
if err := listWorkspaceObjects(ic); err != nil {
388+
log.Printf("[ERROR] listing of workspace objects failed %s", err)
389+
}
390+
log.Print("[DEBUG] Finished listing of workspace objects")
391+
ic.waitGroup.Done()
392+
}()
393+
listWorkspaceObjectsAlreadyRunning = true
394+
}
395+
continue
396+
}
380397
if ir.List == nil {
381398
continue
382399
}
383-
_, exists := ic.listing[ir.Service]
384-
if !exists {
400+
if _, exists := ic.listing[ir.Service]; !exists {
385401
log.Printf("[DEBUG] %s (%s service) is not part of listing", resourceName, ir.Service)
386402
continue
387403
}

exporter/exporter_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2180,7 +2180,7 @@ func TestImportingDLTPipelines(t *testing.T) {
21802180
ic := newImportContext(client)
21812181
ic.Directory = tmpDir
21822182
ic.enableListing("dlt")
2183-
ic.enableServices("dlt,access,notebooks,users,repos,secrets")
2183+
ic.enableServices("dlt,access,notebooks,users,repos,secrets,wsfiles")
21842184

21852185
err := ic.Run()
21862186
assert.NoError(t, err)

exporter/importables.go

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1576,7 +1576,6 @@ var resourcesMap map[string]importable = map[string]importable{
15761576
WorkspaceLevel: true,
15771577
Service: "notebooks",
15781578
Name: workspaceObjectResouceName,
1579-
List: listNotebooksAndWorkspaceFiles,
15801579
Import: func(ic *importContext, r *resource) error {
15811580
ic.emitUserOrServicePrincipalForPath(r.ID, "/Users")
15821581
notebooksAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client)
@@ -1623,10 +1622,8 @@ var resourcesMap map[string]importable = map[string]importable{
16231622
},
16241623
"databricks_workspace_file": {
16251624
WorkspaceLevel: true,
1626-
Service: "notebooks",
1625+
Service: "wsfiles",
16271626
Name: workspaceObjectResouceName,
1628-
// We don't need list function for workspace files because it will be handled by the notebooks listing
1629-
// List: createListWorkspaceObjectsFunc(workspace.File, "databricks_workspace_file", "workspace_file"),
16301627
Import: func(ic *importContext, r *resource) error {
16311628
ic.emitUserOrServicePrincipalForPath(r.ID, "/Users")
16321629
notebooksAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client)
@@ -2175,28 +2172,6 @@ var resourcesMap map[string]importable = map[string]importable{
21752172
}
21762173
return fmt.Errorf("can't find directory with object_id: %s", r.Value)
21772174
},
2178-
// TODO: think if we really need this, we need directories only for permissions,
2179-
// and only when they are different from parents & notebooks
2180-
List: func(ic *importContext) error {
2181-
if ic.incremental {
2182-
return nil
2183-
}
2184-
directoryList := ic.getAllDirectories()
2185-
for offset, directory := range directoryList {
2186-
if strings.HasPrefix(directory.Path, "/Repos") {
2187-
continue
2188-
}
2189-
if res := ignoreIdeFolderRegex.FindStringSubmatch(directory.Path); res != nil {
2190-
continue
2191-
}
2192-
ic.maybeEmitWorkspaceObject("databricks_directory", directory.Path, &directory)
2193-
2194-
if offset%50 == 0 {
2195-
log.Printf("[INFO] Scanned %d of %d directories", offset+1, len(directoryList))
2196-
}
2197-
}
2198-
return nil
2199-
},
22002175
Import: func(ic *importContext, r *resource) error {
22012176
ic.emitUserOrServicePrincipalForPath(r.ID, "/Users")
22022177
// Existing permissions API doesn't allow to set permissions for

exporter/importables_test.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,7 +1084,8 @@ func TestNotebookGeneration(t *testing.T) {
10841084
},
10851085
}, "notebooks", false, func(ic *importContext) {
10861086
ic.notebooksFormat = "SOURCE"
1087-
err := resourcesMap["databricks_notebook"].List(ic)
1087+
ic.enableListing("notebooks")
1088+
err := listWorkspaceObjects(ic)
10881089
assert.NoError(t, err)
10891090
ic.waitGroup.Wait()
10901091
ic.closeImportChannels()
@@ -1127,7 +1128,8 @@ func TestNotebookGenerationJupyter(t *testing.T) {
11271128
},
11281129
}, "notebooks", false, func(ic *importContext) {
11291130
ic.notebooksFormat = "JUPYTER"
1130-
err := resourcesMap["databricks_notebook"].List(ic)
1131+
ic.enableListing("notebooks")
1132+
err := listWorkspaceObjects(ic)
11311133
assert.NoError(t, err)
11321134
ic.waitGroup.Wait()
11331135
ic.closeImportChannels()
@@ -1184,7 +1186,8 @@ func TestNotebookGenerationBadCharacters(t *testing.T) {
11841186
}, "notebooks,directories", true, func(ic *importContext) {
11851187
ic.notebooksFormat = "SOURCE"
11861188
ic.enableServices("notebooks")
1187-
err := resourcesMap["databricks_notebook"].List(ic)
1189+
ic.enableListing("notebooks")
1190+
err := listWorkspaceObjects(ic)
11881191
assert.NoError(t, err)
11891192
ic.waitGroup.Wait()
11901193
ic.closeImportChannels()
@@ -1231,7 +1234,8 @@ func TestDirectoryGeneration(t *testing.T) {
12311234
},
12321235
},
12331236
}, "directories", false, func(ic *importContext) {
1234-
err := resourcesMap["databricks_directory"].List(ic)
1237+
ic.enableListing("directories")
1238+
err := listWorkspaceObjects(ic)
12351239
assert.NoError(t, err)
12361240

12371241
ic.waitGroup.Wait()
@@ -1521,7 +1525,7 @@ func TestEmitSqlParent(t *testing.T) {
15211525

15221526
func TestEmitFilesFromSlice(t *testing.T) {
15231527
ic := importContextForTest()
1524-
ic.enableServices("storage,notebooks")
1528+
ic.enableServices("storage,notebooks,wsfiles")
15251529
ic.emitFilesFromSlice([]string{
15261530
"dbfs:/FileStore/test.txt",
15271531
"/Workspace/Shared/test.txt",
@@ -1534,7 +1538,7 @@ func TestEmitFilesFromSlice(t *testing.T) {
15341538

15351539
func TestEmitFilesFromMap(t *testing.T) {
15361540
ic := importContextForTest()
1537-
ic.enableServices("storage,notebooks")
1541+
ic.enableServices("storage,notebooks,wsfiles")
15381542
ic.emitFilesFromMap(map[string]string{
15391543
"k1": "dbfs:/FileStore/test.txt",
15401544
"k2": "/Workspace/Shared/test.txt",

exporter/util_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,8 +434,6 @@ func TestDirectoryIncrementalMode(t *testing.T) {
434434
ic := importContextForTest()
435435
ic.incremental = true
436436

437-
// test direct listing
438-
assert.Nil(t, resourcesMap["databricks_directory"].List(ic))
439437
// test emit during workspace listing
440438
assert.True(t, ic.shouldSkipWorkspaceObject(workspace.ObjectStatus{ObjectType: workspace.Directory}, 111111))
441439
}

exporter/util_workspace.go

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func (ic *importContext) shouldSkipWorkspaceObject(object workspace.ObjectStatus
193193
}
194194
if !(object.ObjectType == workspace.Notebook || object.ObjectType == workspace.File) ||
195195
strings.HasPrefix(object.Path, "/Repos") {
196-
// log.Printf("[DEBUG] Skipping unsupported entry %v", object)
196+
log.Printf("[DEBUG] Skipping unsupported entry %v", object)
197197
return true
198198
}
199199
if res := ignoreIdeFolderRegex.FindStringSubmatch(object.Path); res != nil {
@@ -236,7 +236,7 @@ func emitWorkpaceObject(ic *importContext, object workspace.ObjectStatus) {
236236
}
237237
}
238238

239-
func listNotebooksAndWorkspaceFiles(ic *importContext) error {
239+
func listWorkspaceObjects(ic *importContext) error {
240240
objectsChannel := make(chan workspace.ObjectStatus, defaultChannelSize)
241241
numRoutines := 2 // TODO: make configurable? together with the channel size?
242242
var processedObjects atomic.Uint64
@@ -257,10 +257,13 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error {
257257
}
258258
// There are two use cases - this function will handle listing, or it will receive listing
259259
updatedSinceMs := ic.getUpdatedSinceMs()
260+
isNotebooksListingEnabled := ic.isServiceInListing("notebooks")
261+
isDirectoryListingEnabled := ic.isServiceInListing("directories")
262+
isWsFilesListingEnabled := ic.isServiceInListing("wsfiles")
260263
allObjects := ic.getAllWorkspaceObjects(func(objects []workspace.ObjectStatus) {
261264
for _, object := range objects {
262265
if object.ObjectType == workspace.Directory {
263-
if !ic.incremental && object.Path != "/" && ic.isServiceInListing("directories") {
266+
if !ic.incremental && object.Path != "/" && isDirectoryListingEnabled {
264267
objectsChannel <- object
265268
}
266269
} else {
@@ -269,8 +272,14 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error {
269272
}
270273
object := object
271274
switch object.ObjectType {
272-
case workspace.Notebook, workspace.File:
273-
objectsChannel <- object
275+
case workspace.Notebook:
276+
if isNotebooksListingEnabled {
277+
objectsChannel <- object
278+
}
279+
case workspace.File:
280+
if isWsFilesListingEnabled {
281+
objectsChannel <- object
282+
}
274283
default:
275284
log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
276285
}
@@ -285,9 +294,11 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error {
285294
if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) {
286295
continue
287296
}
288-
if object.ObjectType == workspace.Directory && !ic.incremental && ic.isServiceInListing("directories") && object.Path != "/" {
297+
if !ic.incremental && isDirectoryListingEnabled && object.ObjectType == workspace.Directory && object.Path != "/" {
298+
emitWorkpaceObject(ic, object)
299+
} else if isNotebooksListingEnabled && object.ObjectType == workspace.Notebook {
289300
emitWorkpaceObject(ic, object)
290-
} else if (object.ObjectType == workspace.Notebook || object.ObjectType == workspace.File) && ic.isServiceInListing("notebooks") {
301+
} else if isWsFilesListingEnabled && object.ObjectType == workspace.File {
291302
emitWorkpaceObject(ic, object)
292303
}
293304
}

0 commit comments

Comments
 (0)