Skip to content

Commit 6d08102

Browse files
authored
Exporter: don't emit all UC objects when handling dependencies (#3556)
Right now, if we discover any UC dependency, we also emit its upstream objects, such as schemas and catalogs, but their `Import` operations then list all nested objects unnecessarily - so emitting one init script from a UC Volume may lead to emitting the whole UC Catalog with all its schemas/tables/volumes... With this change, we list nested objects (schemas in a catalog, tables/models/volumes in a schema) only if the corresponding services are explicitly specified in the `-listing` option. Fixes #3555
1 parent 468b436 commit 6d08102

File tree

4 files changed

+78
-62
lines changed

4 files changed

+78
-62
lines changed

docs/guides/experimental-exporter.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,13 @@ Services are just logical groups of resources used for filtering and organizatio
8787
* `uc-external-locations` - **listing** exports [databricks_external_location](../resources/external_location.md) resource.
8888
* `uc-grants` - [databricks_grants](../resources/grants.md). *Please note that during export the list of grants is expanded to include the identity that performs the export! This allows objects to be created when catalogs/schemas have owners other than the current identity.*
8989
* `uc-metastores` - **listing** [databricks_metastore](../resources/metastore.md) and [databricks_metastore_assignment](../resources/metastore_assignment.md) (only on account-level). *Please note that when using workspace-level configuration, only metastores from the workspace's region are listed!*
90-
* `uc-models` - [databricks_registered_model](../resources/registered_model.md)
91-
* `uc-schemas` - [databricks_schema](../resources/schema.md)
90+
* `uc-models` - **listing** (*we can't list directly, only via dependencies to top-level object*) [databricks_registered_model](../resources/registered_model.md)
91+
* `uc-schemas` - **listing** (*we can't list directly, only via dependencies to top-level object*) [databricks_schema](../resources/schema.md)
9292
* `uc-shares` - **listing** [databricks_share](../resources/share.md) and [databricks_recipient](../resources/recipient.md)
9393
* `uc-storage-credentials` - **listing** exports [databricks_storage_credential](../resources/storage_credential.md) resources on workspace or account level.
9494
* `uc-system-schemas` - **listing** exports [databricks_system_schema](../resources/system_schema.md) resources for the UC metastore of the current workspace.
95-
* `uc-tables` - [databricks_sql_table](../resources/sql_table.md) resource.
96-
* `uc-volumes` - [databricks_volume](../resources/volume.md)
95+
* `uc-tables` - **listing** (*we can't list directly, only via dependencies to top-level object*) [databricks_sql_table](../resources/sql_table.md) resource.
96+
* `uc-volumes` - **listing** (*we can't list directly, only via dependencies to top-level object*) [databricks_volume](../resources/volume.md)
9797
* `users` - [databricks_user](../resources/user.md) and [databricks_service_principal](../resources/service_principal.md) are written to their own file, simply because of their amount. If you use SCIM provisioning, migrating workspaces is the only use case for importing `users` service.
9898
* `workspace` - **listing** [databricks_workspace_conf](../resources/workspace_conf.md) and [databricks_global_init_script](../resources/global_init_script.md)
9999

exporter/context.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,6 +1416,11 @@ func (ic *importContext) isServiceEnabled(service string) bool {
14161416
return exists
14171417
}
14181418

1419+
func (ic *importContext) isServiceInListing(service string) bool {
1420+
_, exists := ic.listing[service]
1421+
return exists
1422+
}
1423+
14191424
func (ic *importContext) EmitIfUpdatedAfterMillis(r *resource, modifiedAt int64, message string) {
14201425
updatedSinceMs := ic.getUpdatedSinceMs()
14211426
if ic.incremental && modifiedAt < updatedSinceMs {

exporter/importables.go

Lines changed: 66 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -2341,20 +2341,22 @@ var resourcesMap map[string]importable = map[string]importable{
23412341
})
23422342
} else if cat.ShareName == "" {
23432343
// TODO: We need to be careful here if we add more catalog types... Really we need to have CatalogType in resource
2344-
schemas, err := ic.workspaceClient.Schemas.ListAll(ic.Context, catalog.ListSchemasRequest{CatalogName: r.ID})
2345-
if err != nil {
2346-
return err
2347-
}
2348-
ignoredSchemas := []string{"information_schema"}
2349-
for _, schema := range schemas {
2350-
if schema.CatalogType != "MANAGED_CATALOG" || slices.Contains(ignoredSchemas, schema.Name) {
2351-
continue
2344+
if ic.isServiceInListing("uc-schemas") {
2345+
schemas, err := ic.workspaceClient.Schemas.ListAll(ic.Context, catalog.ListSchemasRequest{CatalogName: r.ID})
2346+
if err != nil {
2347+
return err
2348+
}
2349+
ignoredSchemas := []string{"information_schema"}
2350+
for _, schema := range schemas {
2351+
if schema.CatalogType != "MANAGED_CATALOG" || slices.Contains(ignoredSchemas, schema.Name) {
2352+
continue
2353+
}
2354+
ic.EmitIfUpdatedAfterMillis(&resource{
2355+
Resource: "databricks_schema",
2356+
ID: schema.FullName,
2357+
DependsOn: dependsOn,
2358+
}, schema.UpdatedAt, fmt.Sprintf("schema '%s'", schema.FullName))
23522359
}
2353-
ic.EmitIfUpdatedAfterMillis(&resource{
2354-
Resource: "databricks_schema",
2355-
ID: schema.FullName,
2356-
DependsOn: dependsOn,
2357-
}, schema.UpdatedAt, fmt.Sprintf("schema '%s'", schema.FullName))
23582360
}
23592361
}
23602362
if cat.IsolationMode == "ISOLATED" {
@@ -2427,55 +2429,61 @@ var resourcesMap map[string]importable = map[string]importable{
24272429

24282430
// TODO: somehow add depends on catalog's grant...
24292431
// TODO: emit owner? See comment in catalog resource
2430-
models, err := ic.workspaceClient.RegisteredModels.ListAll(ic.Context,
2431-
catalog.ListRegisteredModelsRequest{
2432-
CatalogName: catalogName,
2433-
SchemaName: schemaName,
2434-
})
2435-
if err != nil { // TODO: should we continue?
2436-
return err
2432+
if ic.isServiceInListing("uc-models") {
2433+
models, err := ic.workspaceClient.RegisteredModels.ListAll(ic.Context,
2434+
catalog.ListRegisteredModelsRequest{
2435+
CatalogName: catalogName,
2436+
SchemaName: schemaName,
2437+
})
2438+
if err != nil { // TODO: should we continue?
2439+
return err
2440+
}
2441+
for _, model := range models {
2442+
ic.EmitIfUpdatedAfterMillis(&resource{
2443+
Resource: "databricks_registered_model",
2444+
ID: model.FullName,
2445+
DependsOn: dependsOn,
2446+
}, model.UpdatedAt, fmt.Sprintf("registered model '%s'", model.FullName))
2447+
}
24372448
}
2438-
for _, model := range models {
2439-
ic.EmitIfUpdatedAfterMillis(&resource{
2440-
Resource: "databricks_registered_model",
2441-
ID: model.FullName,
2442-
DependsOn: dependsOn,
2443-
}, model.UpdatedAt, fmt.Sprintf("registered model '%s'", model.FullName))
2444-
}
2445-
// list volumes
2446-
volumes, err := ic.workspaceClient.Volumes.ListAll(ic.Context,
2447-
catalog.ListVolumesRequest{
2449+
if ic.isServiceInListing("uc-volumes") {
2450+
// list volumes
2451+
volumes, err := ic.workspaceClient.Volumes.ListAll(ic.Context,
2452+
catalog.ListVolumesRequest{
2453+
CatalogName: catalogName,
2454+
SchemaName: schemaName,
2455+
})
2456+
if err != nil {
2457+
return err
2458+
}
2459+
for _, volume := range volumes {
2460+
ic.EmitIfUpdatedAfterMillis(&resource{
2461+
Resource: "databricks_volume",
2462+
ID: volume.FullName,
2463+
DependsOn: dependsOn,
2464+
}, volume.UpdatedAt, fmt.Sprintf("volume '%s'", volume.FullName))
2465+
}
2466+
}
2467+
if ic.isServiceInListing("uc-tables") {
2468+
// list tables
2469+
tables, err := ic.workspaceClient.Tables.ListAll(ic.Context, catalog.ListTablesRequest{
24482470
CatalogName: catalogName,
24492471
SchemaName: schemaName,
24502472
})
2451-
if err != nil {
2452-
return err
2453-
}
2454-
for _, volume := range volumes {
2455-
ic.EmitIfUpdatedAfterMillis(&resource{
2456-
Resource: "databricks_volume",
2457-
ID: volume.FullName,
2458-
DependsOn: dependsOn,
2459-
}, volume.UpdatedAt, fmt.Sprintf("volume '%s'", volume.FullName))
2460-
}
2461-
// list tables
2462-
tables, err := ic.workspaceClient.Tables.ListAll(ic.Context, catalog.ListTablesRequest{
2463-
CatalogName: catalogName,
2464-
SchemaName: schemaName,
2465-
})
2466-
if err != nil {
2467-
return err
2468-
}
2469-
for _, table := range tables {
2470-
switch table.TableType {
2471-
case "MANAGED", "EXTERNAL", "VIEW":
2472-
ic.EmitIfUpdatedAfterMillis(&resource{
2473-
Resource: "databricks_sql_table",
2474-
ID: table.FullName,
2475-
DependsOn: dependsOn,
2476-
}, table.UpdatedAt, fmt.Sprintf("table '%s'", table.FullName))
2477-
default:
2478-
log.Printf("[DEBUG] Skipping table %s of type %s", table.FullName, table.TableType)
2473+
if err != nil {
2474+
return err
2475+
}
2476+
for _, table := range tables {
2477+
switch table.TableType {
2478+
case "MANAGED", "EXTERNAL", "VIEW":
2479+
ic.EmitIfUpdatedAfterMillis(&resource{
2480+
Resource: "databricks_sql_table",
2481+
ID: table.FullName,
2482+
DependsOn: dependsOn,
2483+
}, table.UpdatedAt, fmt.Sprintf("table '%s'", table.FullName))
2484+
default:
2485+
log.Printf("[DEBUG] Skipping table %s of type %s", table.FullName, table.TableType)
2486+
}
24792487
}
24802488
}
24812489
// TODO: list VectorSearch indexes

exporter/importables_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,6 +1641,7 @@ func TestImportManagedCatalog(t *testing.T) {
16411641
}, func(ctx context.Context, client *common.DatabricksClient) {
16421642
ic := importContextForTestWithClient(ctx, client)
16431643
ic.enableServices("uc-catalogs,uc-grants,uc-schemas")
1644+
ic.enableListing("uc-schemas")
16441645
ic.currentMetastore = currentMetastoreResponse
16451646
d := tfcatalog.ResourceCatalog().ToResource().TestResourceData()
16461647
d.SetId("ctest")
@@ -1697,6 +1698,7 @@ func TestImportIsolatedManagedCatalog(t *testing.T) {
16971698
}, func(ctx context.Context, client *common.DatabricksClient) {
16981699
ic := importContextForTestWithClient(ctx, client)
16991700
ic.enableServices("uc-catalogs,uc-grants,uc-schemas")
1701+
ic.enableListing("uc-schemas,uc-volumes,uc-models,uc-tables")
17001702
ic.currentMetastore = currentMetastoreResponse
17011703
d := tfcatalog.ResourceCatalog().ToResource().TestResourceData()
17021704
d.SetId("ctest")
@@ -1760,6 +1762,7 @@ func TestImportSchema(t *testing.T) {
17601762
}, func(ctx context.Context, client *common.DatabricksClient) {
17611763
ic := importContextForTestWithClient(ctx, client)
17621764
ic.enableServices("uc-catalogs,uc-grants,uc-schemas,uc-volumes,uc-models,uc-tables")
1765+
ic.enableListing("uc-schemas,uc-volumes,uc-models,uc-tables")
17631766
ic.currentMetastore = currentMetastoreResponse
17641767
d := tfcatalog.ResourceSchema().ToResource().TestResourceData()
17651768
d.SetId("ctest.stest")

0 commit comments

Comments
 (0)