Skip to content

Commit 4bebb0d

Browse files
authored
[Exporter] Use List + iteration instead of call to ListAll (#4123)
## Changes <!-- Summary of your changes that are easy to understand --> This change significantly improves export performance for resources with a large number of objects, because objects start being exported as soon as the first page of the listing arrives instead of after the whole list has been fetched. For example, for Lakeview dashboards, the export time for ~10k dashboards went from 47 minutes down to 22 minutes. Resolves #4119 ## Tests <!-- How is this tested? Please see the checklist below and also describe any other relevant tests --> - [x] `make test` run locally - [ ] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [x] using Go SDK
1 parent 39d745e commit 4bebb0d

File tree

2 files changed

+123
-96
lines changed

2 files changed

+123
-96
lines changed

exporter/context.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,7 @@ var goroutinesNumber = map[string]int{
200200
"databricks_dbfs_file": 3,
201201
"databricks_user": 1,
202202
"databricks_service_principal": 1,
203+
"databricks_dashboard": 4,
203204
"databricks_sql_dashboard": 3,
204205
"databricks_sql_widget": 4,
205206
"databricks_sql_visualization": 4,

exporter/importables.go

Lines changed: 122 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -222,19 +222,22 @@ var resourcesMap map[string]importable = map[string]importable{
222222
return raw.(string)
223223
},
224224
List: func(ic *importContext) error {
225-
pools, err := ic.workspaceClient.InstancePools.ListAll(ic.Context)
226-
if err != nil {
227-
return err
228-
}
229-
for i, pool := range pools {
225+
it := ic.workspaceClient.InstancePools.List(ic.Context)
226+
i := 0
227+
for it.HasNext(ic.Context) {
228+
pool, err := it.Next(ic.Context)
229+
if err != nil {
230+
return err
231+
}
232+
i++
230233
if !ic.MatchesName(pool.InstancePoolName) {
231234
continue
232235
}
233236
ic.Emit(&resource{
234237
Resource: "databricks_instance_pool",
235238
ID: pool.InstancePoolId,
236239
})
237-
log.Printf("[INFO] Imported %d of %d instance pools", i+1, len(pools))
240+
log.Printf("[INFO] Imported %d instance pools", i)
238241
}
239242
return nil
240243
},
@@ -756,14 +759,16 @@ var resourcesMap map[string]importable = map[string]importable{
756759
if err != nil {
757760
return err
758761
}
759-
policiesList, err := w.ClusterPolicies.ListAll(ic.Context, compute.ListClusterPoliciesRequest{})
760-
if err != nil {
761-
return err
762-
}
763-
764762
builtInClusterPolicies := ic.getBuiltinPolicyFamilies()
765-
for offset, policy := range policiesList {
766-
log.Printf("[TRACE] Scanning %d: %v", offset+1, policy)
763+
it := w.ClusterPolicies.List(ic.Context, compute.ListClusterPoliciesRequest{})
764+
i := 0
765+
for it.HasNext(ic.Context) {
766+
policy, err := it.Next(ic.Context)
767+
if err != nil {
768+
return err
769+
}
770+
i++
771+
log.Printf("[TRACE] Scanning %d: %v", i, policy)
767772
family, isBuiltin := builtInClusterPolicies[policy.PolicyFamilyId]
768773
if policy.PolicyFamilyId != "" && isBuiltin && family.Name == policy.Name &&
769774
policy.PolicyFamilyDefinitionOverrides == "" {
@@ -778,8 +783,8 @@ var resourcesMap map[string]importable = map[string]importable{
778783
Resource: "databricks_cluster_policy",
779784
ID: policy.PolicyId,
780785
})
781-
if offset%10 == 0 {
782-
log.Printf("[INFO] Scanned %d of %d cluster policies", offset+1, len(policiesList))
786+
if i%10 == 0 {
787+
log.Printf("[INFO] Scanned %d cluster policies", i)
783788
}
784789
}
785790
return nil
@@ -1741,19 +1746,22 @@ var resourcesMap map[string]importable = map[string]importable{
17411746
return name
17421747
},
17431748
List: func(ic *importContext) error {
1744-
endpointsList, err := ic.workspaceClient.Warehouses.ListAll(ic.Context, sql.ListWarehousesRequest{})
1745-
if err != nil {
1746-
return err
1747-
}
1748-
for i, q := range endpointsList {
1749+
it := ic.workspaceClient.Warehouses.List(ic.Context, sql.ListWarehousesRequest{})
1750+
i := 0
1751+
for it.HasNext(ic.Context) {
1752+
q, err := it.Next(ic.Context)
1753+
if err != nil {
1754+
return err
1755+
}
17491756
if !ic.MatchesName(q.Name) {
17501757
continue
17511758
}
17521759
ic.Emit(&resource{
17531760
Resource: "databricks_sql_endpoint",
17541761
ID: q.Id,
17551762
})
1756-
log.Printf("[INFO] Imported %d of %d SQL endpoints", i+1, len(endpointsList))
1763+
i++
1764+
log.Printf("[INFO] Imported %d SQL endpoints", i)
17571765
}
17581766
return nil
17591767
},
@@ -2202,11 +2210,13 @@ var resourcesMap map[string]importable = map[string]importable{
22022210
return strings.ToLower(d.Id()) + "_" + nameMd5[:8]
22032211
},
22042212
List: func(ic *importContext) error {
2205-
endpointsList, err := ic.workspaceClient.ServingEndpoints.ListAll(ic.Context)
2206-
if err != nil {
2207-
return err
2208-
}
2209-
for offset, endpoint := range endpointsList {
2213+
it := ic.workspaceClient.ServingEndpoints.List(ic.Context)
2214+
i := 0
2215+
for it.HasNext(ic.Context) {
2216+
endpoint, err := it.Next(ic.Context)
2217+
if err != nil {
2218+
return err
2219+
}
22102220
if endpoint.Config != nil && endpoint.Config.ServedEntities != nil && len(endpoint.Config.ServedEntities) > 0 {
22112221
if endpoint.Config.ServedEntities[0].FoundationModel != nil {
22122222
log.Printf("[INFO] skipping endpoint %s that is foundation model", endpoint.Name)
@@ -2217,8 +2227,9 @@ var resourcesMap map[string]importable = map[string]importable{
22172227
Resource: "databricks_model_serving",
22182228
ID: endpoint.Name,
22192229
}, endpoint.LastUpdatedTimestamp, fmt.Sprintf("serving endpoint '%s'", endpoint.Name))
2220-
if offset%50 == 0 {
2221-
log.Printf("[INFO] Scanned %d of %d Serving Endpoints", offset+1, len(endpointsList))
2230+
i++
2231+
if i%50 == 0 {
2232+
log.Printf("[INFO] Scanned %d Serving Endpoints", i)
22222233
}
22232234
}
22242235
return nil
@@ -2522,12 +2533,12 @@ var resourcesMap map[string]importable = map[string]importable{
25222533
if ic.currentMetastore == nil {
25232534
return fmt.Errorf("there is no UC metastore information")
25242535
}
2525-
2526-
catalogs, err := ic.workspaceClient.Catalogs.ListAll(ic.Context, catalog.ListCatalogsRequest{})
2527-
if err != nil {
2528-
return err
2529-
}
2530-
for _, v := range catalogs {
2536+
it := ic.workspaceClient.Catalogs.List(ic.Context, catalog.ListCatalogsRequest{})
2537+
for it.HasNext(ic.Context) {
2538+
v, err := it.Next(ic.Context)
2539+
if err != nil {
2540+
return err
2541+
}
25312542
switch v.CatalogType {
25322543
case "MANAGED_CATALOG", "FOREIGN_CATALOG", "DELTASHARING_CATALOG":
25332544
{
@@ -2564,12 +2575,13 @@ var resourcesMap map[string]importable = map[string]importable{
25642575
} else if cat.ShareName == "" {
25652576
// TODO: We need to be careful here if we add more catalog types... Really we need to have CatalogType in resource
25662577
if ic.isServiceInListing("uc-schemas") {
2567-
schemas, err := ic.workspaceClient.Schemas.ListAll(ic.Context, catalog.ListSchemasRequest{CatalogName: r.ID})
2568-
if err != nil {
2569-
return err
2570-
}
25712578
ignoredSchemas := []string{"information_schema"}
2572-
for _, schema := range schemas {
2579+
it := ic.workspaceClient.Schemas.List(ic.Context, catalog.ListSchemasRequest{CatalogName: r.ID})
2580+
for it.HasNext(ic.Context) {
2581+
schema, err := it.Next(ic.Context)
2582+
if err != nil {
2583+
return err
2584+
}
25732585
if schema.CatalogType != "MANAGED_CATALOG" || slices.Contains(ignoredSchemas, schema.Name) {
25742586
continue
25752587
}
@@ -2622,15 +2634,16 @@ var resourcesMap map[string]importable = map[string]importable{
26222634
// TODO: somehow add depends on catalog's grant...
26232635
// TODO: emit owner? See comment in catalog resource
26242636
if ic.isServiceInListing("uc-models") {
2625-
models, err := ic.workspaceClient.RegisteredModels.ListAll(ic.Context,
2637+
it := ic.workspaceClient.RegisteredModels.List(ic.Context,
26262638
catalog.ListRegisteredModelsRequest{
26272639
CatalogName: catalogName,
26282640
SchemaName: schemaName,
26292641
})
2630-
if err != nil { // TODO: should we continue?
2631-
return err
2632-
}
2633-
for _, model := range models {
2642+
for it.HasNext(ic.Context) {
2643+
model, err := it.Next(ic.Context)
2644+
if err != nil {
2645+
return err // TODO: should we continue?
2646+
}
26342647
ic.EmitIfUpdatedAfterMillis(&resource{
26352648
Resource: "databricks_registered_model",
26362649
ID: model.FullName,
@@ -2640,15 +2653,16 @@ var resourcesMap map[string]importable = map[string]importable{
26402653
}
26412654
if ic.isServiceInListing("uc-volumes") {
26422655
// list volumes
2643-
volumes, err := ic.workspaceClient.Volumes.ListAll(ic.Context,
2656+
it := ic.workspaceClient.Volumes.List(ic.Context,
26442657
catalog.ListVolumesRequest{
26452658
CatalogName: catalogName,
26462659
SchemaName: schemaName,
26472660
})
2648-
if err != nil {
2649-
return err
2650-
}
2651-
for _, volume := range volumes {
2661+
for it.HasNext(ic.Context) {
2662+
volume, err := it.Next(ic.Context)
2663+
if err != nil {
2664+
return err // TODO: should we continue?
2665+
}
26522666
ic.EmitIfUpdatedAfterMillis(&resource{
26532667
Resource: "databricks_volume",
26542668
ID: volume.FullName,
@@ -2658,14 +2672,15 @@ var resourcesMap map[string]importable = map[string]importable{
26582672
}
26592673
if ic.isServiceInListing("uc-tables") {
26602674
// list tables
2661-
tables, err := ic.workspaceClient.Tables.ListAll(ic.Context, catalog.ListTablesRequest{
2675+
it := ic.workspaceClient.Tables.List(ic.Context, catalog.ListTablesRequest{
26622676
CatalogName: catalogName,
26632677
SchemaName: schemaName,
26642678
})
2665-
if err != nil {
2666-
return err
2667-
}
2668-
for _, table := range tables {
2679+
for it.HasNext(ic.Context) {
2680+
table, err := it.Next(ic.Context)
2681+
if err != nil {
2682+
return err // TODO: should we continue?
2683+
}
26692684
switch table.TableType {
26702685
case "MANAGED", "EXTERNAL", "VIEW":
26712686
ic.EmitIfUpdatedAfterMillis(&resource{
@@ -2848,11 +2863,12 @@ var resourcesMap map[string]importable = map[string]importable{
28482863
return nil
28492864
},
28502865
List: func(ic *importContext) error {
2851-
objList, err := ic.workspaceClient.StorageCredentials.ListAll(ic.Context, catalog.ListStorageCredentialsRequest{})
2852-
if err != nil {
2853-
return err
2854-
}
2855-
for _, v := range objList {
2866+
it := ic.workspaceClient.StorageCredentials.List(ic.Context, catalog.ListStorageCredentialsRequest{})
2867+
for it.HasNext(ic.Context) {
2868+
v, err := it.Next(ic.Context)
2869+
if err != nil {
2870+
return err
2871+
}
28562872
ic.EmitIfUpdatedAfterMillisAndNameMatches(&resource{
28572873
Resource: "databricks_storage_credential",
28582874
ID: v.Name,
@@ -2890,11 +2906,12 @@ var resourcesMap map[string]importable = map[string]importable{
28902906
return nil
28912907
},
28922908
List: func(ic *importContext) error {
2893-
objList, err := ic.workspaceClient.ExternalLocations.ListAll(ic.Context, catalog.ListExternalLocationsRequest{})
2894-
if err != nil {
2895-
return err
2896-
}
2897-
for _, v := range objList {
2909+
it := ic.workspaceClient.ExternalLocations.List(ic.Context, catalog.ListExternalLocationsRequest{})
2910+
for it.HasNext(ic.Context) {
2911+
v, err := it.Next(ic.Context)
2912+
if err != nil {
2913+
return err
2914+
}
28982915
if v.Name != "metastore_default_location" {
28992916
ic.EmitIfUpdatedAfterMillisAndNameMatches(&resource{
29002917
Resource: "databricks_external_location",
@@ -2930,11 +2947,12 @@ var resourcesMap map[string]importable = map[string]importable{
29302947
return connectionType + "_" + connectionName
29312948
},
29322949
List: func(ic *importContext) error {
2933-
connections, err := ic.workspaceClient.Connections.ListAll(ic.Context, catalog.ListConnectionsRequest{})
2934-
if err != nil {
2935-
return err
2936-
}
2937-
for _, conn := range connections {
2950+
it := ic.workspaceClient.Connections.List(ic.Context, catalog.ListConnectionsRequest{})
2951+
for it.HasNext(ic.Context) {
2952+
conn, err := it.Next(ic.Context)
2953+
if err != nil {
2954+
return err
2955+
}
29382956
ic.EmitIfUpdatedAfterMillisAndNameMatches(&resource{
29392957
Resource: "databricks_connection",
29402958
ID: conn.MetastoreId + "|" + conn.Name,
@@ -2955,11 +2973,12 @@ var resourcesMap map[string]importable = map[string]importable{
29552973
WorkspaceLevel: true,
29562974
Service: "uc-shares",
29572975
List: func(ic *importContext) error {
2958-
shares, err := ic.workspaceClient.Shares.ListAll(ic.Context, sharing.ListSharesRequest{})
2959-
if err != nil {
2960-
return err
2961-
}
2962-
for _, share := range shares {
2976+
it := ic.workspaceClient.Shares.List(ic.Context, sharing.ListSharesRequest{})
2977+
for it.HasNext(ic.Context) {
2978+
share, err := it.Next(ic.Context)
2979+
if err != nil {
2980+
return err
2981+
}
29632982
ic.EmitIfUpdatedAfterMillisAndNameMatches(&resource{
29642983
Resource: "databricks_share",
29652984
ID: share.Name,
@@ -3011,11 +3030,12 @@ var resourcesMap map[string]importable = map[string]importable{
30113030
WorkspaceLevel: true,
30123031
Service: "uc-shares",
30133032
List: func(ic *importContext) error {
3014-
recipients, err := ic.workspaceClient.Recipients.ListAll(ic.Context, sharing.ListRecipientsRequest{})
3015-
if err != nil {
3016-
return err
3017-
}
3018-
for _, rec := range recipients {
3033+
it := ic.workspaceClient.Recipients.List(ic.Context, sharing.ListRecipientsRequest{})
3034+
for it.HasNext(ic.Context) {
3035+
rec, err := it.Next(ic.Context)
3036+
if err != nil {
3037+
return err
3038+
}
30193039
ic.EmitIfUpdatedAfterMillisAndNameMatches(&resource{
30203040
Resource: "databricks_recipient",
30213041
ID: rec.Name,
@@ -3086,11 +3106,12 @@ var resourcesMap map[string]importable = map[string]importable{
30863106
return name
30873107
},
30883108
List: func(ic *importContext) error {
3089-
metastores, err := ic.accountClient.Metastores.ListAll(ic.Context)
3090-
if err != nil {
3091-
return err
3092-
}
3093-
for _, mstore := range metastores {
3109+
it := ic.accountClient.Metastores.List(ic.Context)
3110+
for it.HasNext(ic.Context) {
3111+
mstore, err := it.Next(ic.Context)
3112+
if err != nil {
3113+
return err
3114+
}
30943115
ic.EmitIfUpdatedAfterMillisAndNameMatches(&resource{
30953116
Resource: "databricks_metastore",
30963117
ID: mstore.MetastoreId,
@@ -3285,23 +3306,27 @@ var resourcesMap map[string]importable = map[string]importable{
32853306
WorkspaceLevel: true,
32863307
Service: "dashboards",
32873308
List: func(ic *importContext) error {
3288-
dashboards, err := ic.workspaceClient.Lakeview.ListAll(ic.Context, dashboards.ListDashboardsRequest{PageSize: 100})
3289-
if err != nil {
3290-
return err
3291-
}
3292-
for i, d := range dashboards {
3309+
it := ic.workspaceClient.Lakeview.List(ic.Context, dashboards.ListDashboardsRequest{PageSize: 100})
3310+
i := 0
3311+
for it.HasNext(ic.Context) {
3312+
d, err := it.Next(ic.Context)
3313+
if err != nil {
3314+
return err
3315+
}
3316+
i++
32933317
if !ic.MatchesName(d.DisplayName) {
32943318
continue
32953319
}
3296-
// TODO: add emit for incremental mode. Use already defined functions for emitting?
3320+
// TODO: add emit for incremental mode. But this information isn't included in the List response
32973321
ic.Emit(&resource{
32983322
Resource: "databricks_dashboard",
32993323
ID: d.DashboardId,
33003324
})
33013325
if i%100 == 0 {
3302-
log.Printf("[INFO] Processed %d dashboard out of %d", i+1, len(dashboards))
3326+
log.Printf("[INFO] Processed %d dashboards", i)
33033327
}
33043328
}
3329+
log.Printf("[INFO] Listed %d dashboards", i)
33053330
return nil
33063331
},
33073332
Name: func(ic *importContext, d *schema.ResourceData) string {
@@ -3391,11 +3416,12 @@ var resourcesMap map[string]importable = map[string]importable{
33913416
if !ic.meAdmin {
33923417
return fmt.Errorf("notifications can be imported only by admin")
33933418
}
3394-
notifications, err := ic.workspaceClient.NotificationDestinations.ListAll(ic.Context, settings.ListNotificationDestinationsRequest{})
3395-
if err != nil {
3396-
return err
3397-
}
3398-
for _, n := range notifications {
3419+
it := ic.workspaceClient.NotificationDestinations.List(ic.Context, settings.ListNotificationDestinationsRequest{})
3420+
for it.HasNext(ic.Context) {
3421+
n, err := it.Next(ic.Context)
3422+
if err != nil {
3423+
return err
3424+
}
33993425
ic.Emit(&resource{
34003426
Resource: "databricks_notification_destination",
34013427
ID: n.Id,

0 commit comments

Comments
 (0)