Skip to content

Commit 160f38e

Browse files
alexott and tanmay-db authored
Exporter: decrease a need for get-status calls for directories when listing workspace objects (#3470)
* Exporter: decrease a need for `get-status` calls for directories when listing

  Also, rework handling of longest prefix matches a bit

* remove commented out code

---------

Co-authored-by: Tanmay Rustagi <[email protected]>
1 parent b2286ca commit 160f38e

File tree

5 files changed

+67
-32
lines changed

5 files changed

+67
-32
lines changed

exporter/context.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,14 +1250,24 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r
12501250
!ic.isIgnoredResourceApproximation(sr) {
12511251
log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s",
12521252
ref.Resource, attr, value, ref, sr.Type, sr.Name)
1253-
// TODO: we need to not generate traversals resources for which their Ignore function returns true...
12541253
return matchValue, genTraversalTokens(sr, attr), sr.Mode == "data"
12551254
}
12561255
if ref.MatchType != MatchCaseInsensitive { // for case-insensitive matching we'll try iteration
12571256
log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Not found",
12581257
ref.Resource, attr, value, ref)
12591258
return "", nil, false
12601259
}
1260+
} else if ref.MatchType == MatchLongestPrefix && ref.ExtraLookupKey != "" {
1261+
extraKeyValue, exists := origResource.GetExtraData(ref.ExtraLookupKey)
1262+
if exists && extraKeyValue.(string) != "" {
1263+
sr := ic.State.Get(ref.Resource, attr, extraKeyValue.(string))
1264+
if sr != nil && (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, sr, origPath)) &&
1265+
!ic.isIgnoredResourceApproximation(sr) {
1266+
log.Printf("[DEBUG] Finished direct lookup by key %s for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s",
1267+
ref.ExtraLookupKey, ref.Resource, attr, value, ref, sr.Type, sr.Name)
1268+
return extraKeyValue.(string), genTraversalTokens(sr, attr), sr.Mode == "data"
1269+
}
1270+
}
12611271
}
12621272

12631273
maxPrefixLen := 0
@@ -1275,7 +1285,7 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r
12751285
origValue := strValue
12761286
if ref.SearchValueTransformFunc != nil {
12771287
strValue = ref.SearchValueTransformFunc(strValue)
1278-
log.Printf("[DEBUG] Resource %s. Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue)
1288+
log.Printf("[TRACE] Resource %s. Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue)
12791289
}
12801290
matched := false
12811291
switch ref.MatchType {
@@ -1298,7 +1308,6 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r
12981308
ic.isIgnoredResourceApproximation(sr) {
12991309
continue
13001310
}
1301-
// TODO: we need to not generate traversals resources for which their Ignore function returns true...
13021311
log.Printf("[DEBUG] Finished searching for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s",
13031312
ref.Resource, attr, value, ref, sr.Type, sr.Name)
13041313
return origValue, genTraversalTokens(sr, attr), sr.Mode == "data"

exporter/importables.go

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ var (
8080
"storage_credential": {`CREATE_EXTERNAL_LOCATION`, `CREATE_EXTERNAL_TABLE`},
8181
"foreign_connection": {`CREATE_FOREIGN_CATALOG`},
8282
}
83+
ParentDirectoryExtraKey = "parent_directory"
8384
)
8485

8586
func generateMountBody(ic *importContext, body *hclwrite.Body, r *resource) error {
@@ -1527,20 +1528,14 @@ var resourcesMap map[string]importable = map[string]importable{
15271528
"notebook_"+ic.Importables["databricks_notebook"].Name(ic, r.Data))
15281529
// TODO: it's not completely correct condition - we need to make emit smarter -
15291530
// emit only if permissions are different from their parent's permission.
1530-
if idx := strings.LastIndex(r.ID, "/"); idx != -1 {
1531-
directoryPath := r.ID[:idx]
1532-
ic.Emit(&resource{
1533-
Resource: "databricks_directory",
1534-
ID: directoryPath,
1535-
})
1536-
}
1531+
ic.emitWorkspaceObjectParentDirectory(r)
15371532
return r.Data.Set("source", fileName)
15381533
},
15391534
ShouldOmitField: shouldOmitMd5Field,
15401535
Depends: []reference{
15411536
{Path: "source", File: true},
1542-
{Path: "path", Resource: "databricks_directory",
1543-
MatchType: MatchLongestPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
1537+
{Path: "path", Resource: "databricks_directory", MatchType: MatchLongestPrefix,
1538+
SearchValueTransformFunc: appendEndingSlashToDirName, ExtraLookupKey: ParentDirectoryExtraKey},
15441539
{Path: "path", Resource: "databricks_user", Match: "home",
15451540
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
15461541
{Path: "path", Resource: "databricks_service_principal", Match: "home",
@@ -1580,21 +1575,15 @@ var resourcesMap map[string]importable = map[string]importable{
15801575

15811576
// TODO: it's not completely correct condition - we need to make emit smarter -
15821577
// emit only if permissions are different from their parent's permission.
1583-
if idx := strings.LastIndex(r.ID, "/"); idx != -1 {
1584-
directoryPath := r.ID[:idx]
1585-
ic.Emit(&resource{
1586-
Resource: "databricks_directory",
1587-
ID: directoryPath,
1588-
})
1589-
}
1590-
log.Printf("Creating %s for %s", fileName, r)
1578+
ic.emitWorkspaceObjectParentDirectory(r)
1579+
log.Printf("[TRACE] Creating %s for %s", fileName, r)
15911580
return r.Data.Set("source", fileName)
15921581
},
15931582
ShouldOmitField: shouldOmitMd5Field,
15941583
Depends: []reference{
15951584
{Path: "source", File: true},
1596-
{Path: "path", Resource: "databricks_directory",
1597-
MatchType: MatchLongestPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
1585+
{Path: "path", Resource: "databricks_directory", MatchType: MatchLongestPrefix,
1586+
SearchValueTransformFunc: appendEndingSlashToDirName, ExtraLookupKey: ParentDirectoryExtraKey},
15981587
{Path: "path", Resource: "databricks_user", Match: "home",
15991588
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
16001589
{Path: "path", Resource: "databricks_service_principal", Match: "home",

exporter/model.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ type reference struct {
203203
IsValidApproximation isValidAproximationFunc
204204
// if we should skip direct lookups (for example, we need it for UC schemas matching)
205205
SkipDirectLookup bool
206+
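// ExtraLookupKey names an extra-data entry on the originating resource; when set on a MatchLongestPrefix reference, its value is tried for a direct state lookup before falling back to iteration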
207+
ExtraLookupKey string
206208
}
207209

208210
func (r reference) MatchAttribute() string {

exporter/util.go

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,8 @@ func emitWorkpaceObject(ic *importContext, object workspace.ObjectStatus) {
11231123
ic.maybeEmitWorkspaceObject("databricks_notebook", object.Path, &object)
11241124
case workspace.File:
11251125
ic.maybeEmitWorkspaceObject("databricks_workspace_file", object.Path, &object)
1126+
case workspace.Directory:
1127+
ic.maybeEmitWorkspaceObject("databricks_directory", object.Path, &object)
11261128
default:
11271129
log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
11281130
}
@@ -1140,8 +1142,6 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error {
11401142
for object := range objectsChannel {
11411143
processedObjects.Add(1)
11421144
ic.waitGroup.Add(1)
1143-
// log.Printf("[DEBUG] channel %d for workspace objects, channel size=%d got %v",
1144-
// num, len(objectsChannel), object)
11451145
emitWorkpaceObject(ic, object)
11461146
ic.waitGroup.Done()
11471147
}
@@ -1153,15 +1153,19 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error {
11531153
updatedSinceMs := ic.getUpdatedSinceMs()
11541154
allObjects := ic.getAllWorkspaceObjects(func(objects []workspace.ObjectStatus) {
11551155
for _, object := range objects {
1156-
if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) {
1157-
continue
1158-
}
1159-
object := object
1160-
switch object.ObjectType {
1161-
case workspace.Notebook, workspace.File:
1156+
if object.ObjectType == workspace.Directory && object.Path != "/" && !ic.incremental {
11621157
objectsChannel <- object
1163-
default:
1164-
log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
1158+
} else {
1159+
if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) {
1160+
continue
1161+
}
1162+
object := object
1163+
switch object.ObjectType {
1164+
case workspace.Notebook, workspace.File:
1165+
objectsChannel <- object
1166+
default:
1167+
log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
1168+
}
11651169
}
11661170
}
11671171
})
@@ -1459,3 +1463,17 @@ func (ic *importContext) emitPermissionsIfNotIgnored(r *resource, id, name strin
14591463
}
14601464
}
14611465
}
1466+
1467+
func (ic *importContext) emitWorkspaceObjectParentDirectory(r *resource) { // emits the parent directory of workspace object r and records that directory's path on r
1468+
if !ic.isServiceEnabled("directories") { // nothing is emitted or recorded when the directories service is not enabled
1469+
return
1470+
}
1471+
if idx := strings.LastIndex(r.ID, "/"); idx > 0 { // idx > 0 skips IDs with no slash (idx == -1) and objects directly in the root (idx == 0)
1472+
directoryPath := r.ID[:idx] // everything before the last slash, without the trailing slash
1473+
ic.Emit(&resource{
1474+
Resource: "databricks_directory",
1475+
ID: directoryPath,
1476+
})
1477+
r.AddExtraData(ParentDirectoryExtraKey, directoryPath) // stored under the key that the directory references use as ExtraLookupKey
1478+
}
1479+
}

exporter/util_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,3 +412,20 @@ func TestIgnoreObjectWithEmptyName(t *testing.T) {
412412
assert.False(t, ignoreFunc(ic, r))
413413
assert.Equal(t, 1, len(ic.ignoredResources))
414414
}
415+
416+
func TestEmitWorkspaceObjectParentDirectory(t *testing.T) { // checks that a notebook under /Shared emits /Shared as a directory and records it as extra data
417+
ic := importContextForTest()
418+
ic.enableServices("notebooks,directories") // the method under test returns early unless "directories" is enabled
419+
dirPath := "/Shared"
420+
r := &resource{
421+
ID: "/Shared/abc",
422+
Resource: "databricks_notebook",
423+
}
424+
ic.emitWorkspaceObjectParentDirectory(r)
425+
assert.Equal(t, 1, len(ic.testEmits)) // exactly one resource is emitted
426+
assert.True(t, ic.testEmits["databricks_directory[<unknown>] (id: /Shared)"])
427+
428+
dir, exists := r.GetExtraData(ParentDirectoryExtraKey) // the parent path must also be stored on the resource itself
429+
assert.True(t, exists)
430+
assert.Equal(t, dirPath, dir)
431+
}

0 commit comments

Comments
 (0)