Skip to content

Commit 18d13bf

Browse files
authored
[Exporter] export databricks_repo for Git Folders outside of /Repos (#4308)
## Changes <!-- Summary of your changes that are easy to understand --> Resolves #3672 ## Tests <!-- How is this tested? Please see the checklist below and also describe any other relevant tests --> - [x] `make test` run locally - [x] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [ ] using Go SDK
1 parent 3077b79 commit 18d13bf

File tree

9 files changed

+252
-56
lines changed

9 files changed

+252
-56
lines changed

docs/guides/experimental-exporter.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ Services are just logical groups of resources used for filtering and organizatio
128128
* `policies` - **listing** [databricks_cluster_policy](../resources/cluster_policy).
129129
* `pools` - **listing** [instance pools](../resources/instance_pool.md).
130130
* `queries` - **listing** [databricks_query](../resources/query.md).
131-
* `repos` - **listing** [databricks_repo](../resources/repo.md)
131+
* `repos` - **listing** [databricks_repo](../resources/repo.md) (both classical Repos in `/Repos` and Git Folders in arbitrary locations).
132132
* `secrets` - **listing** [databricks_secret_scope](../resources/secret_scope.md) along with [keys](../resources/secret.md) and [ACLs](../resources/secret_acl.md).
133133
* `settings` - **listing** [databricks_notification_destination](../resources/notification_destination.md).
134134
* `sql-dashboards` - **listing** Legacy [databricks_sql_dashboard](../resources/sql_dashboard.md) along with associated [databricks_sql_widget](../resources/sql_widget.md) and [databricks_sql_visualization](../resources/sql_visualization.md).

exporter/context.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ import (
5252

5353
type resourceChannel chan *resource
5454

55+
type gitInfoCacheEntry struct {
56+
IsPresent bool
57+
RepoId int64
58+
}
59+
5560
type importContext struct {
5661
// not modified/used only in single thread
5762
Module string
@@ -139,6 +144,9 @@ type importContext struct {
139144
oldWorkspaceObjects []workspace.ObjectStatus
140145
oldWorkspaceObjectMapping map[int64]string
141146

147+
gitInfoCache map[string]gitInfoCacheEntry
148+
gitInfoCacheMutex sync.RWMutex
149+
142150
builtInPolicies map[string]compute.PolicyFamily
143151
builtInPoliciesMutex sync.Mutex
144152

@@ -256,6 +264,7 @@ func newImportContext(c *common.DatabricksClient) *importContext {
256264
allWorkspaceObjects: []workspace.ObjectStatus{},
257265
oldWorkspaceObjects: []workspace.ObjectStatus{},
258266
oldWorkspaceObjectMapping: map[int64]string{},
267+
gitInfoCache: map[string]gitInfoCacheEntry{},
259268
workspaceConfKeys: workspaceConfKeys,
260269
shImports: map[string]bool{},
261270
notebooksFormat: "SOURCE",

exporter/exporter_test.go

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/databricks/terraform-provider-databricks/clusters"
3131
"github.com/databricks/terraform-provider-databricks/commands"
3232
"github.com/databricks/terraform-provider-databricks/common"
33+
"github.com/databricks/terraform-provider-databricks/internal/service/workspace_tf"
3334
"github.com/databricks/terraform-provider-databricks/jobs"
3435
"github.com/databricks/terraform-provider-databricks/qa"
3536
"github.com/databricks/terraform-provider-databricks/repos"
@@ -288,7 +289,7 @@ var emptyConnections = qa.HTTPFixture{
288289
var emptyRepos = qa.HTTPFixture{
289290
Method: "GET",
290291
ReuseRequest: true,
291-
Resource: "/api/2.0/repos?",
292+
Resource: "/api/2.0/repos?path_prefix=%2FWorkspace",
292293
Response: repos.ReposListResponse{},
293294
}
294295

@@ -830,6 +831,16 @@ func TestImportingClusters(t *testing.T) {
830831
meAdminFixture,
831832
noCurrentMetastoreAttached,
832833
emptyRepos,
834+
{
835+
Method: "GET",
836+
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2Flibs%2Ftest.whl&return_git_info=true",
837+
Response: workspace.ObjectStatus{},
838+
},
839+
{
840+
Method: "GET",
841+
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2Frepo%2Ftest.sh&return_git_info=true",
842+
Response: workspace.ObjectStatus{},
843+
},
833844
{
834845
Method: "GET",
835846
Resource: "/api/2.0/preview/scim/v2/Groups?",
@@ -1494,6 +1505,11 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) {
14941505
},
14951506
},
14961507
},
1508+
{
1509+
Method: "GET",
1510+
Resource: "/api/2.0/workspace/get-status?path=%2Ffoo%2Fbar.py&return_git_info=true",
1511+
Response: workspace.ObjectStatus{},
1512+
},
14971513
},
14981514
func(ctx context.Context, client *common.DatabricksClient) {
14991515
ic := newImportContext(client)
@@ -1743,7 +1759,7 @@ func TestImportingRepos(t *testing.T) {
17431759
userReadFixture,
17441760
{
17451761
Method: "GET",
1746-
Resource: "/api/2.0/repos?",
1762+
Resource: "/api/2.0/repos?path_prefix=%2FWorkspace",
17471763
Response: repos.ReposListResponse{
17481764
Repos: []repos.ReposInformation{
17491765
resp,
@@ -2184,6 +2200,16 @@ func TestImportingDLTPipelines(t *testing.T) {
21842200
Resource: "/api/2.0/permissions/files/789?",
21852201
Response: getJSONObject("test-data/get-workspace-file-permissions.json"),
21862202
},
2203+
{
2204+
Method: "GET",
2205+
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2FTest%20DLT&return_git_info=true",
2206+
Response: workspace.ObjectStatus{},
2207+
},
2208+
{
2209+
Method: "GET",
2210+
Resource: "/api/2.0/workspace/get-status?path=%2Finit.sh&return_git_info=true",
2211+
Response: workspace.ObjectStatus{},
2212+
},
21872213
},
21882214
func(ctx context.Context, client *common.DatabricksClient) {
21892215
tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName())
@@ -2277,6 +2303,16 @@ func TestImportingDLTPipelinesMatchingOnly(t *testing.T) {
22772303
Resource: "/api/2.0/instance-profiles/list",
22782304
Response: getJSONObject("test-data/list-instance-profiles.json"),
22792305
},
2306+
{
2307+
Method: "GET",
2308+
Resource: "/api/2.0/workspace/get-status?path=%2FUsers%2Fuser%40domain.com%2FTest%20DLT&return_git_info=true",
2309+
Response: workspace.ObjectStatus{},
2310+
},
2311+
{
2312+
Method: "GET",
2313+
Resource: "/api/2.0/workspace/get-status?path=%2Finit.sh&return_git_info=true",
2314+
Response: workspace.ObjectStatus{},
2315+
},
22802316
},
22812317
func(ctx context.Context, client *common.DatabricksClient) {
22822318
tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName())
@@ -2975,6 +3011,11 @@ func TestImportingLakeviewDashboards(t *testing.T) {
29753011
WarehouseId: "1234",
29763012
},
29773013
},
3014+
{
3015+
Method: "GET",
3016+
Resource: "/api/2.0/workspace/get-status?path=%2FDashboard1.lvdash.json&return_git_info=true",
3017+
Response: workspace_tf.ObjectInfo{},
3018+
},
29783019
},
29793020
func(ctx context.Context, client *common.DatabricksClient) {
29803021
tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName())

exporter/importables.go

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,8 @@ var resourcesMap map[string]importable = map[string]importable{
323323
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
324324
{Path: "init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
325325
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
326+
{Path: "init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
327+
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
326328
},
327329
List: func(ic *importContext) error {
328330
clusters, err := clusters.NewClustersAPI(ic.Context, ic.Client).List()
@@ -470,6 +472,8 @@ var resourcesMap map[string]importable = map[string]importable{
470472
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
471473
{Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
472474
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
475+
{Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
476+
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
473477
{Path: "task.notebook_task.base_parameters", Resource: "databricks_repo", Match: "workspace_path",
474478
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
475479
{Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "path",
@@ -492,6 +496,8 @@ var resourcesMap map[string]importable = map[string]importable{
492496
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
493497
{Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
494498
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
499+
{Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
500+
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
495501
},
496502
Import: func(ic *importContext, r *resource) error {
497503
var job jobs.JobSettingsResource
@@ -579,8 +585,8 @@ var resourcesMap map[string]importable = map[string]importable{
579585
}
580586
if task.DbtTask.Source == "WORKSPACE" {
581587
directory := task.DbtTask.ProjectDirectory
582-
if strings.HasPrefix(directory, "/Repos") {
583-
ic.emitRepoByPath(directory)
588+
if ic.isInRepoOrGitFolder(directory, true) {
589+
ic.emitRepoOrGitFolder(directory, true)
584590
} else {
585591
// Traverse the dbt project directory and emit all objects found in it
586592
nbAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client)
@@ -1456,40 +1462,51 @@ var resourcesMap map[string]importable = map[string]importable{
14561462
return nameNormalizationRegex.ReplaceAllString(name[7:], "_") + "_" + d.Id()
14571463
},
14581464
Search: func(ic *importContext, r *resource) error {
1459-
reposAPI := repos.NewReposAPI(ic.Context, ic.Client)
1460-
notebooksAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client)
1461-
repoDir, err := notebooksAPI.Read(r.Value)
1465+
repoDir, err := ic.workspaceClient.Workspace.GetStatusByPath(ic.Context, r.Value)
14621466
if err != nil {
14631467
return err
14641468
}
1465-
repo, err := reposAPI.Read(fmt.Sprintf("%d", repoDir.ObjectID))
1466-
if err != nil {
1467-
return err
1469+
if repoDir.ObjectType != sdk_workspace.ObjectTypeRepo {
1470+
return fmt.Errorf("object %s is not a repo", r.Value)
1471+
}
1472+
if repoDir.ResourceId != "" {
1473+
r.ID = repoDir.ResourceId
1474+
} else {
1475+
r.ID = strconv.FormatInt(repoDir.ObjectId, 10)
14681476
}
1469-
r.ID = fmt.Sprintf("%d", repo.ID)
14701477
return nil
14711478
},
14721479
List: func(ic *importContext) error {
1473-
objList, err := repos.NewReposAPI(ic.Context, ic.Client).ListAll()
1474-
if err != nil {
1475-
return err
1476-
}
1477-
for offset, repo := range objList {
1480+
it := ic.workspaceClient.Repos.List(ic.Context, sdk_workspace.ListReposRequest{PathPrefix: "/Workspace"})
1481+
i := 1
1482+
for it.HasNext(ic.Context) {
1483+
repo, err := it.Next(ic.Context)
1484+
if err != nil {
1485+
return err
1486+
}
14781487
if repo.Url != "" {
14791488
ic.Emit(&resource{
14801489
Resource: "databricks_repo",
1481-
ID: fmt.Sprintf("%d", repo.ID),
1490+
ID: strconv.FormatInt(repo.Id, 10),
14821491
})
14831492
} else {
14841493
log.Printf("[WARN] ignoring databricks_repo without Git provider. Path: %s", repo.Path)
14851494
ic.addIgnoredResource(fmt.Sprintf("databricks_repo. path=%s", repo.Path))
14861495
}
1487-
log.Printf("[INFO] Scanned %d of %d repos", offset+1, len(objList))
1496+
if i%50 == 0 {
1497+
log.Printf("[INFO] Scanned %d repos", i)
1498+
}
1499+
i++
14881500
}
14891501
return nil
14901502
},
14911503
Import: func(ic *importContext, r *resource) error {
1492-
ic.emitUserOrServicePrincipalForPath(r.Data.Get("path").(string), "/Repos")
1504+
path := maybeStripWorkspacePrefix(r.Data.Get("path").(string))
1505+
if strings.HasPrefix(path, "/Repos") {
1506+
ic.emitUserOrServicePrincipalForPath(path, "/Repos")
1507+
} else if strings.HasPrefix(path, "/Users") {
1508+
ic.emitUserOrServicePrincipalForPath(path, "/Users")
1509+
}
14931510
ic.emitPermissionsIfNotIgnored(r, fmt.Sprintf("/repos/%s", r.ID),
14941511
"repo_"+ic.Importables["databricks_repo"].Name(ic, r.Data))
14951512
return nil
@@ -1518,12 +1535,15 @@ var resourcesMap map[string]importable = map[string]importable{
15181535
}
15191536
return shouldIgnore
15201537
},
1521-
15221538
Depends: []reference{
15231539
{Path: "path", Resource: "databricks_user", Match: "repos",
15241540
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
15251541
{Path: "path", Resource: "databricks_service_principal", Match: "repos",
15261542
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
1543+
{Path: "path", Resource: "databricks_user", Match: "home",
1544+
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
1545+
{Path: "path", Resource: "databricks_service_principal", Match: "home",
1546+
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
15271547
},
15281548
},
15291549
"databricks_workspace_conf": {
@@ -2236,6 +2256,8 @@ var resourcesMap map[string]importable = map[string]importable{
22362256
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
22372257
{Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
22382258
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
2259+
{Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path",
2260+
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
22392261
},
22402262
},
22412263
"databricks_directory": {
@@ -3436,8 +3458,8 @@ var resourcesMap map[string]importable = map[string]importable{
34363458
},
34373459
Import: func(ic *importContext, r *resource) error {
34383460
path := r.Data.Get("path").(string)
3439-
if strings.HasPrefix(path, "/Repos") {
3440-
ic.emitRepoByPath(path)
3461+
if ic.isInRepoOrGitFolder(path, false) {
3462+
ic.emitRepoOrGitFolder(path, false)
34413463
return nil
34423464
}
34433465
parts := strings.Split(path, "/")
@@ -3459,10 +3481,7 @@ var resourcesMap map[string]importable = map[string]importable{
34593481
"dashboard_"+ic.Importables["databricks_dashboard"].Name(ic, r.Data))
34603482
parentPath := r.Data.Get("parent_path").(string)
34613483
if parentPath != "" && parentPath != "/" {
3462-
ic.Emit(&resource{
3463-
Resource: "databricks_directory",
3464-
ID: parentPath,
3465-
})
3484+
ic.emitDirectoryOrRepo(parentPath)
34663485
}
34673486
warehouseId := r.Data.Get("warehouse_id").(string)
34683487
if warehouseId != "" {
@@ -3478,7 +3497,7 @@ var resourcesMap map[string]importable = map[string]importable{
34783497
return pathString == "dashboard_change_detected" || shouldOmitMd5Field(ic, pathString, as, d)
34793498
},
34803499
Ignore: func(ic *importContext, r *resource) bool {
3481-
return strings.HasPrefix(r.Data.Get("path").(string), "/Repos") || strings.HasPrefix(r.Data.Get("parent_path").(string), "/Repos")
3500+
return ic.isInRepoOrGitFolder(r.Data.Get("path").(string), false) || ic.isInRepoOrGitFolder(r.Data.Get("parent_path").(string), true)
34823501
},
34833502
Depends: []reference{
34843503
{Path: "file_path", File: true},

exporter/importables_test.go

Lines changed: 58 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ func importContextForTest() *importContext {
5858
allSps: map[string]scim.User{},
5959
channels: makeResourcesChannels(),
6060
oldWorkspaceObjectMapping: map[int64]string{},
61+
gitInfoCache: map[string]gitInfoCacheEntry{},
6162
exportDeletedUsersAssets: false,
6263
ignoredResources: map[string]struct{}{},
6364
deletedResources: map[string]struct{}{},
@@ -1525,29 +1526,67 @@ func TestEmitSqlParent(t *testing.T) {
15251526
}
15261527

15271528
func TestEmitFilesFromSlice(t *testing.T) {
1528-
ic := importContextForTest()
1529-
ic.enableServices("storage,notebooks,wsfiles")
1530-
ic.emitFilesFromSlice([]string{
1531-
"dbfs:/FileStore/test.txt",
1532-
"/Workspace/Shared/test.txt",
1533-
"nothing",
1529+
qa.HTTPFixturesApply(t, []qa.HTTPFixture{
1530+
{
1531+
Method: "GET",
1532+
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Ftest.txt&return_git_info=true",
1533+
Response: workspace.ObjectStatus{},
1534+
},
1535+
{
1536+
Method: "GET",
1537+
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Fgit%2Ftest.txt&return_git_info=true",
1538+
Response: workspace.ObjectStatus{
1539+
GitInfo: &sdk_workspace.RepoInfo{
1540+
Id: 1234,
1541+
},
1542+
},
1543+
},
1544+
}, func(ctx context.Context, client *common.DatabricksClient) {
1545+
ic := importContextForTestWithClient(ctx, client)
1546+
ic.enableServices("storage,notebooks,wsfiles,repos")
1547+
ic.emitFilesFromSlice([]string{
1548+
"dbfs:/FileStore/test.txt",
1549+
"/Workspace/Shared/test.txt",
1550+
"/Workspace/Shared/git/test.txt",
1551+
"nothing",
1552+
})
1553+
assert.Equal(t, 3, len(ic.testEmits))
1554+
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
1555+
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
1556+
assert.Contains(t, ic.testEmits, "databricks_repo[<unknown>] (id: 1234)")
15341557
})
1535-
assert.Equal(t, 2, len(ic.testEmits))
1536-
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
1537-
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
15381558
}
15391559

15401560
func TestEmitFilesFromMap(t *testing.T) {
1541-
ic := importContextForTest()
1542-
ic.enableServices("storage,notebooks,wsfiles")
1543-
ic.emitFilesFromMap(map[string]string{
1544-
"k1": "dbfs:/FileStore/test.txt",
1545-
"k2": "/Workspace/Shared/test.txt",
1546-
"k3": "nothing",
1547-
})
1548-
assert.Equal(t, 2, len(ic.testEmits))
1549-
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
1550-
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
1561+
qa.HTTPFixturesApply(t, []qa.HTTPFixture{
1562+
{
1563+
Method: "GET",
1564+
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Ftest.txt&return_git_info=true",
1565+
Response: workspace.ObjectStatus{},
1566+
},
1567+
{
1568+
Method: "GET",
1569+
Resource: "/api/2.0/workspace/get-status?path=%2FShared%2Fgit%2Ftest.txt&return_git_info=true",
1570+
Response: workspace.ObjectStatus{
1571+
GitInfo: &sdk_workspace.RepoInfo{
1572+
Id: 1234,
1573+
},
1574+
},
1575+
},
1576+
}, func(ctx context.Context, client *common.DatabricksClient) {
1577+
ic := importContextForTestWithClient(ctx, client)
1578+
ic.enableServices("storage,notebooks,wsfiles,repos")
1579+
ic.emitFilesFromMap(map[string]string{
1580+
"k1": "dbfs:/FileStore/test.txt",
1581+
"k2": "/Workspace/Shared/test.txt",
1582+
"k3": "nothing",
1583+
"k4": "/Workspace/Shared/git/test.txt",
1584+
})
1585+
assert.Equal(t, 3, len(ic.testEmits))
1586+
assert.Contains(t, ic.testEmits, "databricks_dbfs_file[<unknown>] (id: dbfs:/FileStore/test.txt)")
1587+
assert.Contains(t, ic.testEmits, "databricks_workspace_file[<unknown>] (id: /Shared/test.txt)")
1588+
assert.Contains(t, ic.testEmits, "databricks_repo[<unknown>] (id: 1234)")
1589+
})
15511590
}
15521591

15531592
func TestStorageCredentialListFails(t *testing.T) {

0 commit comments

Comments
 (0)