Skip to content

Commit 94a1d73

Browse files
author
Tagir Bakirov
committed
[Performance] Cache SCIM and permission-assignment list calls to reduce API requests during plan
For large IAM deployments (thousands of databricks_group_member, databricks_group, databricks_user, and mws_permission_assignment resources), terraform plan was issuing one API call per resource Read — resulting in thousands of redundant requests and 429 rate-limit errors. Each resource type now uses a shared in-memory cache backed by sync.RWMutex (concurrent warm reads) + singleflight (deduplicated cold-cache fetches). A single list call populates the cache for all resources of that type; subsequent reads are served from memory. Mutations (Create/Update/Delete) invalidate the relevant cache entry. Changes: - access/resource_permission_assignment.go: permAssignmentCache (1 List call per host instead of 1 per resource) - mws/resource_mws_permission_assignment.go: workspaceAssignmentsCache (1 ListByWorkspaceId per workspace_id instead of 1 per resource) - scim/resource_group.go + scim/groups.go: groupsListCache (1 GET /Groups?attributes=id,... instead of N GET /Groups/{id}) - scim/resource_user.go + scim/users.go: usersListCache (1 GET /Users?attributes=id,... instead of N GET /Users/{id}) - scim/resource_group_member.go: bulk groupCache (1 GET /Groups?attributes=id,members instead of N per-group reads, eliminating concurrent request storms that caused 429 errors)
1 parent a4559d0 commit 94a1d73

14 files changed

+891
-62
lines changed

access/resource_permission_assignment.go

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ import (
88
"net/http"
99
"slices"
1010
"strconv"
11+
"sync"
12+
13+
"golang.org/x/sync/singleflight"
1114

1215
"github.com/databricks/databricks-sdk-go/apierr"
1316
"github.com/databricks/terraform-provider-databricks/common"
@@ -18,6 +21,98 @@ func NewPermissionAssignmentAPI(ctx context.Context, m any) PermissionAssignment
1821
return PermissionAssignmentAPI{m.(*common.DatabricksClient), ctx}
1922
}
2023

24+
// permAssignmentEntry holds the cached list result for one workspace.
25+
type permAssignmentEntry struct {
26+
mu sync.RWMutex
27+
initialized bool
28+
list permissionAssignmentResponse
29+
// sg deduplicates concurrent in-flight API calls when the cache is cold.
30+
// All goroutines that arrive while a fetch is in-flight join the same call
31+
// and are woken simultaneously when it completes, rather than serialising
32+
// through a write lock.
33+
sg singleflight.Group
34+
}
35+
36+
// permAssignmentCache caches the permission assignments list per workspace host
37+
// so that N databricks_permission_assignment resources in the same workspace
38+
// only issue a single API call during a terraform plan/apply cycle.
39+
type permAssignmentCache struct {
40+
mu sync.Mutex
41+
cache map[string]*permAssignmentEntry
42+
}
43+
44+
func newPermAssignmentCache() *permAssignmentCache {
45+
return &permAssignmentCache{
46+
cache: make(map[string]*permAssignmentEntry),
47+
}
48+
}
49+
50+
func (c *permAssignmentCache) getOrCreate(host string) *permAssignmentEntry {
51+
c.mu.Lock()
52+
defer c.mu.Unlock()
53+
if entry, ok := c.cache[host]; ok {
54+
return entry
55+
}
56+
entry := &permAssignmentEntry{}
57+
c.cache[host] = entry
58+
return entry
59+
}
60+
61+
func (c *permAssignmentCache) list(api PermissionAssignmentAPI) (permissionAssignmentResponse, error) {
62+
host := api.client.Config.Host
63+
entry := c.getOrCreate(host)
64+
65+
// Fast path: warm cache. Many goroutines can hold a read-lock simultaneously.
66+
entry.mu.RLock()
67+
if entry.initialized {
68+
l := entry.list
69+
entry.mu.RUnlock()
70+
return l, nil
71+
}
72+
entry.mu.RUnlock()
73+
74+
// Slow path: cache is cold. Use singleflight so exactly one API call is made
75+
// regardless of how many goroutines arrive concurrently; all share the result.
76+
v, err, _ := entry.sg.Do("fetch", func() (interface{}, error) {
77+
// Double-check now that we are the singleflight leader.
78+
entry.mu.RLock()
79+
if entry.initialized {
80+
l := entry.list
81+
entry.mu.RUnlock()
82+
return &l, nil
83+
}
84+
entry.mu.RUnlock()
85+
86+
l, err := api.List()
87+
if err != nil {
88+
return nil, err
89+
}
90+
entry.mu.Lock()
91+
entry.list = l
92+
entry.initialized = true
93+
entry.mu.Unlock()
94+
return &l, nil
95+
})
96+
if err != nil {
97+
return permissionAssignmentResponse{}, err
98+
}
99+
return *v.(*permissionAssignmentResponse), nil
100+
}
101+
102+
func (c *permAssignmentCache) invalidate(host string) {
103+
c.mu.Lock()
104+
entry, ok := c.cache[host]
105+
c.mu.Unlock()
106+
if ok {
107+
entry.mu.Lock()
108+
entry.initialized = false
109+
entry.list = permissionAssignmentResponse{}
110+
entry.mu.Unlock()
111+
}
112+
}
113+
114+
var globalPermAssignmentCache = newPermAssignmentCache()
115+
21116
type PermissionAssignmentAPI struct {
22117
client *common.DatabricksClient
23118
context context.Context
@@ -156,6 +251,7 @@ func ResourcePermissionAssignment() common.Resource {
156251
if err != nil {
157252
return err
158253
}
254+
defer globalPermAssignmentCache.invalidate(c.Config.Host)
159255
var assignment permissionAssignmentEntity
160256
common.DataToStructPointer(d, s, &assignment)
161257
api := NewPermissionAssignmentAPI(ctx, c)
@@ -188,7 +284,7 @@ func ResourcePermissionAssignment() common.Resource {
188284
if err != nil {
189285
return err
190286
}
191-
list, err := NewPermissionAssignmentAPI(ctx, c).List()
287+
list, err := globalPermAssignmentCache.list(NewPermissionAssignmentAPI(ctx, c))
192288
if err != nil {
193289
return err
194290
}
@@ -203,6 +299,7 @@ func ResourcePermissionAssignment() common.Resource {
203299
if err != nil {
204300
return err
205301
}
302+
defer globalPermAssignmentCache.invalidate(c.Config.Host)
206303
var assignment permissionAssignmentEntity
207304
common.DataToStructPointer(d, s, &assignment)
208305
api := NewPermissionAssignmentAPI(ctx, c)
@@ -214,6 +311,7 @@ func ResourcePermissionAssignment() common.Resource {
214311
if err != nil {
215312
return err
216313
}
314+
defer globalPermAssignmentCache.invalidate(c.Config.Host)
217315
return NewPermissionAssignmentAPI(ctx, c).Remove(d.Id())
218316
},
219317
}

mws/resource_mws_permission_assignment.go

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,107 @@ package mws
33
import (
44
"context"
55
"fmt"
6+
"sync"
7+
8+
"golang.org/x/sync/singleflight"
69

710
"github.com/databricks/databricks-sdk-go/apierr"
811
"github.com/databricks/databricks-sdk-go/service/iam"
912
"github.com/databricks/terraform-provider-databricks/common"
1013
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
1114
)
1215

16+
// workspaceAssignmentsEntry holds the cached ListByWorkspaceId result for one workspace.
17+
type workspaceAssignmentsEntry struct {
18+
mu sync.RWMutex
19+
initialized bool
20+
list *iam.PermissionAssignments
21+
// sg deduplicates concurrent in-flight API calls when the cache is cold.
22+
// All goroutines that arrive while a fetch is in-flight join the same call
23+
// and are woken simultaneously when it completes, rather than serialising
24+
// through a write lock.
25+
sg singleflight.Group
26+
}
27+
28+
// workspaceAssignmentsCache caches ListByWorkspaceId results per workspace ID so
29+
// that N databricks_mws_permission_assignment resources sharing the same
30+
// workspace_id only issue a single API call during a terraform plan/apply cycle.
31+
type workspaceAssignmentsCache struct {
32+
mu sync.Mutex
33+
cache map[int64]*workspaceAssignmentsEntry
34+
}
35+
36+
func newWorkspaceAssignmentsCache() *workspaceAssignmentsCache {
37+
return &workspaceAssignmentsCache{
38+
cache: make(map[int64]*workspaceAssignmentsEntry),
39+
}
40+
}
41+
42+
func (c *workspaceAssignmentsCache) getOrCreate(workspaceId int64) *workspaceAssignmentsEntry {
43+
c.mu.Lock()
44+
defer c.mu.Unlock()
45+
if entry, ok := c.cache[workspaceId]; ok {
46+
return entry
47+
}
48+
entry := &workspaceAssignmentsEntry{}
49+
c.cache[workspaceId] = entry
50+
return entry
51+
}
52+
53+
func (c *workspaceAssignmentsCache) list(ctx context.Context, api iam.WorkspaceAssignmentInterface, workspaceId int64) (*iam.PermissionAssignments, error) {
54+
entry := c.getOrCreate(workspaceId)
55+
56+
// Fast path: warm cache. Many goroutines can hold a read-lock simultaneously.
57+
entry.mu.RLock()
58+
if entry.initialized {
59+
l := entry.list
60+
entry.mu.RUnlock()
61+
return l, nil
62+
}
63+
entry.mu.RUnlock()
64+
65+
// Slow path: cache is cold. Use singleflight so exactly one API call is made
66+
// regardless of how many goroutines arrive concurrently; all share the result.
67+
v, err, _ := entry.sg.Do("fetch", func() (interface{}, error) {
68+
// Double-check now that we are the singleflight leader.
69+
entry.mu.RLock()
70+
if entry.initialized {
71+
l := entry.list
72+
entry.mu.RUnlock()
73+
return l, nil
74+
}
75+
entry.mu.RUnlock()
76+
77+
list, err := api.ListByWorkspaceId(ctx, workspaceId)
78+
if err != nil {
79+
return nil, err
80+
}
81+
entry.mu.Lock()
82+
entry.list = list
83+
entry.initialized = true
84+
entry.mu.Unlock()
85+
return list, nil
86+
})
87+
if err != nil {
88+
return nil, err
89+
}
90+
return v.(*iam.PermissionAssignments), nil
91+
}
92+
93+
func (c *workspaceAssignmentsCache) invalidate(workspaceId int64) {
94+
c.mu.Lock()
95+
entry, ok := c.cache[workspaceId]
96+
c.mu.Unlock()
97+
if ok {
98+
entry.mu.Lock()
99+
entry.initialized = false
100+
entry.list = nil
101+
entry.mu.Unlock()
102+
}
103+
}
104+
105+
var globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
106+
13107
func getPermissionsByPrincipal(list iam.PermissionAssignments, principalId int64) (res iam.UpdateWorkspaceAssignments, err error) {
14108
for _, v := range list.PermissionAssignments {
15109
if v.Principal.PrincipalId != principalId {
@@ -56,6 +150,7 @@ func ResourceMwsPermissionAssignment() common.Resource {
56150
if err != nil {
57151
return err
58152
}
153+
globalWorkspaceAssignmentsCache.invalidate(assignment.WorkspaceId)
59154
pair.Pack(d)
60155
return nil
61156
},
@@ -68,7 +163,7 @@ func ResourceMwsPermissionAssignment() common.Resource {
68163
if err != nil {
69164
return fmt.Errorf("parse id: %w", err)
70165
}
71-
list, err := acc.WorkspaceAssignment.ListByWorkspaceId(ctx, common.MustInt64(workspaceId))
166+
list, err := globalWorkspaceAssignmentsCache.list(ctx, acc.WorkspaceAssignment, common.MustInt64(workspaceId))
72167
if err != nil {
73168
return err
74169
}
@@ -89,7 +184,9 @@ func ResourceMwsPermissionAssignment() common.Resource {
89184
if err != nil {
90185
return fmt.Errorf("parse id: %w", err)
91186
}
92-
return acc.WorkspaceAssignment.DeleteByWorkspaceIdAndPrincipalId(ctx, common.MustInt64(workspaceId), common.MustInt64(principalId))
187+
err = acc.WorkspaceAssignment.DeleteByWorkspaceIdAndPrincipalId(ctx, common.MustInt64(workspaceId), common.MustInt64(principalId))
188+
globalWorkspaceAssignmentsCache.invalidate(common.MustInt64(workspaceId))
189+
return err
93190
},
94191
}
95192
}

mws/resource_mws_permission_assignment_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
)
1111

1212
func TestPermissionAssignmentCreate(t *testing.T) {
13+
globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
1314
qa.ResourceFixture{
1415
MockAccountClientFunc: func(m *mocks.MockAccountClient) {
1516
e := m.GetMockWorkspaceAssignmentAPI().EXPECT()
@@ -46,6 +47,7 @@ func TestPermissionAssignmentCreate(t *testing.T) {
4647
}
4748

4849
func TestPermissionAssignmentRead(t *testing.T) {
50+
globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
4951
qa.ResourceFixture{
5052
MockAccountClientFunc: func(m *mocks.MockAccountClient) {
5153
e := m.GetMockWorkspaceAssignmentAPI().EXPECT()
@@ -79,6 +81,7 @@ func TestPermissionAssignmentRead(t *testing.T) {
7981
}
8082

8183
func TestPermissionAssignmentReadNotFound(t *testing.T) {
84+
globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
8285
qa.ResourceFixture{
8386
MockAccountClientFunc: func(m *mocks.MockAccountClient) {
8487
e := m.GetMockWorkspaceAssignmentAPI().EXPECT()
@@ -102,6 +105,7 @@ func TestPermissionAssignmentReadNotFound(t *testing.T) {
102105
}
103106

104107
func TestPermissionAssignmentDelete(t *testing.T) {
108+
globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
105109
qa.ResourceFixture{
106110
MockAccountClientFunc: func(m *mocks.MockAccountClient) {
107111
e := m.GetMockWorkspaceAssignmentAPI().EXPECT()
@@ -115,19 +119,22 @@ func TestPermissionAssignmentDelete(t *testing.T) {
115119
}
116120

117121
func TestPermissionAssignmentFuzz_NoAccountID(t *testing.T) {
122+
globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
118123
qa.ResourceCornerCases(t, ResourceMwsPermissionAssignment(),
119124
qa.CornerCaseID("123|456"),
120125
qa.CornerCaseExpectError("invalid Databricks Account configuration"))
121126
}
122127

123128
func TestPermissionAssignmentFuzz_InvalidID(t *testing.T) {
129+
globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
124130
qa.ResourceCornerCases(t, ResourceMwsPermissionAssignment(),
125131
qa.CornerCaseExpectError("parse id: invalid ID: x"),
126132
qa.CornerCaseSkipCRUD("create"),
127133
qa.CornerCaseAccountID("abc"))
128134
}
129135

130136
func TestPermissionAssignmentFuzz_ApiErrors(t *testing.T) {
137+
globalWorkspaceAssignmentsCache = newWorkspaceAssignmentsCache()
131138
qa.ResourceCornerCases(t, ResourceMwsPermissionAssignment(),
132139
qa.CornerCaseAccountID("abc"),
133140
qa.CornerCaseID("123|456"))

scim/groups.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"net/http"
7+
"strconv"
78

89
"github.com/databricks/terraform-provider-databricks/common"
910
)
@@ -55,6 +56,32 @@ func (a GroupsAPI) Filter(filter string, attributes string) (GroupList, error) {
5556
return groups, err
5657
}
5758

59+
// ListAll retrieves all groups with the given attributes, handling SCIM pagination.
60+
func (a GroupsAPI) ListAll(attributes string) ([]Group, error) {
61+
startIndex := 1
62+
var result []Group
63+
for {
64+
req := map[string]string{
65+
"count": "10000",
66+
"startIndex": strconv.Itoa(startIndex),
67+
}
68+
if attributes != "" {
69+
req["attributes"] = attributes
70+
}
71+
var page GroupList
72+
err := a.client.Scim(a.context, http.MethodGet, "/preview/scim/v2/Groups", req, &page, a.ApiLevel)
73+
if err != nil {
74+
return nil, err
75+
}
76+
result = append(result, page.Resources...)
77+
if len(page.Resources) == 0 || int32(len(result)) >= page.TotalResults {
78+
break
79+
}
80+
startIndex += len(page.Resources)
81+
}
82+
return result, nil
83+
}
84+
5885
func (a GroupsAPI) ReadByDisplayName(displayName, attributes string) (group Group, err error) {
5986
groupList, err := a.Filter(fmt.Sprintf(`displayName eq "%s"`, displayName), attributes)
6087
if err != nil {

0 commit comments

Comments
 (0)