Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions pkg/services/authz/rbac.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,11 @@ func ProvideAuthZClient(
return ctx, nil
}))
authzv1.RegisterAuthzServiceServer(channel, server)
rbacClient := newRBACClient(channel, tracer)
rbacClient := authzlib.NewClient(
channel,
authzlib.WithCacheClientOption(&NoopCache{}),
authzlib.WithTracerClientOption(tracer),
)

if features.IsEnabledGlobally(featuremgmt.FlagZanzana) {
return zanzana.WithShadowClient(rbacClient, zanzanaClient, reg)
Expand Down Expand Up @@ -153,18 +157,16 @@ func newRemoteRBACClient(clientCfg *authzClientSettings, tracer trace.Tracer) (a
return nil, fmt.Errorf("failed to create authz client to remote server: %w", err)
}

return newRBACClient(conn, tracer), nil
}

func newRBACClient(conn grpc.ClientConnInterface, tracer trace.Tracer) authlib.AccessClient {
return authzlib.NewClient(
client := authzlib.NewClient(
conn,
authzlib.WithCacheClientOption(cache.NewLocalCache(cache.Config{
Expiry: 30 * time.Second,
CleanupInterval: 2 * time.Minute,
})),
authzlib.WithTracerClientOption(tracer),
)

return client, nil
}

func RegisterRBACAuthZService(
Expand Down Expand Up @@ -233,3 +235,17 @@ func (t tokenExhangeRoundTripper) RoundTrip(r *http.Request) (*http.Response, er
r.Header.Set("X-Access-Token", "Bearer "+res.Token)
return t.rt.RoundTrip(r)
}

type NoopCache struct{}

func (lc *NoopCache) Get(ctx context.Context, key string) ([]byte, error) {
return nil, cache.ErrNotFound
}

func (lc *NoopCache) Set(ctx context.Context, key string, data []byte, exp time.Duration) error {
return nil
}

func (lc *NoopCache) Delete(ctx context.Context, key string) error {
return nil
}
4 changes: 4 additions & 0 deletions pkg/services/authz/rbac/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ func userPermCacheKey(namespace, userUID, action string) string {
return namespace + ".perm_" + userUID + "_" + action
}

func userPermDenialCacheKey(namespace, userUID, action, name, parent string) string {
return namespace + ".perm_" + userUID + "_" + action + "_" + name + "_" + parent
}

func userBasicRoleCacheKey(namespace, userUID string) string {
return namespace + ".basic_role_" + userUID
}
Expand Down
93 changes: 74 additions & 19 deletions pkg/services/authz/rbac/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,12 @@ type Service struct {
sf *singleflight.Group

// Cache for user permissions, user team memberships and user basic roles
idCache *cacheWrap[store.UserIdentifiers]
permCache *cacheWrap[map[string]bool]
teamCache *cacheWrap[[]int64]
basicRoleCache *cacheWrap[store.BasicRole]
folderCache *cacheWrap[folderTree]
idCache *cacheWrap[store.UserIdentifiers]
permCache *cacheWrap[map[string]bool]
permDenialCache *cacheWrap[bool]
teamCache *cacheWrap[[]int64]
basicRoleCache *cacheWrap[store.BasicRole]
folderCache *cacheWrap[folderTree]
}

func NewService(
Expand All @@ -81,6 +82,7 @@ func NewService(
mapper: newMapper(),
idCache: newCacheWrap[store.UserIdentifiers](cache, logger, longCacheTTL),
permCache: newCacheWrap[map[string]bool](cache, logger, shortCacheTTL),
permDenialCache: newCacheWrap[bool](cache, logger, shortCacheTTL),
teamCache: newCacheWrap[[]int64](cache, logger, shortCacheTTL),
basicRoleCache: newCacheWrap[store.BasicRole](cache, logger, shortCacheTTL),
folderCache: newCacheWrap[folderTree](cache, logger, shortCacheTTL),
Expand Down Expand Up @@ -111,6 +113,29 @@ func (s *Service) Check(ctx context.Context, req *authzv1.CheckRequest) (*authzv
attribute.String("folder", checkReq.ParentFolder),
)

permDenialKey := userPermDenialCacheKey(checkReq.Namespace.Value, checkReq.UserUID, checkReq.Action, checkReq.Name, checkReq.ParentFolder)
if _, ok := s.permDenialCache.Get(ctx, permDenialKey); ok {
s.metrics.permissionCacheUsage.WithLabelValues("true", checkReq.Action).Inc()
s.metrics.requestCount.WithLabelValues("false", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return &authzv1.CheckResponse{Allowed: false}, nil
}

cachedPerms, err := s.getCachedIdentityPermissions(ctx, checkReq.Namespace, checkReq.IdentityType, checkReq.UserUID, checkReq.Action)
if err == nil {
allowed, err := s.checkPermission(ctx, cachedPerms, checkReq)
if err != nil {
ctxLogger.Error("could not check permission", "error", err)
s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return deny, err
}
if allowed {
s.metrics.permissionCacheUsage.WithLabelValues("true", checkReq.Action).Inc()
s.metrics.requestCount.WithLabelValues("false", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return &authzv1.CheckResponse{Allowed: allowed}, nil
}
}
s.metrics.permissionCacheUsage.WithLabelValues("false", checkReq.Action).Inc()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This increments permissionCacheUsage with "false" even when getCachedIdentityPermissions was a cache hit but allowed == false, which misclassifies a cache hit as a miss. Consider counting a cache hit whenever err == nil (regardless of allow/deny), or only incrementing the "false" metric when the cache lookup actually missed.

🤖 Was this useful? React with 👍 or 👎


permissions, err := s.getIdentityPermissions(ctx, checkReq.Namespace, checkReq.IdentityType, checkReq.UserUID, checkReq.Action)
if err != nil {
ctxLogger.Error("could not get user permissions", "subject", req.GetSubject(), "error", err)
Expand All @@ -125,6 +150,10 @@ func (s *Service) Check(ctx context.Context, req *authzv1.CheckRequest) (*authzv
return deny, err
}

if !allowed {
s.permDenialCache.Set(ctx, permDenialKey, true)
}

s.metrics.requestCount.WithLabelValues("false", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return &authzv1.CheckResponse{Allowed: allowed}, nil
}
Expand All @@ -148,11 +177,18 @@ func (s *Service) List(ctx context.Context, req *authzv1.ListRequest) (*authzv1.
attribute.String("action", listReq.Action),
)

permissions, err := s.getIdentityPermissions(ctx, listReq.Namespace, listReq.IdentityType, listReq.UserUID, listReq.Action)
if err != nil {
ctxLogger.Error("could not get user permissions", "subject", req.GetSubject(), "error", err)
s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return nil, err
permissions, err := s.getCachedIdentityPermissions(ctx, listReq.Namespace, listReq.IdentityType, listReq.UserUID, listReq.Action)
if err == nil {
s.metrics.permissionCacheUsage.WithLabelValues("true", listReq.Action).Inc()
} else {
s.metrics.permissionCacheUsage.WithLabelValues("false", listReq.Action).Inc()

permissions, err = s.getIdentityPermissions(ctx, listReq.Namespace, listReq.IdentityType, listReq.UserUID, listReq.Action)
if err != nil {
ctxLogger.Error("could not get user permissions", "subject", req.GetSubject(), "error", err)
s.metrics.requestCount.WithLabelValues("true", "true", req.GetVerb(), req.GetGroup(), req.GetResource()).Inc()
return nil, err
}
}

resp, err := s.listPermission(ctx, permissions, listReq)
Expand Down Expand Up @@ -303,6 +339,34 @@ func (s *Service) getIdentityPermissions(ctx context.Context, ns types.Namespace
}
}

func (s *Service) getCachedIdentityPermissions(ctx context.Context, ns types.NamespaceInfo, idType types.IdentityType, userID, action string) (map[string]bool, error) {
ctx, span := s.tracer.Start(ctx, "authz_direct_db.service.getCachedIdentityPermissions")
defer span.End()

switch idType {
case types.TypeAnonymous:
anonPermKey := anonymousPermCacheKey(ns.Value, action)
if cached, ok := s.permCache.Get(ctx, anonPermKey); ok {
return cached, nil
}
return nil, cache.ErrNotFound
case types.TypeRenderService:
return nil, cache.ErrNotFound
case types.TypeUser, types.TypeServiceAccount:
userIdentifiers, err := s.GetUserIdentifiers(ctx, ns, userID)
if err != nil {
return nil, err
}
userPermKey := userPermCacheKey(ns.Value, userIdentifiers.UID, action)
if cached, ok := s.permCache.Get(ctx, userPermKey); ok {
return cached, nil
}
return nil, cache.ErrNotFound
default:
return nil, fmt.Errorf("unsupported identity type: %s", idType)
}
}

func (s *Service) getUserPermissions(ctx context.Context, ns types.NamespaceInfo, userID, action string, actionSets []string) (map[string]bool, error) {
ctx, span := s.tracer.Start(ctx, "authz_direct_db.service.getUserPermissions")
defer span.End()
Expand All @@ -313,12 +377,6 @@ func (s *Service) getUserPermissions(ctx context.Context, ns types.NamespaceInfo
}

userPermKey := userPermCacheKey(ns.Value, userIdentifiers.UID, action)
if cached, ok := s.permCache.Get(ctx, userPermKey); ok {
s.metrics.permissionCacheUsage.WithLabelValues("true", action).Inc()
return cached, nil
}
s.metrics.permissionCacheUsage.WithLabelValues("false", action).Inc()

res, err, _ := s.sf.Do(userPermKey+"_getUserPermissions", func() (interface{}, error) {
basicRoles, err := s.getUserBasicRole(ctx, ns, userIdentifiers)
if err != nil {
Expand Down Expand Up @@ -363,9 +421,6 @@ func (s *Service) getAnonymousPermissions(ctx context.Context, ns types.Namespac
defer span.End()

anonPermKey := anonymousPermCacheKey(ns.Value, action)
if cached, ok := s.permCache.Get(ctx, anonPermKey); ok {
return cached, nil
}
res, err, _ := s.sf.Do(anonPermKey+"_getAnonymousPermissions", func() (interface{}, error) {
permissions, err := s.permissionStore.GetUserPermissions(ctx, ns, store.PermissionsQuery{Action: action, ActionSets: actionSets, Role: "Viewer"})
if err != nil {
Expand Down
148 changes: 140 additions & 8 deletions pkg/services/authz/rbac/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,14 +339,6 @@ func TestService_getUserPermissions(t *testing.T) {
}

testCases := []testCase{
{
name: "should return permissions from cache if available",
permissions: []accesscontrol.Permission{
{Action: "dashboards:read", Scope: "dashboards:uid:some_dashboard"},
},
cacheHit: true,
expectedPerms: map[string]bool{"dashboards:uid:some_dashboard": true},
},
{
name: "should return permissions from store if not in cache",
permissions: []accesscontrol.Permission{
Expand Down Expand Up @@ -898,6 +890,111 @@ func TestService_Check(t *testing.T) {
})
}

func TestService_CacheCheck(t *testing.T) {
callingService := authn.NewAccessTokenAuthInfo(authn.Claims[authn.AccessTokenClaims]{
Claims: jwt.Claims{
Subject: types.NewTypeID(types.TypeAccessPolicy, "some-service"),
Audience: []string{"authzservice"},
},
Rest: authn.AccessTokenClaims{Namespace: "org-12"},
})

ctx := types.WithAuthInfo(context.Background(), callingService)
userID := &store.UserIdentifiers{UID: "test-uid", ID: 1}

t.Run("Allow based on cached permissions", func(t *testing.T) {
s := setupService()

s.idCache.Set(ctx, userIdentifierCacheKey("org-12", "test-uid"), *userID)
s.permCache.Set(ctx, userPermCacheKey("org-12", "test-uid", "dashboards:read"), map[string]bool{"dashboards:uid:dash1": true})

resp, err := s.Check(ctx, &authzv1.CheckRequest{
Namespace: "org-12",
Subject: "user:test-uid",
Group: "dashboard.grafana.app",
Resource: "dashboards",
Verb: "get",
Name: "dash1",
})
require.NoError(t, err)
assert.True(t, resp.Allowed)
})
t.Run("Fallback to the database on cache miss", func(t *testing.T) {
s := setupService()

// Populate database permission but not the cache
store := &fakeStore{
userID: userID,
userPermissions: []accesscontrol.Permission{{Action: "dashboards:read", Scope: "dashboards:uid:dash2"}},
}

s.store = store
s.permissionStore = store

s.idCache.Set(ctx, userIdentifierCacheKey("org-12", "test-uid"), *userID)

resp, err := s.Check(ctx, &authzv1.CheckRequest{
Namespace: "org-12",
Subject: "user:test-uid",
Group: "dashboard.grafana.app",
Resource: "dashboards",
Verb: "get",
Name: "dash2",
})
require.NoError(t, err)
assert.True(t, resp.Allowed)
})
t.Run("Fallback to the database on outdated cache", func(t *testing.T) {
s := setupService()

store := &fakeStore{
userID: userID,
userPermissions: []accesscontrol.Permission{{Action: "dashboards:read", Scope: "dashboards:uid:dash2"}},
}

s.store = store
s.permissionStore = store

s.idCache.Set(ctx, userIdentifierCacheKey("org-12", "test-uid"), *userID)
// The cache does not have the permission for dash2 (outdated)
s.permCache.Set(ctx, userPermCacheKey("org-12", "test-uid", "dashboards:read"), map[string]bool{"dashboards:uid:dash1": true})

resp, err := s.Check(ctx, &authzv1.CheckRequest{
Namespace: "org-12",
Subject: "user:test-uid",
Group: "dashboard.grafana.app",
Resource: "dashboards",
Verb: "get",
Name: "dash2",
})
require.NoError(t, err)
assert.True(t, resp.Allowed)
})
t.Run("Should deny on explicit cache deny entry", func(t *testing.T) {
s := setupService()

s.idCache.Set(ctx, userIdentifierCacheKey("org-12", "test-uid"), *userID)

// Explicitly deny access to the dashboard
s.permDenialCache.Set(ctx, userPermDenialCacheKey("org-12", "test-uid", "dashboards:read", "dash1", "fold1"), true)

// Allow access to the dashboard to prove this is not checked
s.permCache.Set(ctx, userPermCacheKey("org-12", "test-uid", "dashboards:read"), map[string]bool{"dashboards:uid:dash1": false})

resp, err := s.Check(ctx, &authzv1.CheckRequest{
Namespace: "org-12",
Subject: "user:test-uid",
Group: "dashboard.grafana.app",
Resource: "dashboards",
Verb: "get",
Name: "dash1",
Folder: "fold1",
})
require.NoError(t, err)
assert.False(t, resp.Allowed)
})
}

func TestService_List(t *testing.T) {
callingService := authn.NewAccessTokenAuthInfo(authn.Claims[authn.AccessTokenClaims]{
Claims: jwt.Claims{
Expand Down Expand Up @@ -1191,6 +1288,40 @@ func TestService_List(t *testing.T) {
})
}

func TestService_CacheList(t *testing.T) {
callingService := authn.NewAccessTokenAuthInfo(authn.Claims[authn.AccessTokenClaims]{
Claims: jwt.Claims{
Subject: types.NewTypeID(types.TypeAccessPolicy, "some-service"),
Audience: []string{"authzservice"},
},
Rest: authn.AccessTokenClaims{Namespace: "org-12"},
})

t.Run("List based on cached permissions", func(t *testing.T) {
s := setupService()
ctx := types.WithAuthInfo(context.Background(), callingService)
userID := &store.UserIdentifiers{UID: "test-uid", ID: 1}
s.idCache.Set(ctx, userIdentifierCacheKey("org-12", "test-uid"), *userID)
s.permCache.Set(ctx,
userPermCacheKey("org-12", "test-uid", "dashboards:read"),
map[string]bool{"dashboards:uid:dash1": true, "dashboards:uid:dash2": true, "folders:uid:fold1": true},
)
s.identityStore = &fakeIdentityStore{}

resp, err := s.List(ctx, &authzv1.ListRequest{
Namespace: "org-12",
Subject: "user:test-uid",
Group: "dashboard.grafana.app",
Resource: "dashboards",
Verb: "list",
})

require.NoError(t, err)
require.ElementsMatch(t, resp.Items, []string{"dash1", "dash2"})
require.ElementsMatch(t, resp.Folders, []string{"fold1"})
})
}

func setupService() *Service {
cache := cache.NewLocalCache(cache.Config{Expiry: 5 * time.Minute, CleanupInterval: 5 * time.Minute})
logger := log.New("authz-rbac-service")
Expand All @@ -1202,6 +1333,7 @@ func setupService() *Service {
metrics: newMetrics(nil),
idCache: newCacheWrap[store.UserIdentifiers](cache, logger, longCacheTTL),
permCache: newCacheWrap[map[string]bool](cache, logger, shortCacheTTL),
permDenialCache: newCacheWrap[bool](cache, logger, shortCacheTTL),
teamCache: newCacheWrap[[]int64](cache, logger, shortCacheTTL),
basicRoleCache: newCacheWrap[store.BasicRole](cache, logger, longCacheTTL),
folderCache: newCacheWrap[folderTree](cache, logger, shortCacheTTL),
Expand Down
Loading