Skip to content

Commit 68d537b

Browse files
authored
Move rules deletion from Purger to Ruler API. (#3899)
Signed-off-by: Peter Štibraný <[email protected]>
1 parent a0c89bd commit 68d537b

File tree

13 files changed

+152
-204
lines changed

13 files changed

+152
-204
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
* `cortex_ha_tracker_replicas_cleanup_marked_for_deletion_total`
7373
* `cortex_ha_tracker_replicas_cleanup_deleted_total`
7474
* `cortex_ha_tracker_replicas_cleanup_delete_failed_total`
75-
* [ENHANCEMENT] Tenant deletion endpoints now support deletion of ruler groups. This only works when using rule store that supports deletion. #3750
75+
* [ENHANCEMENT] Ruler now has a new API endpoint `/ruler/delete_tenant_config` that can be used to delete all rule groups for a tenant. It is intended to be used by administrators who wish to clean up state after a tenant has been removed. Note that this endpoint is enabled regardless of `-experimental.ruler.enable-api`. #3750 #3899
7676
* [ENHANCEMENT] Query-frontend, query-scheduler: cleanup metrics for inactive tenants. #3826
7777
* [ENHANCEMENT] Distributor: Prevent failed ingestion from affecting rate limiting. #3825
7878
* [ENHANCEMENT] Blocks storage: added `-blocks-storage.s3.region` support to S3 client configuration. #3811

docs/api/_index.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ For the sake of clarity, in this document we have grouped API endpoints by servi
4949
| [Set rule group](#set-rule-group) | Ruler | `POST /api/v1/rules/{namespace}` |
5050
| [Delete rule group](#delete-rule-group) | Ruler | `DELETE /api/v1/rules/{namespace}/{groupName}` |
5151
| [Delete namespace](#delete-namespace) | Ruler | `DELETE /api/v1/rules/{namespace}` |
52+
| [Delete tenant configuration](#delete-tenant-configuration) | Ruler | `POST /ruler/delete_tenant_config` |
5253
| [Alertmanager status](#alertmanager-status) | Alertmanager | `GET /multitenant_alertmanager/status` |
5354
| [Alertmanager ring status](#alertmanager-ring-status) | Alertmanager | `GET /multitenant_alertmanager/ring` |
5455
| [Alertmanager UI](#alertmanager-ui) | Alertmanager | `GET /<alertmanager-http-prefix>` |
@@ -636,6 +637,18 @@ _This experimental endpoint is disabled by default and can be enabled via the `-
636637

637638
_Requires [authentication](#authentication)._
638639

640+
### Delete tenant configuration
641+
642+
```
643+
POST /ruler/delete_tenant_config
644+
```
645+
646+
This deletes all rule groups for the tenant and returns `200` on success. Calling the endpoint when no rule groups exist for the tenant also returns `200`. Authentication is used only to identify the tenant.
647+
648+
This is intended as an internal API and should not be exposed to users. This endpoint is enabled regardless of whether `-experimental.ruler.enable-api` is set.
649+
650+
_Requires [authentication](#authentication)._
651+
639652
## Alertmanager
640653

641654
### Alertmanager status

pkg/api/api.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,9 @@ func (a *API) RegisterRuler(r *ruler.Ruler) {
276276
a.indexPage.AddLink(SectionAdminEndpoints, "/ruler/ring", "Ruler Ring Status")
277277
a.RegisterRoute("/ruler/ring", r, false, "GET", "POST")
278278

279+
// Administrative API, uses authentication to inform which user's configuration to delete.
280+
a.RegisterRoute("/ruler/delete_tenant_config", http.HandlerFunc(r.DeleteTenantConfiguration), true, "POST")
281+
279282
// Legacy Ring Route
280283
a.RegisterRoute("/ruler_ring", r, false, "GET", "POST")
281284

pkg/chunk/purger/tenant_deletion_api.go

Lines changed: 8 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"github.com/prometheus/client_golang/prometheus"
1414
"github.com/thanos-io/thanos/pkg/objstore"
1515

16-
"github.com/cortexproject/cortex/pkg/ruler/rulestore"
1716
"github.com/cortexproject/cortex/pkg/storage/bucket"
1817
cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
1918
"github.com/cortexproject/cortex/pkg/tenant"
@@ -22,24 +21,22 @@ import (
2221

2322
type TenantDeletionAPI struct {
2423
bucketClient objstore.Bucket
25-
ruleStore rulestore.RuleStore
2624
logger log.Logger
2725
cfgProvider bucket.TenantConfigProvider
2826
}
2927

30-
func NewTenantDeletionAPI(storageCfg cortex_tsdb.BlocksStorageConfig, cfgProvider bucket.TenantConfigProvider, ruleStore rulestore.RuleStore, logger log.Logger, reg prometheus.Registerer) (*TenantDeletionAPI, error) {
28+
func NewTenantDeletionAPI(storageCfg cortex_tsdb.BlocksStorageConfig, cfgProvider bucket.TenantConfigProvider, logger log.Logger, reg prometheus.Registerer) (*TenantDeletionAPI, error) {
3129
bucketClient, err := createBucketClient(storageCfg, logger, reg)
3230
if err != nil {
3331
return nil, err
3432
}
3533

36-
return newTenantDeletionAPI(bucketClient, cfgProvider, ruleStore, logger), nil
34+
return newTenantDeletionAPI(bucketClient, cfgProvider, logger), nil
3735
}
3836

39-
func newTenantDeletionAPI(bkt objstore.Bucket, cfgProvider bucket.TenantConfigProvider, ruleStore rulestore.RuleStore, logger log.Logger) *TenantDeletionAPI {
37+
func newTenantDeletionAPI(bkt objstore.Bucket, cfgProvider bucket.TenantConfigProvider, logger log.Logger) *TenantDeletionAPI {
4038
return &TenantDeletionAPI{
4139
bucketClient: bkt,
42-
ruleStore: ruleStore,
4340
cfgProvider: cfgProvider,
4441
logger: logger,
4542
}
@@ -49,7 +46,9 @@ func (api *TenantDeletionAPI) DeleteTenant(w http.ResponseWriter, r *http.Reques
4946
ctx := r.Context()
5047
userID, err := tenant.TenantID(ctx)
5148
if err != nil {
52-
http.Error(w, err.Error(), http.StatusBadRequest)
49+
// When Cortex is running, it uses Auth Middleware for checking X-Scope-OrgID and injecting tenant into context.
50+
// Auth Middleware sends http.StatusUnauthorized if X-Scope-OrgID is missing, so we do too here, for consistency.
51+
http.Error(w, err.Error(), http.StatusUnauthorized)
5352
return
5453
}
5554

@@ -63,38 +62,12 @@ func (api *TenantDeletionAPI) DeleteTenant(w http.ResponseWriter, r *http.Reques
6362

6463
level.Info(api.logger).Log("msg", "tenant deletion mark in blocks storage created", "user", userID)
6564

66-
if api.ruleStore != nil {
67-
err := api.deleteRules(r.Context(), userID)
68-
if err != nil {
69-
level.Error(api.logger).Log("msg", "failed to delete tenant rule groups", "user", userID, "err", err)
70-
http.Error(w, errors.Wrapf(err, "failed to delete tenant rule groups").Error(), http.StatusInternalServerError)
71-
return
72-
}
73-
}
74-
7565
w.WriteHeader(http.StatusOK)
7666
}
7767

78-
func (api *TenantDeletionAPI) deleteRules(ctx context.Context, userID string) error {
79-
if !api.ruleStore.SupportsModifications() {
80-
level.Warn(api.logger).Log("msg", "cannot delete tenant rule groups, using read-only rule store", "user", userID)
81-
return nil
82-
}
83-
84-
err := api.ruleStore.DeleteNamespace(ctx, userID, "") // Empty namespace = delete all rule groups.
85-
if err != nil && !errors.Is(err, rulestore.ErrGroupNamespaceNotFound) {
86-
return err
87-
}
88-
89-
level.Info(api.logger).Log("msg", "deleted all tenant rule groups", "user", userID)
90-
return nil
91-
}
92-
9368
type DeleteTenantStatusResponse struct {
94-
TenantID string `json:"tenant_id"`
95-
BlocksDeleted bool `json:"blocks_deleted"`
96-
RuleGroupsDeleted bool `json:"rule_groups_deleted"`
97-
AlertManagerConfigDeleted bool `json:"alert_manager_config_deleted,omitempty"` // Not yet supported.
69+
TenantID string `json:"tenant_id"`
70+
BlocksDeleted bool `json:"blocks_deleted"`
9871
}
9972

10073
func (api *TenantDeletionAPI) DeleteTenantStatus(w http.ResponseWriter, r *http.Request) {
@@ -113,29 +86,9 @@ func (api *TenantDeletionAPI) DeleteTenantStatus(w http.ResponseWriter, r *http.
11386
return
11487
}
11588

116-
result.RuleGroupsDeleted, err = api.isRulesForUserDeleted(ctx, userID)
117-
if err != nil {
118-
http.Error(w, err.Error(), http.StatusInternalServerError)
119-
return
120-
}
121-
12289
util.WriteJSONResponse(w, result)
12390
}
12491

125-
func (api *TenantDeletionAPI) isRulesForUserDeleted(ctx context.Context, userID string) (bool, error) {
126-
if api.ruleStore == nil {
127-
// If API doesn't have access to rule store, then we cannot say that rules have been deleted.
128-
return false, nil
129-
}
130-
131-
list, err := api.ruleStore.ListRuleGroupsForUserAndNamespace(ctx, userID, "")
132-
if err != nil {
133-
return false, errors.Wrap(err, "failed to list rule groups for tenant")
134-
}
135-
136-
return len(list) == 0, nil
137-
}
138-
13992
func (api *TenantDeletionAPI) isBlocksForUserDeleted(ctx context.Context, userID string) (bool, error) {
14093
var errBlockFound = errors.New("block found")
14194

pkg/chunk/purger/tenant_deletion_api_test.go

Lines changed: 3 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,27 @@ package purger
33
import (
44
"bytes"
55
"context"
6-
"encoding/json"
76
"net/http"
87
"net/http/httptest"
98
"path"
109
"testing"
1110

1211
"github.com/go-kit/kit/log"
13-
"github.com/prometheus/prometheus/pkg/rulefmt"
1412
"github.com/stretchr/testify/require"
1513
"github.com/thanos-io/thanos/pkg/objstore"
1614
"github.com/weaveworks/common/user"
1715

18-
"github.com/cortexproject/cortex/pkg/chunk"
19-
"github.com/cortexproject/cortex/pkg/ruler/rulespb"
20-
"github.com/cortexproject/cortex/pkg/ruler/rulestore"
21-
"github.com/cortexproject/cortex/pkg/ruler/rulestore/objectclient"
2216
"github.com/cortexproject/cortex/pkg/storage/tsdb"
2317
)
2418

2519
func TestDeleteTenant(t *testing.T) {
2620
bkt := objstore.NewInMemBucket()
27-
api := newTenantDeletionAPI(bkt, nil, nil, log.NewNopLogger())
21+
api := newTenantDeletionAPI(bkt, nil, log.NewNopLogger())
2822

2923
{
3024
resp := httptest.NewRecorder()
3125
api.DeleteTenant(resp, &http.Request{})
32-
require.Equal(t, http.StatusBadRequest, resp.Code)
26+
require.Equal(t, http.StatusUnauthorized, resp.Code)
3327
}
3428

3529
{
@@ -86,122 +80,11 @@ func TestDeleteTenantStatus(t *testing.T) {
8680
require.NoError(t, bkt.Upload(context.Background(), objName, bytes.NewReader(data)))
8781
}
8882

89-
api := newTenantDeletionAPI(bkt, nil, nil, log.NewNopLogger())
83+
api := newTenantDeletionAPI(bkt, nil, log.NewNopLogger())
9084

9185
res, err := api.isBlocksForUserDeleted(context.Background(), username)
9286
require.NoError(t, err)
9387
require.Equal(t, tc.expectedBlocksDeleted, res)
9488
})
9589
}
9690
}
97-
98-
func TestDeleteTenantRuleGroups(t *testing.T) {
99-
ruleGroups := []ruleGroupKey{
100-
{user: "userA", namespace: "namespace", group: "group"},
101-
{user: "userB", namespace: "namespace1", group: "group"},
102-
{user: "userB", namespace: "namespace2", group: "group"},
103-
}
104-
105-
obj, rs := setupRuleGroupsStore(t, ruleGroups)
106-
require.Equal(t, 3, obj.GetObjectCount())
107-
108-
api := newTenantDeletionAPI(objstore.NewInMemBucket(), nil, rs, log.NewNopLogger())
109-
110-
{
111-
callDeleteTenantAPI(t, api, "user-with-no-rule-groups")
112-
require.Equal(t, 3, obj.GetObjectCount())
113-
114-
verifyExpectedDeletedRuleGroupsForUser(t, api, "user-with-no-rule-groups", true) // Has no rule groups
115-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userA", false)
116-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userB", false)
117-
}
118-
119-
{
120-
callDeleteTenantAPI(t, api, "userA")
121-
require.Equal(t, 2, obj.GetObjectCount())
122-
123-
verifyExpectedDeletedRuleGroupsForUser(t, api, "user-with-no-rule-groups", true) // Has no rule groups
124-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userA", true) // Just deleted.
125-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userB", false)
126-
}
127-
128-
{
129-
callDeleteTenantAPI(t, api, "userB")
130-
require.Equal(t, 0, obj.GetObjectCount())
131-
132-
verifyExpectedDeletedRuleGroupsForUser(t, api, "user-with-no-rule-groups", true) // Has no rule groups
133-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userA", true) // Deleted previously
134-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userB", true) // Just deleted
135-
}
136-
}
137-
138-
func TestDeleteTenantRuleGroupsWithReadOnlyStore(t *testing.T) {
139-
ruleGroups := []ruleGroupKey{
140-
{user: "userA", namespace: "namespace", group: "group"},
141-
{user: "userB", namespace: "namespace1", group: "group"},
142-
{user: "userB", namespace: "namespace2", group: "group"},
143-
}
144-
145-
obj, rs := setupRuleGroupsStore(t, ruleGroups)
146-
require.Equal(t, 3, obj.GetObjectCount())
147-
148-
rs = &readOnlyRuleStore{RuleStore: rs}
149-
150-
api := newTenantDeletionAPI(objstore.NewInMemBucket(), nil, rs, log.NewNopLogger())
151-
152-
// Make sure there is no error reported.
153-
callDeleteTenantAPI(t, api, "userA")
154-
require.Equal(t, 3, obj.GetObjectCount())
155-
156-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userA", false) // Cannot delete from read-only store.
157-
verifyExpectedDeletedRuleGroupsForUser(t, api, "userB", false)
158-
}
159-
160-
func callDeleteTenantAPI(t *testing.T, api *TenantDeletionAPI, userID string) {
161-
ctx := user.InjectOrgID(context.Background(), userID)
162-
163-
req := &http.Request{}
164-
resp := httptest.NewRecorder()
165-
api.DeleteTenant(resp, req.WithContext(ctx))
166-
167-
require.Equal(t, http.StatusOK, resp.Code)
168-
}
169-
170-
func verifyExpectedDeletedRuleGroupsForUser(t *testing.T, api *TenantDeletionAPI, userID string, expected bool) {
171-
ctx := user.InjectOrgID(context.Background(), userID)
172-
173-
req := &http.Request{}
174-
resp := httptest.NewRecorder()
175-
api.DeleteTenantStatus(resp, req.WithContext(ctx))
176-
177-
require.Equal(t, http.StatusOK, resp.Code)
178-
179-
deleteResp := &DeleteTenantStatusResponse{}
180-
require.NoError(t, json.Unmarshal(resp.Body.Bytes(), deleteResp))
181-
require.Equal(t, expected, deleteResp.RuleGroupsDeleted)
182-
}
183-
184-
func setupRuleGroupsStore(t *testing.T, ruleGroups []ruleGroupKey) (*chunk.MockStorage, rulestore.RuleStore) {
185-
obj := chunk.NewMockStorage()
186-
rs := objectclient.NewRuleStore(obj, 5, log.NewNopLogger())
187-
188-
// "upload" rule groups
189-
for _, key := range ruleGroups {
190-
desc := rulespb.ToProto(key.user, key.namespace, rulefmt.RuleGroup{Name: key.group})
191-
require.NoError(t, rs.SetRuleGroup(context.Background(), key.user, key.namespace, desc))
192-
}
193-
194-
return obj, rs
195-
}
196-
197-
type ruleGroupKey struct {
198-
user, namespace, group string
199-
}
200-
201-
type readOnlyRuleStore struct {
202-
rulestore.RuleStore
203-
}
204-
205-
func (r *readOnlyRuleStore) SupportsModifications() bool {
206-
return false
207-
}

pkg/cortex/modules.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -639,13 +639,6 @@ func (t *Cortex) initRulerStorage() (serv services.Service, err error) {
639639
return
640640
}
641641

642-
// Purger didn't use ruler storage before, but now it does. However empty configuration just causes error,
643-
// so to preserve previous purger behaviour, we simply disable it.
644-
if t.Cfg.isModuleEnabled(Purger) && t.Cfg.Ruler.StoreConfig.IsDefaults() {
645-
level.Info(util_log.Logger).Log("msg", "Ruler storage is not configured. If you want to use tenant deletion API and delete rule groups, please configure ruler storage.")
646-
return
647-
}
648-
649642
if !t.Cfg.Ruler.StoreConfig.IsDefaults() {
650643
t.RulerStorage, err = ruler.NewLegacyRuleStore(t.Cfg.Ruler.StoreConfig, rules.FileLoader{}, util_log.Logger)
651644
} else {
@@ -810,7 +803,7 @@ func (t *Cortex) initTenantDeletionAPI() (services.Service, error) {
810803
}
811804

812805
// t.RulerStorage can be nil when running in single-binary mode, and rule storage is not configured.
813-
tenantDeletionAPI, err := purger.NewTenantDeletionAPI(t.Cfg.BlocksStorage, t.Overrides, t.RulerStorage, util_log.Logger, prometheus.DefaultRegisterer)
806+
tenantDeletionAPI, err := purger.NewTenantDeletionAPI(t.Cfg.BlocksStorage, t.Overrides, util_log.Logger, prometheus.DefaultRegisterer)
814807
if err != nil {
815808
return nil, err
816809
}
@@ -895,7 +888,7 @@ func (t *Cortex) setupModuleManager() error {
895888
Compactor: {API, MemberlistKV, Overrides},
896889
StoreGateway: {API, Overrides, MemberlistKV},
897890
ChunksPurger: {Store, DeleteRequestsStore, API},
898-
TenantDeletion: {Store, API, Overrides, RulerStorage},
891+
TenantDeletion: {Store, API, Overrides},
899892
Purger: {ChunksPurger, TenantDeletion},
900893
TenantFederation: {Queryable},
901894
All: {QueryFrontend, Querier, Ingester, Distributor, TableManager, Purger, StoreGateway, Ruler},

pkg/ruler/ruler.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/cortexproject/cortex/pkg/util"
3535
"github.com/cortexproject/cortex/pkg/util/flagext"
3636
"github.com/cortexproject/cortex/pkg/util/grpcclient"
37+
util_log "github.com/cortexproject/cortex/pkg/util/log"
3738
"github.com/cortexproject/cortex/pkg/util/services"
3839
"github.com/cortexproject/cortex/pkg/util/validation"
3940
)
@@ -771,3 +772,24 @@ func (r *Ruler) AssertMaxRulesPerRuleGroup(userID string, rules int) error {
771772
}
772773
return fmt.Errorf(errMaxRulesPerRuleGroupPerUserLimitExceeded, limit, rules)
773774
}
775+
776+
func (r *Ruler) DeleteTenantConfiguration(w http.ResponseWriter, req *http.Request) {
777+
logger := util_log.WithContext(req.Context(), r.logger)
778+
779+
userID, err := tenant.TenantID(req.Context())
780+
if err != nil {
781+
// When Cortex is running, it uses Auth Middleware for checking X-Scope-OrgID and injecting tenant into context.
782+
// Auth Middleware sends http.StatusUnauthorized if X-Scope-OrgID is missing, so we do too here, for consistency.
783+
http.Error(w, err.Error(), http.StatusUnauthorized)
784+
return
785+
}
786+
787+
err = r.store.DeleteNamespace(req.Context(), userID, "") // Empty namespace = delete all rule groups.
788+
if err != nil && !errors.Is(err, rulestore.ErrGroupNamespaceNotFound) {
789+
respondError(logger, w, err.Error())
790+
return
791+
}
792+
793+
level.Info(logger).Log("msg", "deleted all tenant rule groups", "user", userID)
794+
w.WriteHeader(http.StatusOK)
795+
}

0 commit comments

Comments
 (0)