Skip to content

Commit 95cb360

Browse files
authored
per tenant retention (#116)
2 parents 2fecf4d + 2cc4540 commit 95cb360

File tree

4 files changed

+361
-0
lines changed

4 files changed

+361
-0
lines changed

cmd/thanos/compact.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,12 @@ func runCompact(
436436
level.Info(logger).Log("msg", "retention policy of 1 hour aggregated samples is enabled", "duration", retentionByResolution[compact.ResolutionLevel1h])
437437
}
438438

439+
retentionByTenant, err := compact.ParesRetentionPolicyByTenant(logger, *conf.retentionTenants)
440+
if err != nil {
441+
level.Error(logger).Log("msg", "failed to parse retention policy by tenant", "err", err)
442+
return err
443+
}
444+
439445
var cleanMtx sync.Mutex
440446
// TODO(GiedriusS): we could also apply retention policies here but the logic would be a bit more complex.
441447
cleanPartialMarked := func() error {
@@ -534,6 +540,10 @@ func runCompact(
534540
return errors.Wrap(err, "sync before retention")
535541
}
536542

543+
if err := compact.ApplyRetentionPolicyByTenant(ctx, logger, insBkt, sy.Metas(), retentionByTenant, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, metadata.TenantRetentionExpired)); err != nil {
544+
return errors.Wrap(err, "retention by tenant failed")
545+
}
546+
537547
if err := compact.ApplyRetentionPolicyByResolution(ctx, logger, insBkt, sy.Metas(), retentionByResolution, compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, "")); err != nil {
538548
return errors.Wrap(err, "retention failed")
539549
}
@@ -726,6 +736,7 @@ type compactConfig struct {
726736
objStore extflag.PathOrContent
727737
consistencyDelay time.Duration
728738
retentionRaw, retentionFiveMin, retentionOneHr model.Duration
739+
retentionTenants *[]string
729740
wait bool
730741
waitInterval time.Duration
731742
disableDownsampling bool
@@ -781,6 +792,7 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
781792
Default("0d").SetValue(&cc.retentionFiveMin)
782793
cmd.Flag("retention.resolution-1h", "How long to retain samples of resolution 2 (1 hour) in bucket. Setting this to 0d will retain samples of this resolution forever").
783794
Default("0d").SetValue(&cc.retentionOneHr)
795+
cc.retentionTenants = cmd.Flag("retention.tenant", "How long to retain samples in bucket per tenant. Setting this to 0d will retain samples of this resolution forever").Strings()
784796

785797
// TODO(kakkoyun, pgough): https://github.com/thanos-io/thanos/issues/2266.
786798
cmd.Flag("wait", "Do not exit after all compactions have been processed and wait for new work.").

pkg/block/metadata/markers.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ const (
8181
OutOfOrderChunksNoCompactReason = "block-index-out-of-order-chunk"
8282
// DownsampleVerticalCompactionNoCompactReason is a reason to not compact overlapping downsampled blocks as it does not make sense e.g. how to vertically compact the average.
8383
DownsampleVerticalCompactionNoCompactReason = "downsample-vertical-compaction"
84+
// TenantRetentionExpired is the reason for deleting a block whose per-tenant retention has expired.
85+
TenantRetentionExpired = "tenant-retention-expired"
8486
)
8587

8688
// NoCompactMark marker stores reason of block being excluded from compaction if needed.

pkg/compact/retention.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,27 @@ package compact
66
import (
77
"context"
88
"fmt"
9+
"regexp"
910
"time"
1011

1112
"github.com/go-kit/log"
1213
"github.com/go-kit/log/level"
1314
"github.com/oklog/ulid"
1415
"github.com/pkg/errors"
1516
"github.com/prometheus/client_golang/prometheus"
17+
"github.com/prometheus/common/model"
1618
"github.com/thanos-io/objstore"
1719

1820
"github.com/thanos-io/thanos/pkg/block"
1921
"github.com/thanos-io/thanos/pkg/block/metadata"
2022
)
2123

24+
const (
25+
// tenantRetentionRegex is the regex pattern for parsing tenant retention.
26+
// The valid format is `<tenant>:(<yyyy-mm-dd>|<duration>d)`, where <duration> must be greater than 0 (the regex itself accepts `0d`; the zero check happens while parsing).
27+
tenantRetentionRegex = `^([\w-]+):((\d{4}-\d{2}-\d{2})|(\d+d))$`
28+
)
29+
2230
// ApplyRetentionPolicyByResolution removes blocks depending on the specified retentionByResolution based on blocks MaxTime.
2331
// A value of 0 disables the retention for its resolution.
2432
func ApplyRetentionPolicyByResolution(
@@ -47,3 +55,79 @@ func ApplyRetentionPolicyByResolution(
4755
level.Info(logger).Log("msg", "optional retention apply done")
4856
return nil
4957
}
58+
59+
// RetentionPolicy describes when the blocks of a single tenant expire.
//
// A policy is either date based (CutoffDate is set) or duration based
// (RetentionDuration is set). When both are set the cutoff date wins. The
// zero value means "no retention configured" and never expires a block.
type RetentionPolicy struct {
	// CutoffDate, when non-zero, expires every block whose max time is
	// strictly before it.
	CutoffDate time.Time
	// RetentionDuration, consulted only when CutoffDate is zero, expires
	// every block whose max time is older than now minus this duration.
	RetentionDuration time.Duration
}

// isExpired reports whether a block with the given max time falls outside of
// the policy and should therefore be marked for deletion.
func (r RetentionPolicy) isExpired(blockMaxTime time.Time) bool {
	if !r.CutoffDate.IsZero() {
		return r.CutoffDate.After(blockMaxTime)
	}
	if r.RetentionDuration <= 0 {
		// Neither a cutoff date nor a positive duration is configured.
		// Without this guard the duration branch below would degrade to
		// "max time is in the past" and expire every existing block.
		return false
	}
	return time.Now().After(blockMaxTime.Add(r.RetentionDuration))
}
70+
71+
func ParesRetentionPolicyByTenant(logger log.Logger, retentionTenants []string) (map[string]RetentionPolicy, error) {
72+
pattern := regexp.MustCompile(tenantRetentionRegex)
73+
retentionByTenant := make(map[string]RetentionPolicy, len(retentionTenants))
74+
for _, tenantRetention := range retentionTenants {
75+
matches := pattern.FindStringSubmatch(tenantRetention)
76+
invalidFormat := errors.Errorf("invalid retention format for tenant: %s, must be `<tenant>:(<yyyy-mm-dd>|<duration>d)`", tenantRetention)
77+
if len(matches) != 5 {
78+
return nil, errors.Wrapf(invalidFormat, "matched size %d", len(matches))
79+
}
80+
tenant := matches[1]
81+
var policy RetentionPolicy
82+
if _, ok := retentionByTenant[tenant]; ok {
83+
return nil, errors.Errorf("duplicate retention policy for tenant: %s", tenant)
84+
}
85+
if cutoffDate, err := time.Parse(time.DateOnly, matches[3]); matches[3] != "" {
86+
if err != nil {
87+
return nil, errors.Wrapf(invalidFormat, "error parsing cutoff date: %v", err)
88+
}
89+
policy.CutoffDate = cutoffDate
90+
}
91+
if duration, err := model.ParseDuration(matches[4]); matches[4] != "" {
92+
if err != nil {
93+
return nil, errors.Wrapf(invalidFormat, "error parsing duration: %v", err)
94+
} else if duration == 0 {
95+
return nil, errors.Wrapf(invalidFormat, "duration must be greater than 0")
96+
}
97+
policy.RetentionDuration = time.Duration(duration)
98+
}
99+
level.Info(logger).Log("msg", "retention policy for tenant is enabled", "tenant", tenant, "retention policy", fmt.Sprintf("%v", policy))
100+
retentionByTenant[tenant] = policy
101+
}
102+
return retentionByTenant, nil
103+
}
104+
105+
// ApplyRetentionPolicyByTenant removes blocks depending on the specified retentionByTenant based on blocks MaxTime.
106+
func ApplyRetentionPolicyByTenant(
107+
ctx context.Context,
108+
logger log.Logger,
109+
bkt objstore.Bucket,
110+
metas map[ulid.ULID]*metadata.Meta,
111+
retentionByTenant map[string]RetentionPolicy,
112+
blocksMarkedForDeletion prometheus.Counter) error {
113+
if len(retentionByTenant) == 0 {
114+
level.Info(logger).Log("msg", "tenant retention is disabled due to no policy")
115+
return nil
116+
}
117+
level.Info(logger).Log("msg", "start tenant retention")
118+
for id, m := range metas {
119+
policy, ok := retentionByTenant[m.Thanos.GetTenant()]
120+
if !ok {
121+
continue
122+
}
123+
maxTime := time.Unix(m.MaxTime/1000, 0)
124+
if policy.isExpired(maxTime) {
125+
level.Info(logger).Log("msg", "applying retention: marking block for deletion", "id", id, "maxTime", maxTime.String())
126+
if err := block.MarkForDeletion(ctx, logger, bkt, id, fmt.Sprintf("block exceeding retention of %v", policy), blocksMarkedForDeletion); err != nil {
127+
return errors.Wrap(err, "delete block")
128+
}
129+
}
130+
}
131+
level.Info(logger).Log("msg", "tenant retention apply done")
132+
return nil
133+
}

0 commit comments

Comments
 (0)