Skip to content

Commit aa9bccf

Browse files
saswatamcode, GiedriusS, MichaHoffmann, thibaultmg
authored and committed
Cut patch release v0.35.1 (thanos-io#7394)
* compact: recover from panics (thanos-io#7318) For thanos-io#6775, it would be useful to know the exact block IDs to aid debugging. Signed-off-by: Giedrius Statkevičius <[email protected]> * Sidecar: wait for prometheus on startup (thanos-io#7323) Signed-off-by: Michael Hoffmann <[email protected]> * Receive: fix serverAsClient.Series goroutines leak (thanos-io#6948) * fix serverAsClient goroutines leak Signed-off-by: Thibault Mange <[email protected]> * fix lint Signed-off-by: Thibault Mange <[email protected]> * update changelog Signed-off-by: Thibault Mange <[email protected]> * delete invalid comment Signed-off-by: Thibault Mange <[email protected]> * remove temp dev test Signed-off-by: Thibault Mange <[email protected]> * remove timer channel drain Signed-off-by: Thibault Mange <[email protected]> --------- Signed-off-by: Thibault Mange <[email protected]> * Receive: fix stats (thanos-io#7373) If we account stats for remote write and local writes we will count them twice since the remote write will be counted locally again by the remote receiver instance. Signed-off-by: Michael Hoffmann <[email protected]> * *: Ensure objstore flag values are masked & disable debug/pprof/cmdline (thanos-io#7382) * *: Ensure objstore flag values are masked & disable debug/pprof/cmdline Signed-off-by: Saswata Mukherjee <[email protected]> * small fix Signed-off-by: Saswata Mukherjee <[email protected]> --------- Signed-off-by: Saswata Mukherjee <[email protected]> * Query: dont pass query hints to avoid triggering pushdown (thanos-io#7392) If we have a new querier it will create query hints even without the pushdown feature being present anymore. Old sidecars will then trigger query pushdown which leads to broken max,min,max_over_time and min_over_time. 
Signed-off-by: Michael Hoffmann <[email protected]> * Cut patch release v0.35.1 Signed-off-by: Saswata Mukherjee <[email protected]> --------- Signed-off-by: Giedrius Statkevičius <[email protected]> Signed-off-by: Michael Hoffmann <[email protected]> Signed-off-by: Thibault Mange <[email protected]> Signed-off-by: Saswata Mukherjee <[email protected]> Co-authored-by: Giedrius Statkevičius <[email protected]> Co-authored-by: Michael Hoffmann <[email protected]> Co-authored-by: Thibault Mange <[email protected]>
1 parent eb2dcfd commit aa9bccf

File tree

12 files changed

+184
-251
lines changed

12 files changed

+184
-251
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,23 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
1818

1919
### Removed
2020

21+
## [v0.35.1](https://github.com/thanos-io/thanos/tree/release-0.35) - 28.05.2024
22+
23+
### Fixed
24+
25+
- [#7323](https://github.com/thanos-io/thanos/pull/7323) Sidecar: wait for prometheus on startup
26+
- [#6948](https://github.com/thanos-io/thanos/pull/6948) Receive: fix goroutines leak during series requests to thanos store api.
27+
- [#7382](https://github.com/thanos-io/thanos/pull/7382) *: Ensure objstore flag values are masked & disable debug/pprof/cmdline
28+
- [#7392](https://github.com/thanos-io/thanos/pull/7392) Query: fix broken min, max for pre 0.34.1 sidecars
29+
- [#7373](https://github.com/thanos-io/thanos/pull/7373) Receive: Fix stats for remote write
30+
- [#7318](https://github.com/thanos-io/thanos/pull/7318) Compactor: Recover from panic to log block ID
31+
32+
### Added
33+
34+
### Changed
35+
36+
### Removed
37+
2138
## [v0.35.0](https://github.com/thanos-io/thanos/tree/release-0.35) - 02.05.2024
2239

2340
### Fixed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
0.35.0
1+
0.35.1

cmd/thanos/main.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -214,6 +214,11 @@ func getFlagsMap(flags []*kingpin.FlagModel) map[string]string {
214214
if boilerplateFlags.GetFlag(f.Name) != nil {
215215
continue
216216
}
217+
// Mask inline objstore flag which can have credentials.
218+
if f.Name == "objstore.config" || f.Name == "objstore.config-file" {
219+
flagsMap[f.Name] = "<REDACTED>"
220+
continue
221+
}
217222
flagsMap[f.Name] = f.Value.String()
218223
}
219224

cmd/thanos/sidecar.go

Lines changed: 61 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -172,64 +172,87 @@ func runSidecar(
172172
Help: "Boolean indicator whether the sidecar can reach its Prometheus peer.",
173173
})
174174

175-
ctx, cancel := context.WithCancel(context.Background())
176-
g.Add(func() error {
177-
// Only check Prometheus's flags when upload is enabled.
178-
if uploads {
179-
// Check prometheus's flags to ensure same sidecar flags.
180-
if err := validatePrometheus(ctx, m.client, logger, conf.shipper.ignoreBlockSize, m); err != nil {
181-
return errors.Wrap(err, "validate Prometheus flags")
182-
}
183-
}
175+
ctx := context.Background()
176+
// Only check Prometheus's flags when upload is enabled.
177+
if uploads {
178+
// Check prometheus's flags to ensure same sidecar flags.
179+
// We retry infinitely until we validated prometheus flags
180+
err := runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
181+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
182+
defer iterCancel()
184183

185-
// We retry infinitely until we reach and fetch BuildVersion from our Prometheus.
186-
err := runutil.Retry(2*time.Second, ctx.Done(), func() error {
187-
if err := m.BuildVersion(ctx); err != nil {
184+
if err := validatePrometheus(iterCtx, m.client, logger, conf.shipper.ignoreBlockSize, m); err != nil {
188185
level.Warn(logger).Log(
189-
"msg", "failed to fetch prometheus version. Is Prometheus running? Retrying",
186+
"msg", "failed to validate prometheus flags. Is Prometheus running? Retrying",
190187
"err", err,
191188
)
192189
return err
193190
}
194191

195192
level.Info(logger).Log(
196-
"msg", "successfully loaded prometheus version",
193+
"msg", "successfully validated prometheus flags",
197194
)
198195
return nil
199196
})
200197
if err != nil {
201-
return errors.Wrap(err, "failed to get prometheus version")
198+
return errors.Wrap(err, "failed to validate prometheus flags")
202199
}
200+
}
203201

204-
// Blocking query of external labels before joining as a Source Peer into gossip.
205-
// We retry infinitely until we reach and fetch labels from our Prometheus.
206-
err = runutil.Retry(2*time.Second, ctx.Done(), func() error {
207-
if err := m.UpdateLabels(ctx); err != nil {
208-
level.Warn(logger).Log(
209-
"msg", "failed to fetch initial external labels. Is Prometheus running? Retrying",
210-
"err", err,
211-
)
212-
promUp.Set(0)
213-
statusProber.NotReady(err)
214-
return err
215-
}
202+
// We retry infinitely until we reach and fetch BuildVersion from our Prometheus.
203+
err := runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
204+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
205+
defer iterCancel()
216206

217-
level.Info(logger).Log(
218-
"msg", "successfully loaded prometheus external labels",
219-
"external_labels", m.Labels().String(),
207+
if err := m.BuildVersion(iterCtx); err != nil {
208+
level.Warn(logger).Log(
209+
"msg", "failed to fetch prometheus version. Is Prometheus running? Retrying",
210+
"err", err,
220211
)
221-
promUp.Set(1)
222-
statusProber.Ready()
223-
return nil
224-
})
225-
if err != nil {
226-
return errors.Wrap(err, "initial external labels query")
212+
return err
227213
}
228214

229-
if len(m.Labels()) == 0 {
230-
return errors.New("no external labels configured on Prometheus server, uniquely identifying external labels must be configured; see https://thanos.io/tip/thanos/storage.md#external-labels for details.")
215+
level.Info(logger).Log(
216+
"msg", "successfully loaded prometheus version",
217+
)
218+
return nil
219+
})
220+
if err != nil {
221+
return errors.Wrap(err, "failed to get prometheus version")
222+
}
223+
224+
// Blocking query of external labels before joining as a Source Peer into gossip.
225+
// We retry infinitely until we reach and fetch labels from our Prometheus.
226+
err = runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
227+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
228+
defer iterCancel()
229+
230+
if err := m.UpdateLabels(iterCtx); err != nil {
231+
level.Warn(logger).Log(
232+
"msg", "failed to fetch initial external labels. Is Prometheus running? Retrying",
233+
"err", err,
234+
)
235+
return err
231236
}
232237

238+
level.Info(logger).Log(
239+
"msg", "successfully loaded prometheus external labels",
240+
"external_labels", m.Labels().String(),
241+
)
242+
return nil
243+
})
244+
if err != nil {
245+
return errors.Wrap(err, "initial external labels query")
246+
}
247+
248+
if len(m.Labels()) == 0 {
249+
return errors.New("no external labels configured on Prometheus server, uniquely identifying external labels must be configured; see https://thanos.io/tip/thanos/storage.md#external-labels for details.")
250+
}
251+
promUp.Set(1)
252+
statusProber.Ready()
253+
254+
ctx, cancel := context.WithCancel(context.Background())
255+
g.Add(func() error {
233256
// Periodically query the Prometheus config. We use this as a heartbeat as well as for updating
234257
// the external labels we apply.
235258
return runutil.Repeat(conf.prometheus.getConfigInterval, ctx.Done(), func() error {

pkg/compact/compact.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ import (
1010
"os"
1111
"path/filepath"
1212
"sort"
13+
"strings"
1314
"sync"
1415
"time"
1516

@@ -871,6 +872,21 @@ func (cg *Group) Compact(ctx context.Context, dir string, planner Planner, comp
871872
return false, ulid.ULID{}, errors.Wrap(err, "create compaction group dir")
872873
}
873874

875+
defer func() {
876+
if p := recover(); p != nil {
877+
var sb strings.Builder
878+
879+
cgIDs := cg.IDs()
880+
for i, blid := range cgIDs {
881+
_, _ = sb.WriteString(blid.String())
882+
if i < len(cgIDs)-1 {
883+
_, _ = sb.WriteString(",")
884+
}
885+
}
886+
rerr = fmt.Errorf("paniced while compacting %s: %v", sb.String(), p)
887+
}
888+
}()
889+
874890
errChan := make(chan error, 1)
875891
err := tracing.DoInSpanWithErr(ctx, "compaction_group", func(ctx context.Context) (err error) {
876892
shouldRerun, compID, err = cg.compact(ctx, subDir, planner, comp, blockDeletableChecker, compactionLifecycleCallback, errChan)

pkg/query/querier.go

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -311,20 +311,6 @@ func aggrsFromFunc(f string) []storepb.Aggr {
311311
return []storepb.Aggr{storepb.Aggr_COUNT, storepb.Aggr_SUM}
312312
}
313313

314-
func storeHintsFromPromHints(hints *storage.SelectHints) *storepb.QueryHints {
315-
return &storepb.QueryHints{
316-
StepMillis: hints.Step,
317-
Func: &storepb.Func{
318-
Name: hints.Func,
319-
},
320-
Grouping: &storepb.Grouping{
321-
By: hints.By,
322-
Labels: hints.Grouping,
323-
},
324-
Range: &storepb.Range{Millis: hints.Range},
325-
}
326-
}
327-
328314
func (q *querier) Select(ctx context.Context, _ bool, hints *storage.SelectHints, ms ...*labels.Matcher) storage.SeriesSet {
329315
if hints == nil {
330316
hints = &storage.SelectHints{
@@ -421,7 +407,6 @@ func (q *querier) selectFn(ctx context.Context, hints *storage.SelectHints, ms .
421407
ShardInfo: q.shardInfo,
422408
PartialResponseStrategy: q.partialResponseStrategy,
423409
SkipChunks: q.skipChunks,
424-
QueryHints: storeHintsFromPromHints(hints),
425410
}
426411
if q.isDedupEnabled() {
427412
// Soft ask to sort without replica labels and push them at the end of labelset.

pkg/receive/handler.go

Lines changed: 16 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -685,35 +685,32 @@ type remoteWriteParams struct {
685685
alreadyReplicated bool
686686
}
687687

688-
func (h *Handler) gatherWriteStats(writes ...map[endpointReplica]map[string]trackedSeries) tenantRequestStats {
688+
func (h *Handler) gatherWriteStats(localWrites map[endpointReplica]map[string]trackedSeries) tenantRequestStats {
689689
var stats tenantRequestStats = make(tenantRequestStats)
690690

691-
for _, write := range writes {
692-
for er := range write {
693-
for tenant, series := range write[er] {
694-
samples := 0
691+
for er := range localWrites {
692+
for tenant, series := range localWrites[er] {
693+
samples := 0
695694

696-
for _, ts := range series.timeSeries {
697-
samples += len(ts.Samples)
698-
}
695+
for _, ts := range series.timeSeries {
696+
samples += len(ts.Samples)
697+
}
699698

700-
if st, ok := stats[tenant]; ok {
701-
st.timeseries += len(series.timeSeries)
702-
st.totalSamples += samples
699+
if st, ok := stats[tenant]; ok {
700+
st.timeseries += len(series.timeSeries)
701+
st.totalSamples += samples
703702

704-
stats[tenant] = st
705-
} else {
706-
stats[tenant] = requestStats{
707-
timeseries: len(series.timeSeries),
708-
totalSamples: samples,
709-
}
703+
stats[tenant] = st
704+
} else {
705+
stats[tenant] = requestStats{
706+
timeseries: len(series.timeSeries),
707+
totalSamples: samples,
710708
}
711709
}
712710
}
713711
}
714712

715713
return stats
716-
717714
}
718715

719716
func (h *Handler) fanoutForward(ctx context.Context, params remoteWriteParams) (tenantRequestStats, error) {
@@ -743,7 +740,7 @@ func (h *Handler) fanoutForward(ctx context.Context, params remoteWriteParams) (
743740
return stats, err
744741
}
745742

746-
stats = h.gatherWriteStats(localWrites, remoteWrites)
743+
stats = h.gatherWriteStats(localWrites)
747744

748745
// Prepare a buffered channel to receive the responses from the local and remote writes. Remote writes will all go
749746
// asynchronously and with this capacity we will never block on writing to the channel.

pkg/server/http/http.go

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,6 @@ func (s *Server) Handle(pattern string, handler http.Handler) {
117117

118118
func registerProfiler(mux *http.ServeMux) {
119119
mux.HandleFunc("/debug/pprof/", pprof.Index)
120-
mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
121120
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
122121
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
123122
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)

pkg/store/bucket.go

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1581,7 +1581,6 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, seriesSrv storepb.Store
15811581
var resp respSet
15821582
if s.sortingStrategy == sortingStrategyStore {
15831583
resp = newEagerRespSet(
1584-
srv.Context(),
15851584
span,
15861585
10*time.Minute,
15871586
blk.meta.ULID.String(),
@@ -1595,7 +1594,6 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, seriesSrv storepb.Store
15951594
)
15961595
} else {
15971596
resp = newLazyRespSet(
1598-
srv.Context(),
15991597
span,
16001598
10*time.Minute,
16011599
blk.meta.ULID.String(),

pkg/store/prometheus.go

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -163,19 +163,7 @@ func (p *PrometheusStore) Series(r *storepb.SeriesRequest, seriesSrv storepb.Sto
163163
// Don't ask for more than available time. This includes potential `minTime` flag limit.
164164
availableMinTime, _ := p.timestamps()
165165
if r.MinTime < availableMinTime {
166-
// Align min time with the step to avoid missing data when it gets retrieved by the upper layer's PromQL engine.
167-
// This also is necessary when Sidecar uploads a block and then availableMinTime
168-
// becomes a fixed timestamp.
169-
if r.QueryHints != nil && r.QueryHints.StepMillis != 0 {
170-
diff := availableMinTime - r.MinTime
171-
r.MinTime += (diff / r.QueryHints.StepMillis) * r.QueryHints.StepMillis
172-
// Add one more to strictly fit within --min-time -> infinity.
173-
if r.MinTime != availableMinTime {
174-
r.MinTime += r.QueryHints.StepMillis
175-
}
176-
} else {
177-
r.MinTime = availableMinTime
178-
}
166+
r.MinTime = availableMinTime
179167
}
180168

181169
extLsetToRemove := map[string]struct{}{}

0 commit comments

Comments
 (0)