Skip to content

Commit 2bc501a

Browse files
authored
Attach consistency check debug logs to trace spans (#9213)
* Attach consistency check debug logs to trace spans * Add changelog entry
1 parent 36b193c commit 2bc501a

File tree

5 files changed

+16
-18
lines changed

5 files changed

+16
-18
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
* [ENHANCEMENT] Ingester: add `cortex_lifecycler_read_only` metric which is set to 1 when ingester's lifecycler is set to read-only mode. #9095
8383
* [ENHANCEMENT] Add a new field, `encode_time_seconds` to query stats log messages, to record the amount of time it takes the query-frontend to encode a response. This does not include any serialization time for downstream components. #9062
8484
* [ENHANCEMENT] OTLP: If the flag `-distributor.otel-created-timestamp-zero-ingestion-enabled` is true, OTel start timestamps are converted to Prometheus zero samples to mark series start. #9131
85+
* [ENHANCEMENT] Querier: attach logs emitted during query consistency check to trace span for query. #9213
8586
* [BUGFIX] Ruler: add support for draining any outstanding alert notifications before shutting down. This can be enabled with the `-ruler.drain-notification-queue-on-shutdown=true` CLI flag. #8346
8687
* [BUGFIX] Query-frontend: fix `-querier.max-query-lookback` enforcement when `-compactor.blocks-retention-period` is not set, and viceversa. #8388
8788
* [BUGFIX] Ingester: fix sporadic `not found` error causing an internal server error if label names are queried with matchers during head compaction. #8391

pkg/querier/blocks_consistency_checker.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,15 @@ import (
2020
type BlocksConsistency struct {
2121
uploadGracePeriod time.Duration
2222
deletionGracePeriod time.Duration
23-
logger log.Logger
2423

2524
checksTotal prometheus.Counter
2625
checksFailed prometheus.Counter
2726
}
2827

29-
func NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod time.Duration, logger log.Logger, reg prometheus.Registerer) *BlocksConsistency {
28+
func NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod time.Duration, reg prometheus.Registerer) *BlocksConsistency {
3029
return &BlocksConsistency{
3130
uploadGracePeriod: uploadGracePeriod,
3231
deletionGracePeriod: deletionGracePeriod,
33-
logger: logger,
3432
checksTotal: promauto.With(reg).NewCounter(prometheus.CounterOpts{
3533
Name: "cortex_querier_blocks_consistency_checks_total",
3634
Help: "Total number of queries that needed to run with consistency checks. A consistency check is required when querying blocks from store-gateways to make sure that all blocks are queried.",
@@ -44,7 +42,7 @@ func NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod time.Duration,
4442

4543
// NewTracker creates a consistency tracker from the known blocks. It filters out any block uploaded within uploadGracePeriod
4644
// and with a deletion mark within deletionGracePeriod.
47-
func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDeletionMarks map[ulid.ULID]*bucketindex.BlockDeletionMark) BlocksConsistencyTracker {
45+
func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDeletionMarks map[ulid.ULID]*bucketindex.BlockDeletionMark, logger log.Logger) BlocksConsistencyTracker {
4846
blocksToTrack := make(map[ulid.ULID]struct{}, len(knownBlocks))
4947
for _, block := range knownBlocks {
5048
// Some recently uploaded blocks, already discovered by the querier, may not have been discovered
@@ -55,7 +53,7 @@ func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDele
5553
// - Blocks uploaded by compactor: the source blocks are marked for deletion but will continue to be
5654
// queried by queriers for a while (depends on the configured deletion marks delay).
5755
if c.uploadGracePeriod > 0 && time.Since(block.GetUploadedAt()) < c.uploadGracePeriod {
58-
level.Debug(c.logger).Log("msg", "block skipped from consistency check because it was uploaded recently", "block", block.ID.String(), "uploadedAt", block.GetUploadedAt().String())
56+
level.Debug(logger).Log("msg", "block skipped from consistency check because it was uploaded recently", "block", block.ID.String(), "uploadedAt", block.GetUploadedAt().String())
5957
continue
6058
}
6159

@@ -67,7 +65,7 @@ func (c *BlocksConsistency) NewTracker(knownBlocks bucketindex.Blocks, knownDele
6765
deletionTime := time.Unix(mark.DeletionTime, 0)
6866

6967
if c.deletionGracePeriod > 0 && time.Since(deletionTime) > c.deletionGracePeriod {
70-
level.Debug(c.logger).Log("msg", "block skipped from consistency check because it is marked for deletion", "block", block.ID.String(), "deletionTime", deletionTime.String())
68+
level.Debug(logger).Log("msg", "block skipped from consistency check because it is marked for deletion", "block", block.ID.String(), "deletionTime", deletionTime.String())
7169
continue
7270
}
7371
}

pkg/querier/blocks_consistency_checker_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ func TestBlocksConsistencyTracker_Check(t *testing.T) {
126126
for testName, testData := range tests {
127127
t.Run(testName, func(t *testing.T) {
128128
reg := prometheus.NewPedanticRegistry()
129-
c := NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod, log.NewNopLogger(), reg)
130-
tracker := c.NewTracker(testData.knownBlocks, testData.knownDeletionMarks)
129+
c := NewBlocksConsistency(uploadGracePeriod, deletionGracePeriod, reg)
130+
tracker := c.NewTracker(testData.knownBlocks, testData.knownDeletionMarks, log.NewNopLogger())
131131
var missingBlocks []ulid.ULID
132132
for _, queriedBlocksAttempt := range testData.queriedBlocks {
133133
missingBlocks = tracker.Check(queriedBlocksAttempt)

pkg/querier/blocks_store_queryable.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,6 @@ func NewBlocksStoreQueryableFromConfig(querierCfg Config, gatewayCfg storegatewa
258258
// recently marked for deletion, until the "ignore delay / 2". This means the consistency checker
259259
// exclude such blocks about 50% of the time before querier and store-gateway stops querying them.
260260
storageCfg.BucketStore.IgnoreDeletionMarksDelay/2,
261-
logger,
262261
reg,
263262
)
264263

@@ -548,7 +547,7 @@ func (q *blocksStoreQuerier) queryWithConsistencyCheck(
548547
touchedStores = map[string]struct{}{}
549548
)
550549

551-
consistencyTracker := q.consistency.NewTracker(knownBlocks, knownDeletionMarks)
550+
consistencyTracker := q.consistency.NewTracker(knownBlocks, knownDeletionMarks, spanLog)
552551
defer func() {
553552
// Do not track consistency check metrics if query failed with an error unrelated to consistency check (e.g. context canceled),
554553
// because it means we interrupted the requests, and we don't know whether consistency check would have succeeded

pkg/querier/blocks_store_queryable_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,7 +1563,7 @@ func TestBlocksStoreQuerier_Select(t *testing.T) {
15631563
maxT: maxT,
15641564
finder: finder,
15651565
stores: stores,
1566-
consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), reg),
1566+
consistency: NewBlocksConsistency(0, 0, reg),
15671567
logger: log.NewNopLogger(),
15681568
metrics: newBlocksStoreQueryableMetrics(reg),
15691569
limits: testData.limits,
@@ -1704,7 +1704,7 @@ func TestBlocksStoreQuerier_ShouldReturnContextCanceledIfContextWasCanceledWhile
17041704
maxT: maxT,
17051705
finder: finder,
17061706
stores: stores,
1707-
consistency: NewBlocksConsistency(0, 0, logger, reg),
1707+
consistency: NewBlocksConsistency(0, 0, reg),
17081708
logger: logger,
17091709
metrics: newBlocksStoreQueryableMetrics(reg),
17101710
limits: &blocksStoreLimitsMock{},
@@ -1932,7 +1932,7 @@ func TestBlocksStoreQuerier_Select_cancelledContext(t *testing.T) {
19321932
maxT: maxT,
19331933
finder: finder,
19341934
stores: stores,
1935-
consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil),
1935+
consistency: NewBlocksConsistency(0, 0, nil),
19361936
logger: log.NewNopLogger(),
19371937
metrics: newBlocksStoreQueryableMetrics(reg),
19381938
limits: &blocksStoreLimitsMock{},
@@ -2435,7 +2435,7 @@ func TestBlocksStoreQuerier_Labels(t *testing.T) {
24352435
maxT: maxT,
24362436
finder: finder,
24372437
stores: stores,
2438-
consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil),
2438+
consistency: NewBlocksConsistency(0, 0, nil),
24392439
logger: log.NewNopLogger(),
24402440
metrics: newBlocksStoreQueryableMetrics(reg),
24412441
limits: &blocksStoreLimitsMock{},
@@ -2506,7 +2506,7 @@ func TestBlocksStoreQuerier_Labels(t *testing.T) {
25062506
maxT: maxT,
25072507
finder: finder,
25082508
stores: stores,
2509-
consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil),
2509+
consistency: NewBlocksConsistency(0, 0, nil),
25102510
logger: log.NewNopLogger(),
25112511
metrics: newBlocksStoreQueryableMetrics(reg),
25122512
limits: &blocksStoreLimitsMock{},
@@ -2580,7 +2580,7 @@ func TestBlocksStoreQuerier_SelectSortedShouldHonorQueryStoreAfter(t *testing.T)
25802580
maxT: testData.queryMaxT,
25812581
finder: finder,
25822582
stores: &blocksStoreSetMock{},
2583-
consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil),
2583+
consistency: NewBlocksConsistency(0, 0, nil),
25842584
logger: log.NewNopLogger(),
25852585
metrics: newBlocksStoreQueryableMetrics(nil),
25862586
limits: &blocksStoreLimitsMock{},
@@ -2683,7 +2683,7 @@ func TestBlocksStoreQuerier_MaxLabelsQueryRange(t *testing.T) {
26832683
maxT: testData.queryMaxT,
26842684
finder: finder,
26852685
stores: &blocksStoreSetMock{},
2686-
consistency: NewBlocksConsistency(0, 0, log.NewNopLogger(), nil),
2686+
consistency: NewBlocksConsistency(0, 0, nil),
26872687
logger: log.NewNopLogger(),
26882688
metrics: newBlocksStoreQueryableMetrics(nil),
26892689
limits: &blocksStoreLimitsMock{
@@ -2824,7 +2824,7 @@ func TestBlocksStoreQuerier_PromQLExecution(t *testing.T) {
28242824

28252825
// Instantiate the querier that will be executed to run the query.
28262826
logger := log.NewNopLogger()
2827-
queryable, err := NewBlocksStoreQueryable(stores, finder, NewBlocksConsistency(0, 0, logger, nil), &blocksStoreLimitsMock{}, 0, 0, logger, nil)
2827+
queryable, err := NewBlocksStoreQueryable(stores, finder, NewBlocksConsistency(0, 0, nil), &blocksStoreLimitsMock{}, 0, 0, logger, nil)
28282828
require.NoError(t, err)
28292829
require.NoError(t, services.StartAndAwaitRunning(context.Background(), queryable))
28302830
defer services.StopAndAwaitTerminated(context.Background(), queryable) // nolint:errcheck

0 commit comments

Comments
 (0)