Skip to content

Commit f77525a

Browse files
Add -frontend.enabled-ruler-query-stats flag (#6504)
Signed-off-by: SungJin1212 <[email protected]> Signed-off-by: Friedrich Gonzalez <[email protected]> Co-authored-by: Friedrich Gonzalez <[email protected]>
1 parent 8958d59 commit f77525a

File tree

4 files changed

+61
-22
lines changed

4 files changed

+61
-22
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* [ENHANCEMENT] Querier: Apply bytes limiter to LabelNames and LabelValuesForLabelNames. #6568
1313
* [ENHANCEMENT] Query Frontend: Add a `too_many_tenants` reason label value to the `cortex_rejected_queries_total` metric to track the number of queries rejected due to the limit on the number of tenants. #6569
1414
* [ENHANCEMENT] Alertmanager: Add receiver validations for msteamsv2 and rocketchat. #6606
15+
* [ENHANCEMENT] Query Frontend: Add a `-frontend.enabled-ruler-query-stats` flag to configure whether to report the query stats log for queries coming from the Ruler. #6504
1516
* [ENHANCEMENT] OTLP: Support otlp metadata ingestion. #6617
1617
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring. #6517
1718
* [BUGFIX] Ingester: Fix labelset data race condition. #6573

docs/configuration/config-file-reference.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4175,6 +4175,12 @@ The `query_frontend_config` configures the Cortex query-frontend.
41754175
# CLI flag: -frontend.query-stats-enabled
41764176
[query_stats_enabled: <boolean> | default = false]
41774177
4178+
# If enabled, report the query stats log for queries coming from the ruler to
4179+
# evaluate rules. It only takes effect when '-ruler.frontend-address' is
4180+
# configured.
4181+
# CLI flag: -frontend.enabled-ruler-query-stats
4182+
[enabled_ruler_query_stats_log: <boolean> | default = false]
4183+
41784184
# If a querier disconnects without sending notification about graceful shutdown,
41794185
# the query-frontend will keep the querier in the tenant's shard until the
41804186
# forget delay has passed. This feature is useful to reduce the blast radius

pkg/frontend/transport/handler.go

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,17 @@ const (
7474

7575
// Config for a Handler.
7676
type HandlerConfig struct {
77-
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
78-
MaxBodySize int64 `yaml:"max_body_size"`
79-
QueryStatsEnabled bool `yaml:"query_stats_enabled"`
77+
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
78+
MaxBodySize int64 `yaml:"max_body_size"`
79+
QueryStatsEnabled bool `yaml:"query_stats_enabled"`
80+
EnabledRulerQueryStatsLog bool `yaml:"enabled_ruler_query_stats_log"`
8081
}
8182

8283
func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) {
8384
f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.")
8485
f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.")
8586
f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.")
87+
f.BoolVar(&cfg.EnabledRulerQueryStatsLog, "frontend.enabled-ruler-query-stats", false, "If enabled, report the query stats log for queries coming from the ruler to evaluate rules. It only takes effect when '-ruler.frontend-address' is configured.")
8688
}
8789

8890
// Handler accepts queries and forwards them to RoundTripper. It can log slow queries,
@@ -245,10 +247,11 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
245247
r.Body = io.NopCloser(&buf)
246248
}
247249

250+
source := tripperware.GetSource(r.Header.Get("User-Agent"))
248251
// Log request
249252
if f.cfg.QueryStatsEnabled {
250253
queryString = f.parseRequestQueryString(r, buf)
251-
f.logQueryRequest(r, queryString)
254+
f.logQueryRequest(r, queryString, source)
252255
}
253256

254257
startTime := time.Now()
@@ -281,7 +284,6 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
281284
}
282285
}
283286

284-
source := tripperware.GetSource(r.Header.Get("User-Agent"))
285287
f.reportQueryStats(r, source, userID, queryString, queryResponseTime, stats, err, statusCode, resp)
286288
}
287289

@@ -322,7 +324,7 @@ func formatGrafanaStatsFields(r *http.Request) []interface{} {
322324
}
323325

324326
// logQueryRequest logs query request before query execution.
325-
func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values) {
327+
func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values, source string) {
326328
logMessage := []interface{}{
327329
"msg", "query request",
328330
"component", "query-frontend",
@@ -346,9 +348,11 @@ func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values) {
346348
logMessage = append(logMessage, "accept_encoding", acceptEncoding)
347349
}
348350

349-
logMessage = append(logMessage, formatQueryString(queryString)...)
350-
351-
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
351+
shouldLog := source == tripperware.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == tripperware.SourceRuler)
352+
if shouldLog {
353+
logMessage = append(logMessage, formatQueryString(queryString)...)
354+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
355+
}
352356
}
353357

354358
// reportSlowQuery reports slow queries.
@@ -473,11 +477,15 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
473477
logMessage = append(logMessage, "error", s.Message())
474478
}
475479
}
476-
logMessage = append(logMessage, formatQueryString(queryString)...)
477-
if error != nil {
478-
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
479-
} else {
480-
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
480+
481+
shouldLog := source == tripperware.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == tripperware.SourceRuler)
482+
if shouldLog {
483+
logMessage = append(logMessage, formatQueryString(queryString)...)
484+
if error != nil {
485+
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
486+
} else {
487+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
488+
}
481489
}
482490

483491
var reason string

pkg/frontend/transport/handler_test.go

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -417,28 +417,31 @@ func TestHandler_ServeHTTP(t *testing.T) {
417417
func TestReportQueryStatsFormat(t *testing.T) {
418418
outputBuf := bytes.NewBuffer(nil)
419419
logger := log.NewSyncLogger(log.NewLogfmtLogger(outputBuf))
420-
handler := NewHandler(HandlerConfig{QueryStatsEnabled: true}, tenantfederation.Config{}, http.DefaultTransport, logger, nil)
421420
userID := "fake"
422421
req, _ := http.NewRequest(http.MethodGet, "http://localhost:8080/prometheus/api/v1/query", nil)
423422
resp := &http.Response{ContentLength: 1000}
424423
responseTime := time.Second
425424
statusCode := http.StatusOK
426425

427426
type testCase struct {
428-
queryString url.Values
429-
queryStats *querier_stats.QueryStats
430-
header http.Header
431-
responseErr error
432-
expectedLog string
427+
queryString url.Values
428+
queryStats *querier_stats.QueryStats
429+
header http.Header
430+
responseErr error
431+
expectedLog string
432+
enabledRulerQueryStatsLog bool
433+
source string
433434
}
434435

435436
tests := map[string]testCase{
436437
"should not include query and header details if empty": {
437438
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 samples_scanned=0`,
439+
source: tripperware.SourceAPI,
438440
},
439441
"should include query length and string at the end": {
440442
queryString: url.Values(map[string][]string{"query": {"up"}}),
441443
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 samples_scanned=0 query_length=2 param_query=up`,
444+
source: tripperware.SourceAPI,
442445
},
443446
"should include query stats": {
444447
queryStats: &querier_stats.QueryStats{
@@ -455,14 +458,17 @@ func TestReportQueryStatsFormat(t *testing.T) {
455458
},
456459
},
457460
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=3 response_series_count=100 fetched_series_count=100 fetched_chunks_count=200 fetched_samples_count=300 fetched_chunks_bytes=1024 fetched_data_bytes=2048 split_queries=10 status_code=200 response_size=1000 samples_scanned=0 query_storage_wall_time_seconds=6000`,
461+
source: tripperware.SourceAPI,
458462
},
459463
"should include user agent": {
460464
header: http.Header{"User-Agent": []string{"Grafana"}},
461465
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 samples_scanned=0 user_agent=Grafana`,
466+
source: tripperware.SourceAPI,
462467
},
463468
"should include response error": {
464469
responseErr: errors.New("foo_err"),
465470
expectedLog: `level=error msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 samples_scanned=0 error=foo_err`,
471+
source: tripperware.SourceAPI,
466472
},
467473
"should include query priority": {
468474
queryString: url.Values(map[string][]string{"query": {"up"}}),
@@ -471,6 +477,7 @@ func TestReportQueryStatsFormat(t *testing.T) {
471477
PriorityAssigned: true,
472478
},
473479
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 samples_scanned=0 query_length=2 priority=99 param_query=up`,
480+
source: tripperware.SourceAPI,
474481
},
475482
"should include data fetch min and max time": {
476483
queryString: url.Values(map[string][]string{"query": {"up"}}),
@@ -479,6 +486,7 @@ func TestReportQueryStatsFormat(t *testing.T) {
479486
DataSelectMinTime: 1704067200000,
480487
},
481488
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 samples_scanned=0 data_select_max_time=1704153600 data_select_min_time=1704067200 query_length=2 param_query=up`,
489+
source: tripperware.SourceAPI,
482490
},
483491
"should include query stats with store gateway stats": {
484492
queryStats: &querier_stats.QueryStats{
@@ -497,16 +505,32 @@ func TestReportQueryStatsFormat(t *testing.T) {
497505
},
498506
},
499507
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=3 response_series_count=100 fetched_series_count=100 fetched_chunks_count=200 fetched_samples_count=300 fetched_chunks_bytes=1024 fetched_data_bytes=2048 split_queries=10 status_code=200 response_size=1000 samples_scanned=0 store_gateway_touched_postings_count=20 store_gateway_touched_posting_bytes=200 query_storage_wall_time_seconds=6000`,
508+
source: tripperware.SourceAPI,
509+
},
510+
"should not report a log": {
511+
expectedLog: ``,
512+
source: tripperware.SourceRuler,
513+
enabledRulerQueryStatsLog: false,
514+
},
515+
"should report a log": {
516+
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 samples_scanned=0`,
517+
source: tripperware.SourceRuler,
518+
enabledRulerQueryStatsLog: true,
500519
},
501520
}
502521

503522
for testName, testData := range tests {
504523
t.Run(testName, func(t *testing.T) {
524+
handler := NewHandler(HandlerConfig{QueryStatsEnabled: true, EnabledRulerQueryStatsLog: testData.enabledRulerQueryStatsLog}, tenantfederation.Config{}, http.DefaultTransport, logger, nil)
505525
req.Header = testData.header
506-
handler.reportQueryStats(req, tripperware.SourceAPI, userID, testData.queryString, responseTime, testData.queryStats, testData.responseErr, statusCode, resp)
526+
handler.reportQueryStats(req, testData.source, userID, testData.queryString, responseTime, testData.queryStats, testData.responseErr, statusCode, resp)
507527
data, err := io.ReadAll(outputBuf)
508528
require.NoError(t, err)
509-
require.Equal(t, testData.expectedLog+"\n", string(data))
529+
if testData.expectedLog == "" {
530+
require.Empty(t, string(data))
531+
} else {
532+
require.Equal(t, testData.expectedLog+"\n", string(data))
533+
}
510534
})
511535
}
512536
}

0 commit comments

Comments
 (0)