@@ -21,6 +21,7 @@ import (
2121 "github.com/prometheus/client_golang/prometheus"
2222 "github.com/prometheus/client_golang/prometheus/promauto"
2323 "github.com/thanos-io/thanos/internal/cortex/frontend/transport/utils"
24+ "github.com/thanos-io/thanos/internal/cortex/querier/queryrange"
2425 querier_stats "github.com/thanos-io/thanos/internal/cortex/querier/stats"
2526 "github.com/thanos-io/thanos/internal/cortex/tenant"
2627 "github.com/thanos-io/thanos/internal/cortex/util"
@@ -43,12 +44,13 @@ var (
 
 // HandlerConfig Config for a Handler.
 type HandlerConfig struct {
-	LogQueriesLongerThan     time.Duration `yaml:"log_queries_longer_than"`
-	MaxBodySize              int64         `yaml:"max_body_size"`
-	QueryStatsEnabled        bool          `yaml:"query_stats_enabled"`
-	LogFailedQueries         bool          `yaml:"log_failed_queries"`
-	FailedQueryCacheCapacity int           `yaml:"failed_query_cache_capacity"`
-	SlowQueryLogsUserHeader  string        `yaml:"slow_query_logs_user_header"`
+	LogQueriesLongerThan        time.Duration `yaml:"log_queries_longer_than"`
+	MaxBodySize                 int64         `yaml:"max_body_size"`
+	QueryStatsEnabled           bool          `yaml:"query_stats_enabled"`
+	LogFailedQueries            bool          `yaml:"log_failed_queries"`
+	FailedQueryCacheCapacity    int           `yaml:"failed_query_cache_capacity"`
+	SlowQueryLogsUserHeader     string        `yaml:"slow_query_logs_user_header"`
+	LogQueriesMoreExpensiveThan uint64        `yaml:"log_queries_more_expensive_than"`
 }
 
 // Handler accepts queries and forwards them to RoundTripper. It can log slow queries,
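Note the unit asymmetry: `log_queries_more_expensive_than` is compared against raw bytes fetched, while the log lines added further down report megabytes. A minimal wiring sketch with illustrative values (the `transport` package name matches this file's location, but as an `internal` package it is only importable from inside the Thanos module, so treat this as a sketch rather than consumable API):

```go
package main

import (
	"fmt"
	"time"

	"github.com/thanos-io/thanos/internal/cortex/frontend/transport"
)

func main() {
	cfg := transport.HandlerConfig{
		// Existing behaviour: log queries slower than 10s.
		LogQueriesLongerThan: 10 * time.Second,
		// New: log queries that fetched more than 1 GiB, expressed in bytes.
		// A zero value disables the check, mirroring LogQueriesLongerThan.
		LogQueriesMoreExpensiveThan: 1 << 30,
	}
	fmt.Printf("expensive-query threshold: %d bytes\n", cfg.LogQueriesMoreExpensiveThan)
}
```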
@@ -60,12 +62,13 @@ type Handler struct {
 	failedQueryCache *utils.FailedQueryCache
 
 	// Metrics.
-	querySeconds     *prometheus.CounterVec
-	querySeries      *prometheus.CounterVec
-	queryBytes       *prometheus.CounterVec
-	activeUsers      *util.ActiveUsersCleanupService
-	slowQueryCount   prometheus.Counter
-	failedQueryCount prometheus.Counter
+	querySeconds        *prometheus.CounterVec
+	querySeries         *prometheus.CounterVec
+	queryBytes          *prometheus.CounterVec
+	activeUsers         *util.ActiveUsersCleanupService
+	slowQueryCount      prometheus.Counter
+	failedQueryCount    prometheus.Counter
+	expensiveQueryCount prometheus.Counter
 }
 
 // NewHandler creates a new frontend handler.
@@ -119,6 +122,10 @@ func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logge
 		Name: "cortex_failed_query_total",
 		Help: "Total number of failed queries detected.",
 	})
+	h.expensiveQueryCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{
+		Name: "cortex_expensive_query_total",
+		Help: "Total number of expensive queries detected.",
+	})
 	return h
 }
 
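A sketch of a unit test for the new counter, assuming go-kit's `log.NewNopLogger` for the logger and `testutil` from prometheus/client_golang; the `NewHandler` arguments follow the signature visible in the hunk header above, with a nil RoundTripper since the request path is never exercised:

```go
package transport

import (
	"testing"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func TestExpensiveQueryCounter(t *testing.T) {
	reg := prometheus.NewRegistry()
	h := NewHandler(HandlerConfig{LogQueriesMoreExpensiveThan: 1024}, nil, log.NewNopLogger(), reg)

	// The new counter is registered unconditionally and should start at zero.
	if got := testutil.ToFloat64(h.expensiveQueryCount); got != 0 {
		t.Fatalf("expected 0, got %v", got)
	}
}
```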
@@ -204,12 +211,17 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 
 	// Check whether we should parse the query string.
 	shouldReportSlowQuery := f.cfg.LogQueriesLongerThan != 0 && queryResponseTime > f.cfg.LogQueriesLongerThan
-	if shouldReportSlowQuery || f.cfg.QueryStatsEnabled {
+	queryBytesFetched := queryrange.GetQueryBytesFetchedFromHeader(resp.Header)
+	shouldReportExpensiveQuery := f.cfg.LogQueriesMoreExpensiveThan != 0 && queryBytesFetched > f.cfg.LogQueriesMoreExpensiveThan
+	if shouldReportSlowQuery || shouldReportExpensiveQuery || f.cfg.QueryStatsEnabled {
 		queryString = f.parseRequestQueryString(r, buf)
 	}
 
 	if shouldReportSlowQuery {
-		f.reportSlowQuery(r, hs, queryString, queryResponseTime)
+		f.reportSlowQuery(r, hs, queryString, queryBytesFetched, queryResponseTime)
+	}
+	if shouldReportExpensiveQuery {
+		f.reportExpensiveQuery(r, queryString, queryBytesFetched, queryResponseTime)
 	}
 	if f.cfg.QueryStatsEnabled {
 		f.reportQueryStats(r, queryString, queryResponseTime, stats)
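The new gate mirrors `shouldReportSlowQuery`: a zero threshold disables it, and it is assumed here that `queryrange.GetQueryBytesFetchedFromHeader` returns 0 when the downstream attached no bytes-fetched header, so the gate stays closed unless both sides are deployed. As a standalone predicate for clarity (a hypothetical extraction, not part of the commit):

```go
// shouldReportExpensive is a hypothetical extraction of the inline check above.
// A zero threshold disables the check; bytesFetched is assumed to be 0 when
// no bytes-fetched header was present on the response.
func shouldReportExpensive(thresholdBytes, bytesFetched uint64) bool {
	return thresholdBytes != 0 && bytesFetched > thresholdBytes
}
```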
@@ -245,8 +257,37 @@ func (f *Handler) reportFailedQuery(r *http.Request, queryString url.Values, err
 	level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
 }
 
+func (f *Handler) reportExpensiveQuery(r *http.Request, queryString url.Values, queryBytesFetched uint64, queryResponseTime time.Duration) {
+	f.expensiveQueryCount.Inc()
+	// NOTE(GiedriusS): see https://github.com/grafana/grafana/pull/60301 for more info.
+	grafanaDashboardUID := "-"
+	if dashboardUID := r.Header.Get("X-Dashboard-Uid"); dashboardUID != "" {
+		grafanaDashboardUID = dashboardUID
+	}
+	grafanaPanelID := "-"
+	if panelID := r.Header.Get("X-Panel-Id"); panelID != "" {
+		grafanaPanelID = panelID
+	}
+	remoteUser, _, _ := r.BasicAuth()
+
+	logMessage := append([]interface{}{
+		"msg", "expensive query",
+		"method", r.Method,
+		"host", r.Host,
+		"path", r.URL.Path,
+		"remote_user", remoteUser,
+		"remote_addr", r.RemoteAddr,
+		"query_megabytes_fetched", queryBytesFetched/(1024*1024),
+		"grafana_dashboard_uid", grafanaDashboardUID,
+		"grafana_panel_id", grafanaPanelID,
+		"query_response_time", queryResponseTime.String(),
+	}, formatQueryString(queryString)...)
+
+	level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
+}
+
 // reportSlowQuery reports slow queries.
-func (f *Handler) reportSlowQuery(r *http.Request, responseHeaders http.Header, queryString url.Values, queryResponseTime time.Duration) {
+func (f *Handler) reportSlowQuery(r *http.Request, responseHeaders http.Header, queryString url.Values, queryBytesFetched uint64, queryResponseTime time.Duration) {
 	f.slowQueryCount.Inc()
 	// NOTE(GiedriusS): see https://github.com/grafana/grafana/pull/60301 for more info.
 	grafanaDashboardUID := "-"
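One detail worth noting in `query_megabytes_fetched`: `queryBytesFetched/(1024*1024)` is integer division, so any fetch under 1 MiB is logged as 0 even when it crossed a sub-megabyte threshold. A quick illustration:

```go
package main

import "fmt"

func main() {
	fmt.Println(uint64(900*1024) / (1024 * 1024)) // 0: sub-MiB fetches log as zero
	fmt.Println(uint64(3<<30) / (1024 * 1024))    // 3072: 3 GiB expressed in MiB
}
```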
@@ -278,6 +319,7 @@ func (f *Handler) reportSlowQuery(r *http.Request, responseHeaders http.Header,
278319 "remote_user" , remoteUser ,
279320 "remote_addr" , r .RemoteAddr ,
280321 "time_taken" , queryResponseTime .String (),
322+ "query_megabytes_fetched" , queryBytesFetched / (1024 * 1024 ),
281323 "grafana_dashboard_uid" , grafanaDashboardUID ,
282324 "grafana_panel_id" , grafanaPanelID ,
283325 "trace_id" , thanosTraceID ,