Skip to content

Commit 798677a

Browse files
chaudumSusana Ferreira
andauthored
Fix panic in instant query splitting when using unwrapped rate (#6348)
* Fix panic in instant query splitting when using unwrapped rate The range aggregation `rate()` supports both log ranges and unwrapped ranges, e.g. `rate({app="foo"} [$__interval])` and `rate({app="foo"} | unwrap bar [$__interval])`. Since `rate()` was split into multiple `count_over_time()` over total duration, but `count_over_time()` does not support `unwrap`, unwrapped rate queries caused panics. This fix changes the splitting of `rate({app="foo"} | unwrap bar [$__interval])` into multiple `sum_over_time()` over total duration. Fixes #6344 Signed-off-by: Christian Haudum <[email protected]> * Add tests Signed-off-by: Christian Haudum <[email protected]> * Integrate review feedback Co-authored-by: Susana Ferreira <[email protected]> Co-authored-by: Susana Ferreira <[email protected]>
1 parent 8dcc2d6 commit 798677a

File tree

3 files changed

+48
-6
lines changed

3 files changed

+48
-6
lines changed

pkg/logql/downstream_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
125125
{`min_over_time({a=~".+"} | unwrap b [2s])`, time.Second},
126126
{`min_over_time({a=~".+"} | unwrap b [2s]) by (a)`, time.Second},
127127
{`rate({a=~".+"}[2s])`, time.Second},
128+
{`rate({a=~".+"} | unwrap b [2s])`, time.Second},
128129
{`bytes_rate({a=~".+"}[2s])`, time.Second},
129130

130131
// sum
@@ -136,6 +137,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
136137
{`sum(min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
137138
{`sum(min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
138139
{`sum(rate({a=~".+"}[2s]))`, time.Second},
140+
{`sum(rate({a=~".+"} | unwrap b [2s]))`, time.Second},
139141
{`sum(bytes_rate({a=~".+"}[2s]))`, time.Second},
140142

141143
// sum by
@@ -147,6 +149,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
147149
{`sum by (a) (min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
148150
{`sum by (a) (min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
149151
{`sum by (a) (rate({a=~".+"}[2s]))`, time.Second},
152+
{`sum by (a) (rate({a=~".+"} | unwrap b [2s]))`, time.Second},
150153
{`sum by (a) (bytes_rate({a=~".+"}[2s]))`, time.Second},
151154

152155
// count
@@ -158,6 +161,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
158161
{`count(min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
159162
{`count(min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
160163
{`count(rate({a=~".+"}[2s]))`, time.Second},
164+
{`count(rate({a=~".+"} | unwrap b [2s]))`, time.Second},
161165
{`count(bytes_rate({a=~".+"}[2s]))`, time.Second},
162166

163167
// count by
@@ -169,6 +173,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
169173
{`count by (a) (min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
170174
{`count by (a) (min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
171175
{`count by (a) (rate({a=~".+"}[2s]))`, time.Second},
176+
{`count by (a) (rate({a=~".+"} | unwrap b [2s]))`, time.Second},
172177
{`count by (a) (bytes_rate({a=~".+"}[2s]))`, time.Second},
173178

174179
// max
@@ -180,6 +185,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
180185
{`max(min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
181186
{`max(min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
182187
{`max(rate({a=~".+"}[2s]))`, time.Second},
188+
{`max(rate({a=~".+"} | unwrap b [2s]))`, time.Second},
183189
{`max(bytes_rate({a=~".+"}[2s]))`, time.Second},
184190

185191
// max by
@@ -191,6 +197,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
191197
{`max by (a) (min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
192198
{`max by (a) (min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
193199
{`max by (a) (rate({a=~".+"}[2s]))`, time.Second},
200+
{`max by (a) (rate({a=~".+"} | unwrap b [2s]))`, time.Second},
194201
{`max by (a) (bytes_rate({a=~".+"}[2s]))`, time.Second},
195202

196203
// min
@@ -202,6 +209,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
202209
{`min(min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
203210
{`min(min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
204211
{`min(rate({a=~".+"}[2s]))`, time.Second},
212+
{`min(rate({a=~".+"} | unwrap b [2s]))`, time.Second},
205213
{`min(bytes_rate({a=~".+"}[2s]))`, time.Second},
206214

207215
// min by
@@ -213,6 +221,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
213221
{`min by (a) (min_over_time({a=~".+"} | unwrap b [2s]))`, time.Second},
214222
{`min by (a) (min_over_time({a=~".+"} | unwrap b [2s]) by (a))`, time.Second},
215223
{`min by (a) (rate({a=~".+"}[2s]))`, time.Second},
224+
{`min by (a) (rate({a=~".+"} | unwrap b [2s]))`, time.Second},
216225
{`min by (a) (bytes_rate({a=~".+"}[2s]))`, time.Second},
217226

218227
// Label extraction stage
@@ -227,6 +236,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
227236
{`sum(min_over_time({a=~".+"} | logfmt | unwrap line [2s]))`, time.Second},
228237
{`sum(min_over_time({a=~".+"} | logfmt | unwrap line [2s]) by (a))`, time.Second},
229238
{`sum(rate({a=~".+"} | logfmt[2s]))`, time.Second},
239+
{`sum(rate({a=~".+"} | logfmt | unwrap line [2s]))`, time.Second},
230240
{`sum(bytes_rate({a=~".+"} | logfmt[2s]))`, time.Second},
231241
{`sum by (a) (bytes_over_time({a=~".+"} | logfmt [2s]))`, time.Second},
232242
{`sum by (a) (count_over_time({a=~".+"} | logfmt [2s]))`, time.Second},
@@ -236,6 +246,7 @@ func TestRangeMappingEquivalence(t *testing.T) {
236246
{`sum by (a) (min_over_time({a=~".+"} | logfmt | unwrap line [2s]))`, time.Second},
237247
{`sum by (a) (min_over_time({a=~".+"} | logfmt | unwrap line [2s]) by (a))`, time.Second},
238248
{`sum by (a) (rate({a=~".+"} | logfmt[2s]))`, time.Second},
249+
{`sum by (a) (rate({a=~".+"} | logfmt | unwrap line [2s]))`, time.Second},
239250
{`sum by (a) (bytes_rate({a=~".+"} | logfmt[2s]))`, time.Second},
240251

241252
{`count(max_over_time({a=~".+"} | logfmt | unwrap line [2s]) by (a))`, time.Second},

pkg/logql/rangemapper.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,11 @@ func hasLabelExtractionStage(expr syntax.SampleExpr) bool {
183183
// sumOverFullRange returns an expression that sums up individual downstream queries (while preserving labels)
184184
// and divides it by the full range in seconds to calculate a rate value.
185185
// The operation defines the range aggregation operation of the downstream queries.
186-
// Example:
186+
// Examples:
187187
// rate({app="foo"}[2m])
188188
// => (sum without (count_over_time({app="foo"}[1m]) ++ count_over_time({app="foo"}[1m]) offset 1m) / 120)
189+
// rate({app="foo"} | unwrap bar [2m])
190+
// => (sum without (sum_over_time({app="foo"} | unwrap bar [1m]) ++ sum_over_time({app="foo"} | unwrap bar [1m]) offset 1m) / 120)
189191
func (m RangeMapper) sumOverFullRange(expr *syntax.RangeAggregationExpr, overrideDownstream *syntax.VectorAggregationExpr, operation string, rangeInterval time.Duration, recorder *downstreamRecorder) syntax.SampleExpr {
190192
var downstreamExpr syntax.SampleExpr = &syntax.RangeAggregationExpr{
191193
Left: expr.Left,
@@ -373,7 +375,15 @@ func (m RangeMapper) mapRangeAggregationExpr(expr *syntax.RangeAggregationExpr,
373375
if labelExtractor && vectorAggrPushdown.Operation != syntax.OpTypeSum {
374376
return expr
375377
}
376-
return m.sumOverFullRange(expr, vectorAggrPushdown, syntax.OpRangeTypeCount, rangeInterval, recorder)
378+
// rate({app="foo"}[2m])
379+
// => (sum without (count_over_time({app="foo"}[1m]) ++ count_over_time({app="foo"}[1m]) offset 1m) / 120)
380+
op := syntax.OpRangeTypeCount
381+
if expr.Left.Unwrap != nil {
382+
// rate({app="foo"} | unwrap bar [2m])
383+
// => (sum without (sum_over_time({app="foo"} | unwrap bar [1m]) ++ sum_over_time({app="foo"} | unwrap bar [1m]) offset 1m) / 120)
384+
op = syntax.OpRangeTypeSum
385+
}
386+
return m.sumOverFullRange(expr, vectorAggrPushdown, op, rangeInterval, recorder)
377387
case syntax.OpRangeTypeBytesRate:
378388
if labelExtractor && vectorAggrPushdown.Operation != syntax.OpTypeSum {
379389
return expr

pkg/logql/rangemapper_test.go

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ func Test_SplitRangeVectorMapping(t *testing.T) {
130130
++ downstream<count_over_time({app="foo"}[1m]), shard=<nil>>
131131
) / 180)`,
132132
},
133+
{
134+
`rate({app="foo"} | unwrap bar[3m])`,
135+
`(sum without(
136+
downstream<sum_over_time({app="foo"} | unwrap bar [1m] offset 2m0s), shard=<nil>>
137+
++ downstream<sum_over_time({app="foo"} | unwrap bar [1m] offset 1m0s), shard=<nil>>
138+
++ downstream<sum_over_time({app="foo"} | unwrap bar [1m]), shard=<nil>>
139+
) / 180)`,
140+
},
133141
{
134142
`bytes_rate({app="foo"}[3m])`,
135143
`(sum without(
@@ -1471,6 +1479,23 @@ func Test_SplitRangeVectorMapping(t *testing.T) {
14711479
)
14721480
)`,
14731481
},
1482+
1483+
// regression test queries
1484+
{
1485+
`topk(10,sum by (org_id) (rate({container="query-frontend",namespace="loki"} |= "metrics.go" | logfmt | unwrap bytes(total_bytes) | __error__="" [3m])))`,
1486+
`topk(10,
1487+
sum by (org_id) (
1488+
(
1489+
sum without(
1490+
downstream<sum by(org_id)(sum_over_time({container="query-frontend",namespace="loki"} |= "metrics.go" | logfmt | unwrap bytes(total_bytes) | __error__="" [1m] offset 2m0s)),shard=<nil>>
1491+
++ downstream<sum by(org_id)(sum_over_time({container="query-frontend",namespace="loki"} |= "metrics.go" | logfmt | unwrap bytes(total_bytes) | __error__="" [1m] offset 1m0s)),shard=<nil>>
1492+
++ downstream<sum by(org_id)(sum_over_time({container="query-frontend",namespace="loki"} |= "metrics.go" | logfmt | unwrap bytes(total_bytes) | __error__="" [1m])),shard=<nil>>
1493+
)
1494+
/ 180
1495+
)
1496+
)
1497+
)`,
1498+
},
14741499
} {
14751500
tc := tc
14761501
t.Run(tc.expr, func(t *testing.T) {
@@ -1500,10 +1525,6 @@ func Test_SplitRangeVectorMapping_Noop(t *testing.T) {
15001525
`sum(avg_over_time({app="foo"} | unwrap bar[3m]))`,
15011526
`sum(avg_over_time({app="foo"} | unwrap bar[3m]))`,
15021527
},
1503-
{ // this query caused a panic in ops
1504-
`topk(10,sum by (cluster,org_id) (rate({container="query-frontend",namespace="loki-prod",cluster="prod-us-central-0"} |= "metrics.go" | logfmt | unwrap bytes(total_bytes) | __error__=""[1h])))`,
1505-
`topk(10,sum by (cluster,org_id) (rate({container="query-frontend",namespace="loki-prod",cluster="prod-us-central-0"} |= "metrics.go" | logfmt | unwrap bytes(total_bytes) | __error__=""[1h])))`,
1506-
},
15071528

15081529
// should be noop if range interval is lower or equal to split interval (1m)
15091530
{

0 commit comments

Comments
 (0)