Skip to content

Commit 2168dc2

Browse files
authored
fix: expressionUsesOnlyAllowedLabelsForMetricRegexp logic (#98)
Signed-off-by: Martin Chodur <m.chodur@seznam.cz>
1 parent 6401cfa commit 2168dc2

File tree

7 files changed

+101
-36
lines changed

7 files changed

+101
-36
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [3.6.1] - 2024-12-01
11+
- Fixed: FIxed the `expressionUsesOnlyAllowedLabelsForMetricRegexp` validator that might return false positives when used on more complex expressions with vector matching and functions using labels.
12+
1013
## [3.6.0] - 2024-11-29
1114
- Added: Configuration file can now be in Jsonnet format if the config file name ends with `.jsonnet` and it will get automatically evaluated.
1215

docs/validations.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,9 @@ params:
268268

269269
#### `expressionUsesOnlyAllowedLabelsForMetricRegexp`
270270

271-
Fails if the rule uses any labels beside those listed in `allowedLabels`, in combination with given metric regexp in its `expr` label matchers, aggregations or joins.
272-
Different metric name matchers are handled as follows:
273-
* `{__name__="foo",..}, foo{...}` - `foo` is matched literally against given `metricNameRegexp`, if matches, expr is validated against `allowedLabels`
274-
* `{__name__=~"foo",..}` - skipped
275-
* `{__name__!="foo",}, {__name__!~"foo"}` - skipped
271+
Fails if the rule uses any labels beside those listed in `allowedLabels`, in combination with given metric regexp in its `expr` label matchers, aggregations or joins. If the metric name is omitted in the query, or matched using regexp or any negative matcher on the `__name__` label, the rule will be skipped.
272+
273+
The check rather ignores validation of labels, where it cannot be sure if they are targeting only the metric in question, like aggregations by labels on top of vector matching expression where the labels might come from the other part of the expr.
276274

277275
> If using kube-state-metrics for exposing labels information about K8S objects (kube_*_labels) only those labels whitelisted by kube-state-metrics admin will be available.
278276
> Might be useful to check that users does not use any other in their expressions.

pkg/validator/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ var registeredUniversalRuleValidators = map[string]validatorCreator{
2525
"validFunctionsOnCounters": newValidFunctionsOnCounters,
2626
"rateBeforeAggregation": newRateBeforeAggregation,
2727
"expressionDoesNotUseLabels": newExpressionDoesNotUseLabels,
28-
"expressionUsesOnlyAllowedLabelsForMetricRegexp": newExpressionUseOnlyWhitelistedLabelsForMetric,
28+
"expressionUsesOnlyAllowedLabelsForMetricRegexp": newExpressionUsesOnlyAllowedLabelsForMetricRegexp,
2929
"expressionDoesNotUseOlderDataThan": newExpressionDoesNotUseOlderDataThan,
3030
"expressionDoesNotUseRangeShorterThan": newExpressionDoesNotUseRangeShorterThan,
3131
"expressionDoesNotUseMetrics": newExpressionDoesNotUseMetrics,

pkg/validator/promql_expression.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ type expressionUsesOnlyAllowedLabelsForMetricRegexp struct {
127127
metricNameRegexp *regexp.Regexp
128128
}
129129

130-
func newExpressionUseOnlyWhitelistedLabelsForMetric(paramsConfig yaml.Node) (Validator, error) {
130+
func newExpressionUsesOnlyAllowedLabelsForMetricRegexp(paramsConfig yaml.Node) (Validator, error) {
131131
params := struct {
132132
AllowedLabels []string `yaml:"allowedLabels"`
133133
MetricNameRegexp string `yaml:"metricNameRegexp"`

pkg/validator/promql_expression_helpers.go

Lines changed: 70 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@ package validator
22

33
import (
44
"fmt"
5+
"maps"
56
"regexp"
7+
"slices"
68

79
"github.com/prometheus/prometheus/model/labels"
810
"github.com/prometheus/prometheus/promql/parser"
9-
"golang.org/x/exp/slices"
1011
)
1112

1213
func init() {
@@ -37,9 +38,18 @@ func labelsUsedInSelectorForMetric(selector *parser.VectorSelector, metricRegexp
3738
return usedLabels, metricUsed
3839
}
3940

41+
func parserCallStringArgValue(e parser.Expr) string {
42+
val, ok := e.(*parser.StringLiteral)
43+
if !ok {
44+
return "" // Ignore the error, this shouldn't happen anyway, parser should already catch this.
45+
}
46+
return val.Val
47+
}
48+
4049
// Returns a list of labels which are used in given expr in relation to given metric.
41-
// Beside labels within vector selector itself, it adds labels used in Aggregate expressions and labels used in Binary expression.
42-
// For Binary expressions it may report false positives as the current implementation does not consider on which side of group_left/group_right is the given metric.
50+
// It traverses the whole expression tree top to bottom and collects all labels used in selectors, operators, functions etc.
51+
// In case of vector matching, it also collects labels used in vector matching only relevant to the part of the expression where the metric is used.
52+
// If the vector matching uses grouping, any labels used on top of the expression are not validated, since they might come from the other side of the expression.
4353
func getExpressionUsedLabelsForMetric(expr string, metricRegexp *regexp.Regexp) ([]string, error) {
4454
promQl, err := parser.ParseExpr(expr)
4555
if err != nil {
@@ -49,29 +59,72 @@ func getExpressionUsedLabelsForMetric(expr string, metricRegexp *regexp.Regexp)
4959
var usedLabels []string
5060

5161
labelsUpInExpr := func(path []parser.Node) []string {
52-
usedLabels := []string{}
53-
for _, n := range path {
62+
usedLabels := map[string]struct{}{}
63+
for i, n := range path {
5464
switch v := n.(type) {
5565
case *parser.AggregateExpr:
56-
usedLabels = append(usedLabels, v.Grouping...)
66+
for _, l := range v.Grouping {
67+
usedLabels[l] = struct{}{}
68+
}
5769
case *parser.BinaryExpr:
58-
if v.VectorMatching != nil {
59-
usedLabels = append(usedLabels, v.VectorMatching.Include...)
60-
usedLabels = append(usedLabels, v.VectorMatching.MatchingLabels...)
70+
if v.VectorMatching == nil {
71+
continue
72+
}
73+
// If any group_left/group_right is used, we need to reset the used labels, since any labels used on top of this expression might come from the other side of the expression.
74+
if v.VectorMatching.Include != nil {
75+
usedLabels = map[string]struct{}{}
76+
}
77+
// Validate only the on(...) labels. The ignoring(...) might target the other side of the binary expression.
78+
if v.VectorMatching.On {
79+
for _, l := range v.VectorMatching.MatchingLabels {
80+
usedLabels[l] = struct{}{}
81+
}
82+
}
83+
// We want to validate the group_left/group_right labels only if the validated metric is on the "one" of the many-one/one-to.many side.
84+
nextExpr := path[i+1].String()
85+
if (v.VectorMatching.Card == parser.CardManyToOne && v.RHS.String() == nextExpr) || (v.VectorMatching.Card == parser.CardOneToMany && v.LHS.String() == nextExpr) {
86+
for _, l := range v.VectorMatching.Include {
87+
usedLabels[l] = struct{}{}
88+
}
89+
}
90+
case *parser.Call:
91+
switch v.Func.Name {
92+
case "label_replace":
93+
// Any PromQL "above" this label_replace can use the destination synthetic label, so drop it from the list of already used labels.
94+
delete(usedLabels, parserCallStringArgValue(v.Args[1]))
95+
usedLabels[parserCallStringArgValue(v.Args[3])] = struct{}{} // The source_label is interesting for us
96+
case "label_join":
97+
delete(usedLabels, parserCallStringArgValue(v.Args[1]))
98+
// label_join is variadic, so we need to iterate over all labels that are used in the expression
99+
for _, l := range v.Args[3:] {
100+
usedLabels[parserCallStringArgValue(l)] = struct{}{}
101+
}
102+
case "sort_by_label":
103+
for _, l := range v.Args[1:] {
104+
usedLabels[parserCallStringArgValue(l)] = struct{}{}
105+
}
106+
case "sort_by_label_desc":
107+
for _, l := range v.Args[1:] {
108+
usedLabels[parserCallStringArgValue(l)] = struct{}{}
109+
}
61110
}
62111
}
63112
}
64-
return usedLabels
113+
delete(usedLabels, "") // Used in case of errors so just drop it
114+
return slices.Collect(maps.Keys(usedLabels))
65115
}
66116

67117
parser.Inspect(promQl, func(n parser.Node, path []parser.Node) error {
68-
if v, isVectorSelector := n.(*parser.VectorSelector); isVectorSelector {
69-
selectorUsedLabels, ok := labelsUsedInSelectorForMetric(v, metricRegexp)
70-
if ok {
71-
metricInExpr = true
72-
usedLabels = append(usedLabels, selectorUsedLabels...)
73-
usedLabels = append(usedLabels, labelsUpInExpr(path)...)
74-
}
118+
v, isVectorSelector := n.(*parser.VectorSelector)
119+
if !isVectorSelector {
120+
return nil
121+
}
122+
selectorUsedLabels, ok := labelsUsedInSelectorForMetric(v, metricRegexp)
123+
if ok {
124+
metricInExpr = true
125+
usedLabels = append(usedLabels, selectorUsedLabels...)
126+
// The path does not contain the current node, so we need to append it since some cases need also the last node.
127+
usedLabels = append(usedLabels, labelsUpInExpr(append(path, n))...)
75128
}
76129
return nil
77130
})

pkg/validator/promql_expression_helpers_test.go

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package validator
22

33
import (
44
"errors"
5+
"fmt"
56
"reflect"
67
"regexp"
78
"testing"
@@ -48,16 +49,28 @@ func TestGetExpressionUsedLabelsForMetric(t *testing.T) {
4849
{expr: "kube_pod_labels{label_app!='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app"}},
4950
{expr: "kube_pod_labels{label_app='foo'} * on(pod) kube_pod_info{}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}},
5051
{expr: "kube_pod_info{} * on(pod) group_left(label_workload) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "label_workload", "pod"}},
51-
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod", "pod_ip"}},
52-
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'} offset 1h", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod", "pod_ip"}},
52+
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}},
53+
{expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'} offset 1h", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}},
54+
{expr: "sum(kube_pod_info * kube_pod_labels) by (foo)", metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo"}},
55+
{expr: "label_replace(kube_pod_labels, 'bar', '$1', 'foo', '.*')", metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo"}},
56+
{expr: `sum(label_join(kube_pod_labels, "foo", ",", "l1", "l2", "l3")) by (foo)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "l1", "l2", "l3"}},
57+
{expr: `sum(label_join(kube_pod_labels, "foo", ",", "l1", "l2", "l3")) by (bar)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "l1", "l2", "l3", "bar"}},
58+
{expr: `sum(kube_pod_labels * on (foo, bar) group_right(baz) kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
59+
{expr: `sum(kube_pod_labels * on (foo, bar) group_left(baz) kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar"}},
60+
{expr: `sum(kube_pod_labels * ignoring (foo, bar) group_left() kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel}},
61+
{expr: `sum(kube_pod_labels * on (foo, bar) kube_pod_info) by (baz)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
62+
{expr: `sort_by_label(kube_pod_labels, "foo", "bar", "baz")`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
63+
{expr: `sort_by_label_desc(kube_pod_labels, "foo", "bar", "baz")`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}},
5364
}
5465

55-
for _, test := range tests {
56-
labels, err := getExpressionUsedLabelsForMetric(test.expr, regexp.MustCompile(test.metric))
57-
assert.ElementsMatch(t, labels, test.expected, "Expected labels %v, but got %v", test.expected, labels)
58-
if !errors.Is(err, test.expectedErr) {
59-
t.Errorf("Expected error %v, but got %v", test.expectedErr, err)
60-
}
66+
for i, test := range tests {
67+
t.Run(fmt.Sprintf("test_case_%d", i), func(t *testing.T) {
68+
labels, err := getExpressionUsedLabelsForMetric(test.expr, regexp.MustCompile(test.metric))
69+
assert.ElementsMatch(t, labels, test.expected, "Expected labels %v, but got %v", test.expected, labels)
70+
if !errors.Is(err, test.expectedErr) {
71+
t.Errorf("Expected error %v, but got %v", test.expectedErr, err)
72+
}
73+
})
6174
}
6275
}
6376

pkg/validator/validator_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,8 @@ var testCases = []struct {
105105
{name: "ruleExprDoesUseForbiddenLabelInBy", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "sum(kube_pod_labels) by (app)"}, expectedErrors: 1},
106106
{name: "ruleExprDoesUseForbiddenLabelInOn", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) up"}, expectedErrors: 1},
107107
{name: "ruleExprDoesUseForbiddenLabelInGroup", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "group(kube_pod_labels) by (label_app)"}, expectedErrors: 1},
108-
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 2},
109-
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 2},
110-
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{"app"}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 1},
111-
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{"app"}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 1},
108+
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer1", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 1},
109+
{name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer2", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 2},
112110

113111
// expressionDoesNotUseOlderDataThan
114112
{name: "ruleExprDoesNotUseOlderData", validator: expressionDoesNotUseOlderDataThan{limit: time.Hour}, rule: rulefmt.Rule{Expr: "up{xxx='yyy'}"}, expectedErrors: 0},

0 commit comments

Comments
 (0)