Skip to content

Commit 711b0fc

Browse files
authored
WIP: Add logic and command to lint recording rules (#46)
* Add logic and command to lint recording rules based on naming best practices. Signed-off-by: Callum Styan <[email protected]> * Rename recording rules names check, since we're not linting based on the definition of linting for the existing function. Also adds strict bool to the check. Signed-off-by: Callum Styan <[email protected]> * Don't try to evaluate recording rule names if the name is empty (because the rule is actually an alerting rule). Signed-off-by: Callum Styan <[email protected]>
1 parent 22629b3 commit 711b0fc

File tree

3 files changed

+116
-1
lines changed

3 files changed

+116
-1
lines changed

pkg/commands/rules.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ type RuleCommand struct {
6363
// Lint Rules Config
6464
LintDryRun bool
6565

66+
// Rules check flags
67+
Strict bool
68+
6669
DisableColor bool
6770
}
6871

@@ -99,6 +102,9 @@ func (r *RuleCommand) Register(app *kingpin.Application) {
99102
lintCmd := rulesCmd.
100103
Command("lint", "formats a set of rule files. It reorders keys alphabetically, uses 4 spaces as indentantion, and formats PromQL expressions to a single line.").
101104
Action(r.lint)
105+
checkCmd := rulesCmd.
106+
Command("check", "runs various best practice checks against rules.").
107+
Action(r.checkRecordingRuleNames)
102108

103109
// Require Cortex cluster address and tentant ID on all these commands
104110
for _, c := range []*kingpin.CmdClause{listCmd, printRulesCmd, getRuleGroupCmd, deleteRuleGroupCmd, loadRulesCmd, diffRulesCmd, syncRulesCmd} {
@@ -166,6 +172,14 @@ func (r *RuleCommand) Register(app *kingpin.Application) {
166172
"Comma separated list of paths to directories containing rules yaml files. Each file in a directory with a .yml or .yaml suffix will be parsed.",
167173
).StringVar(&r.RuleFilesPath)
168174
lintCmd.Flag("dry-run", "Performs a trial run that doesn't make any changes and (mostly) produces the same outpupt as a real run.").Short('n').BoolVar(&r.LintDryRun)
175+
176+
checkCmd.Arg("rule-files", "The rule files to check.").Required().ExistingFilesVar(&r.RuleFilesList)
177+
checkCmd.Flag("rule-files", "The rule files to check. Flag can be reused to load multiple files.").StringVar(&r.RuleFiles)
178+
checkCmd.Flag(
179+
"rule-dirs",
180+
"Comma separated list of paths to directories containing rules yaml files. Each file in a directory with a .yml or .yaml suffix will be parsed.",
181+
).StringVar(&r.RuleFilesPath)
182+
checkCmd.Flag("strict", "fails rules checks that do not match best practices exactly").BoolVar(&r.Strict)
169183
}
170184

171185
func (r *RuleCommand) setup(k *kingpin.ParseContext) error {
@@ -556,6 +570,27 @@ func (r *RuleCommand) lint(k *kingpin.ParseContext) error {
556570
return nil
557571
}
558572

573+
func (r *RuleCommand) checkRecordingRuleNames(k *kingpin.ParseContext) error {
574+
err := r.setupFiles()
575+
if err != nil {
576+
return errors.Wrap(err, "check operation unsuccessful, unable to load rules files")
577+
}
578+
579+
namespaces, err := rules.ParseFiles(r.RuleFilesList)
580+
if err != nil {
581+
return errors.Wrap(err, "check operation unsuccessful, unable to parse rules files")
582+
}
583+
584+
for _, ruleNamespace := range namespaces {
585+
n := ruleNamespace.CheckRecordingRules(r.Strict)
586+
if n != 0 {
587+
return fmt.Errorf("%d erroneous recording rule names", n)
588+
}
589+
}
590+
591+
return nil
592+
}
593+
559594
// save saves a set of rule files to to disk. You can specify whenever you want the
560595
// file(s) to be edited in-place.
561596
func save(nss map[string]rules.RuleNamespace, i bool) error {

pkg/rules/rules.go

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ type RuleNamespace struct {
2121
}
2222

2323
// LintPromQLExpressions runs the `expr` from a rule through the PromQL parser and
24-
// compares its output. if it differs from the parser, it uses the parser's instead.
24+
// compares its output. If it differs from the parser, it uses the parser's instead.
2525
func (r RuleNamespace) LintPromQLExpressions() (int, int, error) {
2626
// `count` represents the number of rules we evalated.
2727
// `mod` represents the number of rules linted.
@@ -51,6 +51,39 @@ func (r RuleNamespace) LintPromQLExpressions() (int, int, error) {
5151
return count, mod, nil
5252
}
5353

54+
// CheckRecordingRules checks that recording rules have at least one colon in their name, this is based
55+
// on the recording rules best practices here: https://prometheus.io/docs/practices/rules/
56+
// Returns the number of rules that don't match the requirements, and error if that number is not 0.
57+
func (r RuleNamespace) CheckRecordingRules(strict bool) int {
58+
var name string
59+
var count int
60+
reqChunks := 2
61+
if strict {
62+
reqChunks = 3
63+
}
64+
for _, group := range r.Groups {
65+
for _, rule := range group.Rules {
66+
// Assume if there is a rule.Record that this is a recording rule.
67+
if rule.Record == "" {
68+
continue
69+
}
70+
name = rule.Record
71+
log.WithFields(log.Fields{"rule": name}).Debugf("linting recording rule name")
72+
chunks := strings.Split(name, ":")
73+
if len(chunks) < reqChunks {
74+
count++
75+
log.WithFields(log.Fields{
76+
"rule": getRuleName(rule),
77+
"ruleGroup": group.Name,
78+
"file": r.Filepath,
79+
"error": "recording rule name does not match level:metric:operation format, must contain at least one colon",
80+
}).Errorf("bad recording rule name")
81+
}
82+
}
83+
}
84+
return count
85+
}
86+
5487
// AggregateBy modifies the aggregation rules in groups to include a given Label.
5588
func (r RuleNamespace) AggregateBy(label string) (int, int, error) {
5689
// `count` represents the number of rules we evalated.

pkg/rules/rules_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,50 @@ func TestLintPromQLExpressions(t *testing.T) {
155155
})
156156
}
157157
}
158+
159+
func TestCheckRecordingRules(t *testing.T) {
160+
tt := []struct {
161+
name string
162+
ruleName string
163+
count int
164+
strict bool
165+
}{
166+
{
167+
name: "follows rule name conventions",
168+
ruleName: "level:metric:operation",
169+
count: 0,
170+
},
171+
{
172+
name: "doesn't follow rule name conventions",
173+
ruleName: "level_metric_operation",
174+
count: 1,
175+
},
176+
{
177+
name: "almost follows rule name conventions",
178+
ruleName: "level:metric_operation",
179+
count: 1,
180+
strict: true,
181+
},
182+
{
183+
name: "almost follows rule name conventions",
184+
ruleName: "level:metric_operation",
185+
count: 0,
186+
},
187+
{
188+
name: "follows rule name conventions extra",
189+
ruleName: "level:metric:something_else:operation",
190+
count: 0,
191+
},
192+
}
193+
194+
for _, tc := range tt {
195+
t.Run(tc.name, func(t *testing.T) {
196+
r := RuleNamespace{Groups: []rulefmt.RuleGroup{{Rules: []rulefmt.Rule{
197+
{Record: tc.ruleName, Expr: "rate(some_metric_total)[5m]"},
198+
}}}}
199+
200+
n := r.CheckRecordingRules(tc.strict)
201+
require.Equal(t, tc.count, n, "failed rule: %s", tc.ruleName)
202+
})
203+
}
204+
}

0 commit comments

Comments
 (0)