Skip to content

Commit 5b2f6f5

Browse files
simonswine and jtlisi authored
Add overrides-exporter to cortextool (#91)
Exports Cortex runtime configuration overrides as metrics. Signed-off-by: Christian Simon <[email protected]> Co-authored-by: Jacob Lisi <[email protected]>
1 parent cd544fd commit 5b2f6f5

File tree

11 files changed

+1123
-6
lines changed

11 files changed

+1123
-6
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## unreleased / master
44

55
* [ENHANCEMENT] Loadgen: Allow users to selectively disable query or write loadgen by leaving their respective URL configs empty. #95
6+
* [FEATURE] Add overrides-exporter to cortextool, which exports Cortex runtime configuration overrides as metrics. #91
67

78
## v0.3.2
89

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,17 @@ This commands checks rules against the recommended [best practices](https://prom
9696

9797
cortextool rules check ./example_rules_one.yaml
9898

99+
100+
#### Overrides Exporter
101+
102+
The Overrides Exporter continuously exports [per tenant configuration overrides][runtime-config] as metrics. Optionally, it can also export the limits defined in a presets file (cf. the example [override config file] and [presets file]).
103+
104+
cortextool overrides-exporter --overrides-file overrides.yaml --presets-file presets.yaml
105+
106+
[override config file]:./pkg/commands/testdata/overrides.yaml
107+
[presets file]:./pkg/commands/testdata/presets.yaml
108+
[runtime-config]:https://cortexmetrics.io/docs/configuration/arguments/#runtime-configuration-file
109+
99110
## chunktool
100111

101112
This repo also contains the `chunktool`. A client meant to interact with chunks stored and indexed in cortex backends.

cmd/cortextool/main.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ import (
1111
)
1212

1313
var (
14-
ruleCommand commands.RuleCommand
15-
alertCommand commands.AlertCommand
16-
alertmanagerCommand commands.AlertmanagerCommand
17-
logConfig commands.LoggerConfig
18-
pushGateway commands.PushGatewayConfig
19-
loadgenCommand commands.LoadgenCommand
14+
ruleCommand commands.RuleCommand
15+
alertCommand commands.AlertCommand
16+
alertmanagerCommand commands.AlertmanagerCommand
17+
logConfig commands.LoggerConfig
18+
pushGateway commands.PushGatewayConfig
19+
loadgenCommand commands.LoadgenCommand
20+
overridesExporterCommand = commands.NewOverridesExporterCommand()
2021
)
2122

2223
func main() {
@@ -27,6 +28,7 @@ func main() {
2728
ruleCommand.Register(app)
2829
pushGateway.Register(app)
2930
loadgenCommand.Register(app)
31+
overridesExporterCommand.Register(app)
3032

3133
app.Command("version", "Get the version of the cortextool CLI").Action(func(k *kingpin.ParseContext) error {
3234
fmt.Print(version.Template)

pkg/commands/overrides_exporter.go

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
package commands
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"io/ioutil"
8+
"net/http"
9+
"os"
10+
"os/signal"
11+
"sync"
12+
"time"
13+
14+
"github.com/cortexproject/cortex/pkg/util/validation"
15+
"github.com/prometheus/client_golang/prometheus"
16+
"github.com/prometheus/client_golang/prometheus/promauto"
17+
"github.com/prometheus/client_golang/prometheus/promhttp"
18+
"github.com/sirupsen/logrus"
19+
"gopkg.in/alecthomas/kingpin.v2"
20+
"gopkg.in/yaml.v3"
21+
)
22+
23+
type OverridesExporterCommand struct {
24+
listenAddress string
25+
metricsEndpoint string
26+
overridesFilePath string
27+
presetsFilePath string
28+
refreshInterval time.Duration
29+
30+
registry *prometheus.Registry
31+
overrideGauge *prometheus.GaugeVec
32+
}
33+
34+
func NewOverridesExporterCommand() *OverridesExporterCommand {
35+
registry := prometheus.NewRegistry()
36+
return &OverridesExporterCommand{
37+
registry: registry,
38+
overrideGauge: promauto.With(registry).NewGaugeVec(prometheus.GaugeOpts{
39+
Name: "cortex_overrides",
40+
Help: "Various different limits.",
41+
}, []string{"limit_type", "type", "user"}),
42+
}
43+
}
44+
45+
func (o *OverridesExporterCommand) Register(app *kingpin.Application) {
46+
overridesExporterCommand := app.Command("overrides-exporter", "The overrides exporter allow to expose metrics about the runtime configuration of Cortex.").Action(o.run)
47+
overridesExporterCommand.Flag("overrides-file", "File path where overrides config is stored.").Required().StringVar(&o.overridesFilePath)
48+
// Presets are the small user, medium user, etc config that we have defined.
49+
overridesExporterCommand.Flag("presets-file", "File path where presets config stored.").Default("").StringVar(&o.presetsFilePath)
50+
overridesExporterCommand.Flag("listen-address", "Address on which to expose metrics.").Default(":9683").StringVar(&o.listenAddress)
51+
overridesExporterCommand.Flag("metrics-endpoint", "Path under which to expose metrics.").Default("/metrics").StringVar(&o.metricsEndpoint)
52+
overridesExporterCommand.Flag("refresh-interval", "Interval how often the overrides and potentially presets files get refreshed.").Default("1m").DurationVar(&o.refreshInterval)
53+
}
54+
55+
func (o *OverridesExporterCommand) updateOverridesMetrics() error {
56+
if o.overridesFilePath == "" {
57+
return errors.New("overrides filepath is empty")
58+
}
59+
60+
logrus.Debug("updating overrides")
61+
62+
overrides := &struct {
63+
TenantLimits map[string]*validation.Limits `yaml:"overrides"`
64+
}{}
65+
bytes, err := ioutil.ReadFile(o.overridesFilePath)
66+
if err != nil {
67+
return fmt.Errorf("failed to update overrides, err: %w", err)
68+
}
69+
if err := yaml.Unmarshal(bytes, overrides); err != nil {
70+
return fmt.Errorf("failed to update overrides, err: %w", err)
71+
}
72+
o.updateMetrics("tenant", overrides.TenantLimits)
73+
74+
return nil
75+
}
76+
77+
func (o *OverridesExporterCommand) updatePresetsMetrics() error {
78+
if o.presetsFilePath == "" {
79+
return nil
80+
}
81+
82+
logrus.Debug("updating presets")
83+
84+
presets := &struct {
85+
Presets map[string]*validation.Limits `yaml:"presets"`
86+
}{}
87+
bytes, err := ioutil.ReadFile(o.presetsFilePath)
88+
if err != nil {
89+
return fmt.Errorf("failed to update presets, error reading file: %w", err)
90+
}
91+
if err := yaml.Unmarshal(bytes, presets); err != nil {
92+
return fmt.Errorf("failed to update presets, error parsing YAML: %w", err)
93+
}
94+
o.updateMetrics("preset", presets.Presets)
95+
return nil
96+
}
97+
98+
func (o *OverridesExporterCommand) updateMetrics(typ string, limitsMap map[string]*validation.Limits) {
99+
for user, limits := range limitsMap {
100+
o.overrideGauge.WithLabelValues(
101+
"max_series_per_query", typ, user,
102+
).Set(float64(limits.MaxSeriesPerQuery))
103+
o.overrideGauge.WithLabelValues(
104+
"max_samples_per_query", typ, user,
105+
).Set(float64(limits.MaxSamplesPerQuery))
106+
o.overrideGauge.WithLabelValues(
107+
"max_local_series_per_user", typ, user,
108+
).Set(float64(limits.MaxLocalSeriesPerUser))
109+
o.overrideGauge.WithLabelValues(
110+
"max_local_series_per_metric", typ, user,
111+
).Set(float64(limits.MaxLocalSeriesPerMetric))
112+
o.overrideGauge.WithLabelValues(
113+
"max_global_series_per_user", typ, user,
114+
).Set(float64(limits.MaxGlobalSeriesPerUser))
115+
o.overrideGauge.WithLabelValues(
116+
"max_global_series_per_metric", typ, user,
117+
).Set(float64(limits.MaxGlobalSeriesPerMetric))
118+
o.overrideGauge.WithLabelValues(
119+
"ingestion_rate", typ, user,
120+
).Set(limits.IngestionRate)
121+
o.overrideGauge.WithLabelValues(
122+
"ingestion_burst_size", typ, user,
123+
).Set(float64(limits.IngestionBurstSize))
124+
}
125+
}
126+
127+
func (o *OverridesExporterCommand) run(k *kingpin.ParseContext) error {
128+
if o.overridesFilePath == "" {
129+
return errors.New("Empty overrides file path")
130+
}
131+
132+
// Update the metrics once before starting.
133+
if err := o.updateOverridesMetrics(); err != nil {
134+
return err
135+
}
136+
if err := o.updatePresetsMetrics(); err != nil {
137+
return err
138+
}
139+
140+
stopCh := make(chan struct{})
141+
var wg sync.WaitGroup
142+
defer func() {
143+
close(stopCh)
144+
wg.Wait()
145+
}()
146+
147+
// Update the metrics every 1 minute.
148+
wg.Add(1)
149+
go func() {
150+
defer wg.Done()
151+
152+
for {
153+
select {
154+
case <-stopCh:
155+
return
156+
case <-time.After(o.refreshInterval):
157+
if err := o.updateOverridesMetrics(); err != nil {
158+
logrus.Warnf("error updating override metrics: %s", err)
159+
}
160+
if err := o.updatePresetsMetrics(); err != nil {
161+
logrus.Warnf("error updating presets metrics: %s", err)
162+
}
163+
}
164+
}
165+
}()
166+
167+
mux := http.NewServeMux()
168+
mux.Handle(o.metricsEndpoint, promhttp.HandlerFor(o.registry, promhttp.HandlerOpts{
169+
MaxRequestsInFlight: 10,
170+
Registry: o.registry,
171+
}))
172+
173+
mux.HandleFunc("/ready", func(w http.ResponseWriter, _ *http.Request) {
174+
http.Error(w, "ready", http.StatusOK)
175+
})
176+
177+
signalCh := make(chan os.Signal, 1)
178+
signal.Notify(signalCh, os.Interrupt)
179+
180+
server := &http.Server{
181+
Addr: o.listenAddress,
182+
Handler: mux,
183+
}
184+
185+
// Block until a signal is received.
186+
wg.Add(1)
187+
go func() {
188+
defer wg.Done()
189+
190+
select {
191+
case <-stopCh:
192+
return
193+
case s := <-signalCh:
194+
logrus.Infof("got signal: %s", s)
195+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
196+
defer cancel()
197+
if err := server.Shutdown(ctx); err != nil {
198+
logrus.Warnf("error shutting down http server: %s", err)
199+
}
200+
}
201+
}()
202+
203+
mode := "runtime config overrides"
204+
if o.presetsFilePath != "" {
205+
mode += " and presets"
206+
}
207+
logrus.Infof("exposing %s metrics on %s", mode, o.listenAddress)
208+
if err := server.ListenAndServe(); err != http.ErrServerClosed {
209+
return err
210+
}
211+
212+
return nil
213+
}

0 commit comments

Comments
 (0)