Skip to content

Commit 23e281c

Browse files
authored
Merge pull request #165 from flant/feat_metrics_from_hooks
feat: allow to send metrics from hooks
2 parents 6f6ad3e + 1a76d91 commit 23e281c

File tree

6 files changed

+147
-20
lines changed

6 files changed

+147
-20
lines changed

METRICS.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,28 @@ Shell-operator exports Prometheus metrics to the `/metrics` path. The default po
88
* `shell_operator_hook_allowed_errors{hook="hook-name"}` – this is the counter of hooks’ execution errors. It only tracks errors of hooks that are allowed to exit with an error (the parameter `allowFailure: true` is set in the configuration). The metric has a “hook” label with the name of a failed hook.
99
* `shell_operator_tasks_queue_length` – a gauge showing the length of the working queue. This metric can be used to warn about stuck hooks. It has no labels.
1010
* `shell_operator_live_ticks` – a counter that increases every 10 seconds. This metric can be used for alerting about an unhealthy Shell-operator. It has no labels.
11+
12+
## Custom metrics
13+
14+
Hooks can export metrics by writing a set of operations in JSON format to the file specified by the `$METRICS_PATH` environment variable.
15+
16+
Operation to increase a counter:
17+
18+
```json
19+
{"name":"metric_name","add":1,"labels":{"label1":"value1"}}
20+
```
21+
22+
Operation to set a value for a gauge:
23+
24+
```json
25+
{"name":"metric_name","set":33,"labels":{"label1":"value1"}}
26+
```
27+
28+
Labels are optional; Shell-operator adds a `hook` label with the hook name to every metric exported this way.
29+
30+
Several metrics can be exported at once. For example, this script will create 2 metrics:
31+
32+
```
33+
echo '{"name":"hook_metric_count","add":1,"labels":{"label1":"value1"}}' >> $METRICS_PATH
34+
echo '{"name":"hook_metrics_items","add":1,"labels":{"label1":"value1"}}' >> $METRICS_PATH
35+
```

go.sum

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ github.com/flant/go-openapi-validate v0.19.4-0.20200313141509-0c0fba4d39e1/go.mo
9999
github.com/flant/libjq-go v1.0.1-0.20200205115921-27e93c18c17f h1:3tmztWJjf61sHfHYLSMi5TDdz5jtmcVqe43PSwsxNvE=
100100
github.com/flant/libjq-go v1.0.1-0.20200205115921-27e93c18c17f/go.mod h1:+SYqi5wsNjtQVlkPg0Ep5IOuN+ydg79Jo/gk4/PuS8c=
101101
github.com/flant/libjq-go v1.6.1-0.20200331115542-04a1a2e80daa/go.mod h1:+SYqi5wsNjtQVlkPg0Ep5IOuN+ydg79Jo/gk4/PuS8c=
102+
github.com/flant/libjq-go v1.6.1-0.20200401092614-198670408da1 h1:pOPBJDB7PZz/SKa13mlR3bvkRJ0KWKRe6v+KZOObkKw=
102103
github.com/flant/libjq-go v1.6.1-0.20200401092614-198670408da1/go.mod h1:+SYqi5wsNjtQVlkPg0Ep5IOuN+ydg79Jo/gk4/PuS8c=
103104
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
104105
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=

pkg/hook/hook.go

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/flant/shell-operator/pkg/app"
1818
"github.com/flant/shell-operator/pkg/executor"
1919
"github.com/flant/shell-operator/pkg/hook/controller"
20+
"github.com/flant/shell-operator/pkg/metrics_storage"
2021
)
2122

2223
type CommonHook interface {
@@ -66,38 +67,50 @@ func (h *Hook) GetHookController() controller.HookController {
6667
return h.HookController
6768
}
6869

69-
// Run executes the hook script and returns the metric operations it wrote.
//
// It refreshes snapshots for the given binding contexts, converts them to the
// versioned form and writes them to a temporary file, creates an empty
// temporary metrics file, and runs the hook with BINDING_CONTEXT_PATH and
// METRICS_PATH added to the current process environment. After a successful
// run the metrics file is parsed into a list of MetricOperation values.
//
// Both temporary files are removed when Run returns unless
// app.DebugKeepTmpFiles is "yes". On any error the returned slice is nil.
//
// NOTE(review): bindingType is not referenced anywhere in this body.
func (h *Hook) Run(bindingType BindingType, context []BindingContext, logLabels map[string]string) error {
70+
func (h *Hook) Run(bindingType BindingType, context []BindingContext, logLabels map[string]string) ([]metrics_storage.MetricOperation, error) {
7071
// Refresh snapshots
7172
freshBindingContext := h.HookController.UpdateSnapshots(context)
7273

7374
versionedContextList := ConvertBindingContextList(h.Config.Version, freshBindingContext)
7475

7576
contextPath, err := h.prepareBindingContextJsonFile(versionedContextList)
7677
if err != nil {
77-
return err
78+
return nil, err
7879
}
80+
81+
metricsPath, err := h.prepareMetricsFile()
82+
if err != nil {
83+
// NOTE(review): this return happens before the cleanup defer below is
// registered, so the context file created above is not removed on this path.
return nil, err
84+
}
85+
7986
// Remove the temporary context and metrics files on hook exit,
// unless debugging asks to keep them.
8087
defer func() {
81-
if app.DebugKeepTmpFiles == "yes" {
82-
return
88+
if app.DebugKeepTmpFiles != "yes" {
89+
os.Remove(contextPath)
90+
os.Remove(metricsPath)
8391
}
84-
os.Remove(contextPath)
8592
}()
8693

8794
envs := []string{}
8895
envs = append(envs, os.Environ()...)
8996
// NOTE(review): METRICS_PATH is only exported when contextPath is non-empty;
// whether contextPath can be empty is not visible here — confirm this
// coupling is intended.
if contextPath != "" {
9097
envs = append(envs, fmt.Sprintf("BINDING_CONTEXT_PATH=%s", contextPath))
98+
envs = append(envs, fmt.Sprintf("METRICS_PATH=%s", metricsPath))
9199
}
92100

93101
hookCmd := executor.MakeCommand(path.Dir(h.Path), h.Path, []string{}, envs)
94102

95103
err = executor.RunAndLogLines(hookCmd, logLabels)
96104
if err != nil {
97-
return fmt.Errorf("%s FAILED: %s", h.Name, err)
105+
// The metrics file is not read when the hook fails.
return nil, fmt.Errorf("%s FAILED: %s", h.Name, err)
106+
}
107+
108+
metrics, err := metrics_storage.MetricOperationsFromFile(metricsPath)
109+
if err != nil {
110+
return nil, fmt.Errorf("got bad metrics: %s", err)
98111
}
99112

100-
return nil
113+
return metrics, nil
101114
}
102115

103116
func (h *Hook) SafeName() string {
@@ -150,3 +163,14 @@ func (h *Hook) prepareBindingContextJsonFile(context BindingContextList) (string
150163

151164
return bindingContextPath, nil
152165
}
166+
167+
// prepareMetricsFile creates an empty file for the hook to write metric
// operations into and returns its path.
//
// The file is placed in h.TmpDir and named
// "hook-<SafeName>-metrics-<uuid>.json" with a fresh UUID per call.
// It is created with mode 0644. On a write error the returned path is empty.
func (h *Hook) prepareMetricsFile() (string, error) {
168+
metricsPath := filepath.Join(h.TmpDir, fmt.Sprintf("hook-%s-metrics-%s.json", h.SafeName(), uuid.NewV4().String()))
169+
170+
// Write zero bytes so the file exists before the hook starts.
err := ioutil.WriteFile(metricsPath, []byte{}, 0644)
171+
if err != nil {
172+
return "", err
173+
}
174+
175+
return metricsPath, nil
176+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package metrics_storage
2+
3+
import (
4+
"bytes"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"io/ioutil"
9+
)
10+
11+
// MetricOperation is a single metric update decoded from JSON, for example
// {"name":"metric_name","add":1,"labels":{"label1":"value1"}}.
type MetricOperation struct {
12+
// Name is the metric name, read from the "name" key.
Name string `json:"name"`
13+
// Add is the value read from the "add" key; it stays nil when the key is absent.
Add *float64 `json:"add,omitempty"`
14+
// Set is the value read from the "set" key; it stays nil when the key is absent.
Set *float64 `json:"set,omitempty"`
15+
// Labels holds the label name/value pairs read from the "labels" key.
Labels map[string]string `json:"labels"`
16+
}
17+
18+
// MetricOperationsFromReader decodes a stream of consecutive JSON values
// from r into MetricOperation items, in the order they appear.
//
// Decoding stops at io.EOF. If the stream holds no values, the result is an
// empty, non-nil slice. On any other decode error it returns nil and that
// error; operations decoded before the failure are discarded.
//
// NOTE(review): no validation is done here (empty name, neither or both of
// "add"/"set" present); such items are returned as decoded.
func MetricOperationsFromReader(r io.Reader) ([]MetricOperation, error) {
19+
var operations = make([]MetricOperation, 0)
20+
21+
dec := json.NewDecoder(r)
22+
for {
23+
// A fresh value per iteration, so fields from a previous item never leak
// into the next one.
var metricOperation MetricOperation
24+
if err := dec.Decode(&metricOperation); err == io.EOF {
25+
break
26+
} else if err != nil {
27+
return nil, err
28+
}
29+
30+
operations = append(operations, metricOperation)
31+
}
32+
33+
return operations, nil
34+
}
35+
36+
// MetricOperationsFromBytes decodes metric operations from an in-memory
// buffer by wrapping data in a reader and calling MetricOperationsFromReader.
func MetricOperationsFromBytes(data []byte) ([]MetricOperation, error) {
37+
return MetricOperationsFromReader(bytes.NewReader(data))
38+
}
39+
40+
// MetricOperationsFromFile reads the whole file at filePath and decodes the
// metric operations it contains.
//
// A read failure is returned as "cannot read <path>: <cause>". A file of zero
// length yields (nil, nil) without invoking the decoder; otherwise the result
// is whatever MetricOperationsFromBytes returns for the file content.
func MetricOperationsFromFile(filePath string) ([]MetricOperation, error) {
41+
data, err := ioutil.ReadFile(filePath)
42+
if err != nil {
43+
return nil, fmt.Errorf("cannot read %s: %s", filePath, err)
44+
}
45+
46+
// NOTE(review): an empty file yields a nil slice here, while the reader path
// returns a non-nil empty slice for input with no JSON values.
if len(data) == 0 {
47+
return nil, nil
48+
}
49+
return MetricOperationsFromBytes(data)
50+
}

pkg/metrics_storage/metrics_storage.go

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package metrics_storage
22

33
import (
44
"context"
5+
"fmt"
56

7+
utils "github.com/flant/shell-operator/pkg/utils/labels"
68
"github.com/prometheus/client_golang/prometheus"
79
log "github.com/sirupsen/logrus"
810
)
@@ -38,31 +40,49 @@ func (m *MetricStorage) Stop() {
3840
}
3941
}
4042

41-
// Start launches a goroutine that receives metrics from MetricChan and calls
// store on each of them with this storage. The goroutine returns when the
// storage context is done.
func (storage *MetricStorage) Start() {
43+
func (m *MetricStorage) Start() {
4244
go func() {
4345
for {
4446
select {
45-
case metric := <-storage.MetricChan:
46-
metric.store(storage)
47-
case <-storage.ctx.Done():
47+
case metric := <-m.MetricChan:
48+
metric.store(m)
49+
case <-m.ctx.Done():
4850
return
4951
}
5052
}
5153
}()
5254
}
5355

54-
// SendGauge puts a gauge metric on MetricChan. The metric name is the
// storage Prefix followed by metric.
func (storage *MetricStorage) SendGauge(metric string, value float64, labels map[string]string) {
55-
storage.MetricChan <- NewGaugeMetric(storage.Prefix+metric, value, labels)
56+
func (m *MetricStorage) SendGauge(metric string, value float64, labels map[string]string) {
57+
m.MetricChan <- NewGaugeMetric(m.Prefix+metric, value, labels)
5658
}
57-
// SendCounter puts a counter metric on MetricChan. The metric name is the
// storage Prefix followed by metric.
func (storage *MetricStorage) SendCounter(metric string, value float64, labels map[string]string) {
58-
storage.MetricChan <- NewCounterMetric(storage.Prefix+metric, value, labels)
59+
func (m *MetricStorage) SendCounter(metric string, value float64, labels map[string]string) {
60+
m.MetricChan <- NewCounterMetric(m.Prefix+metric, value, labels)
5961
}
6062

61-
// SendGaugeNoPrefix puts a gauge metric on MetricChan using metric as the
// name exactly as given, without the storage Prefix.
func (storage *MetricStorage) SendGaugeNoPrefix(metric string, value float64, labels map[string]string) {
62-
storage.MetricChan <- NewGaugeMetric(metric, value, labels)
63+
func (m *MetricStorage) SendGaugeNoPrefix(metric string, value float64, labels map[string]string) {
64+
m.MetricChan <- NewGaugeMetric(metric, value, labels)
6365
}
64-
// SendCounterNoPrefix puts a counter metric on MetricChan using metric as
// the name exactly as given, without the storage Prefix.
func (storage *MetricStorage) SendCounterNoPrefix(metric string, value float64, labels map[string]string) {
65-
storage.MetricChan <- NewCounterMetric(metric, value, labels)
66+
func (m *MetricStorage) SendCounterNoPrefix(metric string, value float64, labels map[string]string) {
67+
m.MetricChan <- NewCounterMetric(metric, value, labels)
68+
}
69+
70+
// SendBatch applies a list of metric operations in order.
//
// For every operation, its own labels are combined with the labels argument
// via utils.MergeLabels. An operation with a non-nil Add is sent as a counter
// and one with a non-nil Set is sent as a gauge; in both cases the name is
// used without the storage Prefix. An operation with neither field stops the
// batch and an error naming the metric is returned.
//
// NOTE(review): operations that precede an invalid one have already been sent
// when the error is returned, so a bad batch can be partially applied.
// NOTE(review): if both Add and Set are present, only Add is applied.
func (m *MetricStorage) SendBatch(ops []MetricOperation, labels map[string]string) error {
71+
// Apply metric operations
72+
for _, metricOp := range ops {
73+
// This labels shadows the parameter for the rest of the iteration; the
// parameter itself is read on the right-hand side before the shadowing.
labels := utils.MergeLabels(metricOp.Labels, labels)
74+
75+
if metricOp.Add != nil {
76+
m.SendCounterNoPrefix(metricOp.Name, *metricOp.Add, labels)
77+
continue
78+
}
79+
if metricOp.Set != nil {
80+
m.SendGaugeNoPrefix(metricOp.Name, *metricOp.Set, labels)
81+
continue
82+
}
83+
return fmt.Errorf("no operation in metric from module hook, name=%s", metricOp.Name)
84+
}
85+
return nil
6686
}
6787

6888
type Metric interface {

pkg/shell-operator/operator.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,14 @@ func (op *ShellOperator) TaskHandler(t task.Task) queue.TaskResult {
294294
}
295295
}
296296

297-
err := taskHook.Run(hookMeta.BindingType, hookMeta.BindingContext, hookLogLabels)
297+
metrics, err := taskHook.Run(hookMeta.BindingType, hookMeta.BindingContext, hookLogLabels)
298+
299+
if err == nil {
300+
err = op.MetricStorage.SendBatch(metrics, map[string]string{
301+
"hook": hookMeta.HookName,
302+
})
303+
}
304+
298305
if err != nil {
299306
hookLabel := taskHook.SafeName()
300307

0 commit comments

Comments
 (0)