Skip to content

Commit 1b66d85

Browse files
author
Foivos Filippopoulos
committed
Add prometheus metrics
1 parent 122fbaa commit 1b66d85

File tree

5 files changed

+235
-57
lines changed

5 files changed

+235
-57
lines changed

main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"strings"
99

1010
log "github.com/sirupsen/logrus"
11+
"github.com/utilitywarehouse/gcp-disk-snapshotter/metrics"
1112
"github.com/utilitywarehouse/gcp-disk-snapshotter/models"
1213
"github.com/utilitywarehouse/gcp-disk-snapshotter/snapshot"
1314
"github.com/utilitywarehouse/gcp-disk-snapshotter/watch"
@@ -76,13 +77,19 @@ func main() {
7677
for _, l := range snapshotConfigs.Labels {
7778
log.Debug("label: ", l.Label.Key, " ", l.Label.Value)
7879
}
80+
81+
// Init metrics
82+
metrics := &metrics.Prometheus{}
83+
metrics.Init()
84+
7985
// Create a snapshotter
8086
gsc := snapshot.CreateGCPSnapClient(project, snapPrefix, zones)
8187

8288
// Start watching
8389
watcher := &watch.Watcher{
8490
GSC: gsc,
8591
WatchInterval: watchInterval,
92+
Metrics: metrics,
8693
}
8794
watcher.Watch(snapshotConfigs)
8895

metrics/mock_prometheus.go

Lines changed: 73 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

metrics/prometheus.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
package metrics
2+
3+
import (
4+
"net/http"
5+
"strconv"
6+
7+
"github.com/prometheus/client_golang/prometheus"
8+
"github.com/prometheus/client_golang/prometheus/promhttp"
9+
log "github.com/sirupsen/logrus"
10+
"github.com/utilitywarehouse/go-operational/op"
11+
)
12+
13+
// Prometheus holds the counter vectors exported by the snapshotter.
// The fields are only populated by Init, so Init must be called before any
// of the Update* methods.
type Prometheus struct {
14+
// createSnapshotSuccess counts snapshot-create API calls, labelled by "disk" and "success".
createSnapshotSuccess *prometheus.CounterVec
15+
// deleteSnapshotSuccess counts snapshot-delete API calls, labelled by "disk" and "success".
deleteSnapshotSuccess *prometheus.CounterVec
16+
// operationSuccess counts polled operations, labelled by "operation_type" and "success".
operationSuccess *prometheus.CounterVec
17+
}
18+
19+
// PrometheusInterface is the set of metric operations implemented by
// Prometheus. Consumers that depend on this interface rather than the concrete
// type can be handed a mock implementation in tests.
20+
type PrometheusInterface interface {
21+
// Init creates and registers the counters and starts the HTTP endpoints.
Init()
22+
// UpdateCreateSnapshotStatus records the outcome of one snapshot-create API call for disk.
UpdateCreateSnapshotStatus(disk string, success bool)
23+
// UpdateDeleteSnapshotStatus records the outcome of one snapshot-delete API call for disk.
UpdateDeleteSnapshotStatus(disk string, success bool)
24+
// UpdateOperationStatus records the outcome of one polled operation of the given type.
UpdateOperationStatus(operation_type string, success bool)
25+
}
26+
27+
// Init creates the three counter vectors, registers them with the default
// Prometheus registry and starts the HTTP server in a background goroutine.
//
// Registration uses prometheus.MustRegister, which panics if registration
// fails, so Init is expected to be called once per process.
func (p *Prometheus) Init() {
28+
p.createSnapshotSuccess = prometheus.NewCounterVec(prometheus.CounterOpts{
29+
Name: "gcp_disk_snapshotter_create_api_call_count",
30+
Help: "Success metric for snapshots created per disk",
31+
},
32+
[]string{
33+
// Name of the disk the snapshot was requested for
34+
"disk",
35+
// Result: "true" if the creation api call was successful, "false" otherwise
36+
"success",
37+
},
38+
)
39+
p.deleteSnapshotSuccess = prometheus.NewCounterVec(prometheus.CounterOpts{
40+
Name: "gcp_disk_snapshotter_delete_api_call_count",
41+
Help: "Success metric for snapshots deletion per disk",
42+
},
43+
[]string{
44+
// Name of the disk whose snapshot was being deleted
45+
"disk",
46+
// Result: "true" if the deletion api call was successful, "false" otherwise
47+
"success",
48+
},
49+
)
50+
p.operationSuccess = prometheus.NewCounterVec(prometheus.CounterOpts{
51+
Name: "gcp_disk_snapshotter_operation_count",
52+
Help: "Success metric for operations initiated by disk snapshotter",
53+
},
54+
[]string{
55+
// Kind of operation that was polled: "global" or "zonal"
56+
"operation_type",
57+
// Result: "true" if the operation was successful, "false" otherwise
58+
"success",
59+
},
60+
)
61+
prometheus.MustRegister(p.createSnapshotSuccess)
62+
prometheus.MustRegister(p.deleteSnapshotSuccess)
63+
prometheus.MustRegister(p.operationSuccess)
64+
65+
// Serve the endpoints in the background so Init returns to the caller.
go startServer()
66+
}
67+
68+
// startServer serves the operational endpoints under "/__/" and the Prometheus
// metrics under "/metrics" on port 5000. It blocks in http.ListenAndServe and
// reports a listener error through log.Fatal.
//
// NOTE(review): http.ListenAndServe uses a server without read/write timeouts;
// confirm that is acceptable for this internal endpoint.
func startServer() {
69+
log.Info("starting HTTP endpoints ...")
70+
71+
mux := http.NewServeMux()
72+
mux.Handle("/__/", op.NewHandler(
73+
op.NewStatus("gcp-disk-snapshotter", "gcp-disk-snapshotter handles snapshot creation/deletion on gcp for a given set of disks").
74+
AddOwner("infrastructure", "#infra").
75+
AddLink("github", "https://github.com/utilitywarehouse/gcp-disk-snapshotter").
76+
// NOTE(review): the revision is hard-coded rather than taken from the build.
SetRevision("master").
77+
// The "running" check always reports healthy.
AddChecker("running", func(cr *op.CheckResponse) { cr.Healthy("service is running") }).
78+
ReadyAlways(),
79+
))
80+
mux.Handle("/metrics", promhttp.Handler())
81+
82+
if err := http.ListenAndServe(":5000", mux); err != nil {
83+
log.Fatal("could not start HTTP router: ", err)
84+
}
85+
}
86+
87+
// UpdateCreateSnapshotStatus increments the create counter for the given disk,
// labelled with success "true" or "false" according to the outcome of the
// create attempt.
88+
func (p *Prometheus) UpdateCreateSnapshotStatus(disk string, success bool) {
89+
p.createSnapshotSuccess.With(prometheus.Labels{
90+
"disk": disk, "success": strconv.FormatBool(success),
91+
}).Inc()
92+
}
93+
94+
// UpdateDeleteSnapshotStatus increments the delete counter for the given disk,
// labelled with success "true" or "false" according to the outcome of the
// delete attempt.
95+
func (p *Prometheus) UpdateDeleteSnapshotStatus(disk string, success bool) {
96+
p.deleteSnapshotSuccess.With(prometheus.Labels{
97+
"disk": disk, "success": strconv.FormatBool(success),
98+
}).Inc()
99+
}
100+
101+
// UpdateOperationStatus increments the operation counter for the given
// operation type, labelled with success "true" or "false" according to the
// outcome of the operation.
102+
func (p *Prometheus) UpdateOperationStatus(operation_type string, success bool) {
103+
p.operationSuccess.With(prometheus.Labels{
104+
"operation_type": operation_type, "success": strconv.FormatBool(success),
105+
}).Inc()
106+
}

watch/watch.go

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"time"
66

77
log "github.com/sirupsen/logrus"
8+
"github.com/utilitywarehouse/gcp-disk-snapshotter/metrics"
89
"github.com/utilitywarehouse/gcp-disk-snapshotter/models"
910
"github.com/utilitywarehouse/gcp-disk-snapshotter/snapshot"
1011
compute "google.golang.org/api/compute/v1"
@@ -16,12 +17,13 @@ const GCPSnapshotTimestampLayout string = "2006-01-02T15:04:05Z07:00"
1617
type Watcher struct {
1718
GSC snapshot.GCPSnapClientInterface
1819
WatchInterval int
20+
Metrics metrics.PrometheusInterface
1921
}
2022

2123
type WatcherInterface interface {
2224
Watch(sc *models.SnapshotConfigs)
2325
CheckAndSnapDisks(disks []compute.Disk, retentionStart, lastAcceptedCreation int)
24-
deleteSnapshots(sl []compute.Snapshot)
26+
deleteSnapshot(s compute.Snapshot)
2527
createSnapshot(d compute.Disk)
2628
pollZonalOperation(operation, zone string)
2729
}
@@ -93,32 +95,38 @@ func (w *Watcher) CheckAndSnapDisks(disks []compute.Disk, retentionStart, lastAc
9395
}
9496

9597
// Delete old snaps
96-
if err := w.deleteSnapshots(snapsToDelete); err != nil {
97-
log.Error(err)
98+
for _, s := range snapsToDelete {
99+
if err := w.deleteSnapshot(s); err != nil {
100+
log.Error("error deleting snapshot: ", err)
101+
w.Metrics.UpdateDeleteSnapshotStatus(disk.Name, false)
102+
} else {
103+
w.Metrics.UpdateDeleteSnapshotStatus(disk.Name, true)
104+
}
98105
}
99106

100107
// Take snapshot if needed
101108
if snapNeeded {
102109
if err := w.createSnapshot(disk); err != nil {
103-
log.Error(err)
110+
log.Error("error creating snapshot: ", err)
111+
w.Metrics.UpdateCreateSnapshotStatus(disk.Name, false)
112+
} else {
113+
w.Metrics.UpdateCreateSnapshotStatus(disk.Name, true)
104114
}
105115
}
106116

107117
}
108118
}
109119

110-
func (w *Watcher) deleteSnapshots(sl []compute.Snapshot) error {
111-
for _, s := range sl {
112-
log.Info("Attempting to delete snapshot: ", s.Name)
113-
op, err := w.GSC.DeleteSnapshot(s.Name)
114-
if err != nil {
115-
return err
116-
}
120+
func (w *Watcher) deleteSnapshot(s compute.Snapshot) error {
121+
log.Info("Attempting to delete snapshot: ", s.Name)
122+
op, err := w.GSC.DeleteSnapshot(s.Name)
123+
if err != nil {
124+
return err
125+
}
117126

118-
// Delete snapshot is a global operation!!!
119-
go w.pollGlobalOperation(op)
127+
// Delete snapshot is a global operation!!!
128+
go w.pollGlobalOperation(op)
120129

121-
}
122130
return nil
123131
}
124132

@@ -141,10 +149,12 @@ func (w *Watcher) pollZonalOperation(operation, zone string) {
141149
status, err := w.GSC.GetZonalOperationStatus(operation, zone)
142150
if err != nil {
143151
log.Error("Operation failed: ", operation, err)
152+
w.Metrics.UpdateOperationStatus("zonal", false)
144153
break
145154
}
146155
if status == "DONE" {
147156
log.Info("Operation succeeded: ", operation)
157+
w.Metrics.UpdateOperationStatus("zonal", true)
148158
break
149159
}
150160
time.Sleep(1 * time.Second)
@@ -156,10 +166,12 @@ func (w *Watcher) pollGlobalOperation(operation string) {
156166
status, err := w.GSC.GetGlobalOperationStatus(operation)
157167
if err != nil {
158168
log.Error("Operation failed: ", operation, err)
169+
w.Metrics.UpdateOperationStatus("global", false)
159170
break
160171
}
161172
if status == "DONE" {
162173
log.Info("Operation succeeded: ", operation)
174+
w.Metrics.UpdateOperationStatus("global", true)
163175
break
164176
}
165177
time.Sleep(1 * time.Second)

0 commit comments

Comments
 (0)