Skip to content

Commit aef7565

Browse files
authored
Add metrics and keep app alive (#50)
* - Add metrics for successful and failed queries - Add keepAlive flag to not kill the app when the query fails * - Add metric names prefix - Remove duplicated metric calls - Add extra label to update duration metric
1 parent 0e81c96 commit aef7565

File tree

1 file changed

+32
-8
lines changed

1 file changed

+32
-8
lines changed

main.go

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,29 @@ import (
2626
"golang.org/x/net/context"
2727

2828
"github.com/prometheus/client_golang/prometheus"
29+
"github.com/prometheus/client_golang/prometheus/promauto"
2930
)
3031

3132
var (
32-
counterSources = flagx.StringArray{}
33-
gaugeSources = flagx.StringArray{}
34-
project = flag.String("project", "", "GCP project name.")
35-
refresh = flag.Duration("refresh", 5*time.Minute, "Interval between updating metrics.")
33+
gaugeSources = flagx.StringArray{}
34+
project = flag.String("project", "", "GCP project name.")
35+
refresh = flag.Duration("refresh", 5*time.Minute, "Interval between updating metrics.")
36+
keepAlive = flag.Bool("keepAlive", false, "Keep the process alive even if query fails to execute.")
37+
38+
successFilesCounter = promauto.NewCounterVec(prometheus.CounterOpts{
39+
Name: "bqx_success_files_executed_total",
40+
Help: "The total number of successfully executed files",
41+
}, []string{"filename"})
42+
43+
failedFilesCounter = promauto.NewCounterVec(prometheus.CounterOpts{
44+
Name: "bqx_failed_files_executed_total",
45+
Help: "The total number of failed executed files",
46+
}, []string{"filename"})
47+
updateDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
48+
Name: "bqx_query_runtime_duration_seconds",
49+
Help: "Duration taken for updating files",
50+
Buckets: []float64{.1, .25, .5, 1, 2.5, 5, 10, 30, 60, 120, 300, 600},
51+
}, []string{"filename", "status"})
3652
)
3753

3854
func init() {
@@ -69,12 +85,13 @@ func fileToQuery(filename string, vars map[string]string) string {
6985
return q
7086
}
7187

72-
func reloadRegisterUpdate(client *bigquery.Client, files []setup.File, vars map[string]string) {
88+
func reloadRegisterUpdate(client *bigquery.Client, files []setup.File, vars map[string]string, keepAlive bool) {
7389
var wg sync.WaitGroup
7490
for i := range files {
7591
wg.Add(1)
7692
go func(f *setup.File) {
7793
modified, err := f.IsModified()
94+
start := time.Now()
7895
if modified && err == nil {
7996
c := sql.NewCollector(
8097
newRunner(client), prometheus.GaugeValue,
@@ -85,14 +102,21 @@ func reloadRegisterUpdate(client *bigquery.Client, files []setup.File, vars map[
85102
// uses the same name but changes the metrics reported. Because
86103
// this cannot be recovered, we use rtx.Must to exit and allow
87104
// the runtime environment to restart.
88-
rtx.Must(f.Register(c), "Failed to register collector: aborting")
105+
err = f.Register(c)
106+
if !keepAlive {
107+
rtx.Must(f.Register(c), "Failed to register collector: aborting")
108+
}
89109
} else {
90-
start := time.Now()
91110
err = f.Update()
92111
log.Println("Updating:", fileToMetric(f.Name), time.Since(start))
93112
}
94113
if err != nil {
114+
failedFilesCounter.WithLabelValues(fileToMetric(f.Name)).Inc()
115+
updateDuration.WithLabelValues(fileToMetric(f.Name), "failed").Observe(time.Since(start).Seconds())
95116
log.Println("Error:", f.Name, err)
117+
} else {
118+
successFilesCounter.WithLabelValues(fileToMetric(f.Name)).Inc()
119+
updateDuration.WithLabelValues(fileToMetric(f.Name), "success").Observe(time.Since(start).Seconds())
96120
}
97121
wg.Done()
98122
}(&files[i])
@@ -125,7 +149,7 @@ func main() {
125149
}
126150

127151
for mainCtx.Err() == nil {
128-
reloadRegisterUpdate(client, files, vars)
152+
reloadRegisterUpdate(client, files, vars, *keepAlive)
129153
sleepUntilNext(*refresh)
130154
}
131155
}

0 commit comments

Comments
 (0)