@@ -26,13 +26,29 @@ import (
26
26
"golang.org/x/net/context"
27
27
28
28
"github.com/prometheus/client_golang/prometheus"
29
+ "github.com/prometheus/client_golang/prometheus/promauto"
29
30
)
30
31
31
32
var (
32
- counterSources = flagx.StringArray {}
33
- gaugeSources = flagx.StringArray {}
34
- project = flag .String ("project" , "" , "GCP project name." )
35
- refresh = flag .Duration ("refresh" , 5 * time .Minute , "Interval between updating metrics." )
33
+ gaugeSources = flagx.StringArray {}
34
+ project = flag .String ("project" , "" , "GCP project name." )
35
+ refresh = flag .Duration ("refresh" , 5 * time .Minute , "Interval between updating metrics." )
36
+ keepAlive = flag .Bool ("keepAlive" , false , "Keep the process alive even if query fails to execute." )
37
+
38
+ successFilesCounter = promauto .NewCounterVec (prometheus.CounterOpts {
39
+ Name : "bqx_success_files_executed_total" ,
40
+ Help : "The total number of successfully executed files" ,
41
+ }, []string {"filename" })
42
+
43
+ failedFilesCounter = promauto .NewCounterVec (prometheus.CounterOpts {
44
+ Name : "bqx_failed_files_executed_total" ,
45
+ Help : "The total number of failed executed files" ,
46
+ }, []string {"filename" })
47
+ updateDuration = promauto .NewHistogramVec (prometheus.HistogramOpts {
48
+ Name : "bqx_query_runtime_duration_seconds" ,
49
+ Help : "Duration taken for updating files" ,
50
+ Buckets : []float64 {.1 , .25 , .5 , 1 , 2.5 , 5 , 10 , 30 , 60 , 120 , 300 , 600 },
51
+ }, []string {"filename" , "status" })
36
52
)
37
53
38
54
func init () {
@@ -69,12 +85,13 @@ func fileToQuery(filename string, vars map[string]string) string {
69
85
return q
70
86
}
71
87
72
- func reloadRegisterUpdate (client * bigquery.Client , files []setup.File , vars map [string ]string ) {
88
+ func reloadRegisterUpdate (client * bigquery.Client , files []setup.File , vars map [string ]string , keepAlive bool ) {
73
89
var wg sync.WaitGroup
74
90
for i := range files {
75
91
wg .Add (1 )
76
92
go func (f * setup.File ) {
77
93
modified , err := f .IsModified ()
94
+ start := time .Now ()
78
95
if modified && err == nil {
79
96
c := sql .NewCollector (
80
97
newRunner (client ), prometheus .GaugeValue ,
@@ -85,14 +102,21 @@ func reloadRegisterUpdate(client *bigquery.Client, files []setup.File, vars map[
85
102
// uses the same name but changes the metrics reported. Because
86
103
// this cannot be recovered, we use rtx.Must to exit and allow
87
104
// the runtime environment to restart.
88
- rtx .Must (f .Register (c ), "Failed to register collector: aborting" )
105
+ err = f .Register (c )
106
+ if ! keepAlive {
107
+ rtx .Must (f .Register (c ), "Failed to register collector: aborting" )
108
+ }
89
109
} else {
90
- start := time .Now ()
91
110
err = f .Update ()
92
111
log .Println ("Updating:" , fileToMetric (f .Name ), time .Since (start ))
93
112
}
94
113
if err != nil {
114
+ failedFilesCounter .WithLabelValues (fileToMetric (f .Name )).Inc ()
115
+ updateDuration .WithLabelValues (fileToMetric (f .Name ), "failed" ).Observe (time .Since (start ).Seconds ())
95
116
log .Println ("Error:" , f .Name , err )
117
+ } else {
118
+ successFilesCounter .WithLabelValues (fileToMetric (f .Name )).Inc ()
119
+ updateDuration .WithLabelValues (fileToMetric (f .Name ), "success" ).Observe (time .Since (start ).Seconds ())
96
120
}
97
121
wg .Done ()
98
122
}(& files [i ])
@@ -125,7 +149,7 @@ func main() {
125
149
}
126
150
127
151
for mainCtx .Err () == nil {
128
- reloadRegisterUpdate (client , files , vars )
152
+ reloadRegisterUpdate (client , files , vars , * keepAlive )
129
153
sleepUntilNext (* refresh )
130
154
}
131
155
}
0 commit comments