Skip to content

Commit 697a426

Browse files
authored
Ansible add metrics (#5438)
* Users can now create metrics through Ansible Metrics are created with the osdk_metrics module in operator-sdk-ansible-util which communicates with an ansible API server. Signed-off-by: austin <[email protected]> * bump collection Signed-off-by: austin <[email protected]> * cleanup Signed-off-by: austin <[email protected]> * Add copyright info Signed-off-by: austin <[email protected]> * clean up log message levels Signed-off-by: austin <[email protected]> * add metrics address option and fix errors Signed-off-by: austin <[email protected]> * log cleanup Signed-off-by: austin <[email protected]> * sanity checks Signed-off-by: austin <[email protected]> * comments Signed-off-by: austin <[email protected]> * bump retries to avoid flake Signed-off-by: austin <[email protected]> * feedback from ryan Signed-off-by: austin <[email protected]> * helper functions Signed-off-by: austin <[email protected]> * handle error Signed-off-by: austin <[email protected]> * refactor metric creation into helper Signed-off-by: austin <[email protected]> * revert Run changes for now Signed-off-by: austin <[email protected]> * put Run in goroutine, not goroutine in Run Signed-off-by: austin <[email protected]> * s/server/err since it is error type Signed-off-by: austin <[email protected]> * read and close request body, catch error on negative counter.Add value Signed-off-by: austin <[email protected]> * explicitly ignore errors returned by copy Signed-off-by: austin <[email protected]>
1 parent 00cef3a commit 697a426

File tree

6 files changed

+249
-3
lines changed

6 files changed

+249
-3
lines changed

hack/generate/samples/internal/ansible/testdata/tasks/clusterannotationtest_test.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,5 @@
4747
name: "externalnamespace"
4848
register: external_namespace
4949
until: external_namespace.resources[0].metadata.labels["foo"] == "bar"
50+
retries: 6
5051

hack/tests/e2e-ansible-molecule.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ pip3 install cryptography==3.3.2 molecule==3.0.2
3131
pip3 install ansible-lint yamllint
3232
pip3 install docker==4.2.2 openshift==0.12.1 jmespath
3333
ansible-galaxy collection install 'kubernetes.core:==2.2.0'
34+
ansible-galaxy collection install 'operator_sdk.util:==0.4.0'
3435

3536
header_text "Copying molecule testdata scenarios"
3637
ROOTDIR="$(pwd)"
@@ -48,7 +49,6 @@ else
4849
fi
4950
KUSTOMIZE_PATH=${KUSTOMIZE} TEST_OPERATOR_NAMESPACE=default molecule test -s kind
5051

51-
5252
header_text "Running Default test with advanced-molecule-operator"
5353
pushd $TMPDIR/advanced-molecule-operator
5454

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// Copyright 2022 The Operator-SDK Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package apiserver
16+
17+
import (
18+
"encoding/json"
19+
"fmt"
20+
"io"
21+
"net/http"
22+
23+
logf "sigs.k8s.io/controller-runtime/pkg/log"
24+
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
25+
26+
"github.com/operator-framework/operator-sdk/internal/ansible/metrics"
27+
)
28+
29+
var log = logf.Log.WithName("apiserver")
30+
31+
type Options struct {
32+
Address string
33+
Port int
34+
}
35+
36+
func Run(options Options) error {
37+
mux := http.NewServeMux()
38+
mux.HandleFunc("/metrics", metricsHandler)
39+
40+
server := http.Server{
41+
Addr: fmt.Sprintf("%s:%d", options.Address, options.Port),
42+
Handler: mux,
43+
}
44+
log.Info("Starting to serve metrics listener", "Address", server.Addr)
45+
return server.ListenAndServe()
46+
}
47+
48+
func metricsHandler(w http.ResponseWriter, r *http.Request) {
49+
defer func() {
50+
_, _ = io.Copy(io.Discard, r.Body)
51+
r.Body.Close()
52+
}()
53+
log.V(3).Info(fmt.Sprintf("%s %s", r.Method, r.URL))
54+
55+
var userMetric metrics.UserMetric
56+
57+
switch r.Method {
58+
case http.MethodPost:
59+
log.V(3).Info("The apiserver has received a POST")
60+
err := json.NewDecoder(r.Body).Decode(&userMetric)
61+
if err != nil {
62+
log.Info(err.Error())
63+
http.Error(w, err.Error(), http.StatusBadRequest)
64+
return
65+
}
66+
err = metrics.HandleUserMetric(crmetrics.Registry, userMetric)
67+
if err != nil {
68+
log.Info(err.Error())
69+
http.Error(w, err.Error(), http.StatusBadRequest)
70+
}
71+
default:
72+
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
73+
}
74+
75+
}

internal/ansible/controller/reconcile.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,9 +187,8 @@ func (r *AnsibleOperatorReconciler) Reconcile(ctx context.Context, request recon
187187
return reconcile.Result{}, err
188188
}
189189
}
190-
191190
if module, found := event.EventData["task_action"]; found {
192-
if module == "operator_sdk.util.requeue_after" && event.Event != eventapi.EventRunnerOnFailed {
191+
if module == "operator_sdk.util.requeue_after" || module == "requeue_after" && event.Event != eventapi.EventRunnerOnFailed {
193192
if data, exists := event.EventData["res"]; exists {
194193
if fields, check := data.(map[string]interface{}); check {
195194
requeueDuration, err := time.ParseDuration(fields["period"].(string))

internal/ansible/metrics/metrics.go

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package metrics
1616

1717
import (
18+
"errors"
1819
"fmt"
1920

2021
"github.com/prometheus/client_golang/prometheus"
@@ -60,6 +61,8 @@ var (
6061
[]string{
6162
"GVK",
6263
})
64+
65+
userMetrics = map[string]prometheus.Collector{}
6366
)
6467

6568
func init() {
@@ -82,6 +85,165 @@ func RegisterBuildInfo(r prometheus.Registerer) {
8285
r.MustRegister(buildInfo)
8386
}
8487

88+
type UserMetric struct {
89+
Name string `json:"name" yaml:"name"`
90+
Help string `json:"description" yaml:"description"`
91+
Counter *UserMetricCounter `json:"counter,omitempty" yaml:"counter,omitempty"`
92+
Gauge *UserMetricGauge `json:"gauge,omitempty" yaml:"gauge,omitempty"`
93+
Histogram *UserMetricHistogram `json:"histogram,omitempty" yaml:"histogram,omitempty"`
94+
Summary *UserMetricSummary `json:"summary,omitempty" yaml:"summary,omitempty"`
95+
}
96+
97+
type UserMetricCounter struct {
98+
Inc bool `json:"increment,omitempty" yaml:"increment,omitempty"`
99+
Add float64 `json:"add,omitempty" yaml:"add,omitempty"`
100+
}
101+
102+
type UserMetricGauge struct {
103+
Set float64 `json:"set,omitempty" yaml:"set,omitempty"`
104+
Inc bool `json:"increment,omitempty" yaml:"increment,omitempty"`
105+
Dec bool `json:"decrement,omitempty" yaml:"decrement,omitempty"`
106+
SetToCurrentTime bool `json:"set_to_current_time,omitempty" yaml:"set_to_current_time,omitempty"`
107+
Add float64 `json:"add,omitempty" yaml:"add,omitempty"`
108+
Sub float64 `json:"subtract,omitempty" yaml:"subtract,omitempty"`
109+
}
110+
111+
type UserMetricHistogram struct {
112+
Observe float64 `json:"observe,omitempty" yaml:"observe,omitempty"`
113+
}
114+
115+
type UserMetricSummary struct {
116+
Observe float64 `json:"observe,omitempty" yaml:"observe,omitempty"`
117+
}
118+
119+
func validateMetricSpec(metricSpec UserMetric) error {
120+
var metricConfigs int
121+
if metricSpec.Counter != nil {
122+
metricConfigs++
123+
}
124+
if metricSpec.Gauge != nil {
125+
metricConfigs++
126+
}
127+
if metricSpec.Summary != nil {
128+
metricConfigs++
129+
}
130+
if metricSpec.Histogram != nil {
131+
metricConfigs++
132+
}
133+
if metricConfigs > 1 {
134+
return errors.New("only one metric can be processed at a time")
135+
} else if metricConfigs == 0 {
136+
return errors.New("a request should contain at least one metric")
137+
}
138+
return nil
139+
}
140+
141+
func handleCounter(metricSpec UserMetric, counter prometheus.Counter) error {
142+
if metricSpec.Counter == nil {
143+
return fmt.Errorf("cannot change metric type of %s, which is a counter", metricSpec.Name)
144+
}
145+
if metricSpec.Counter.Inc {
146+
counter.Inc()
147+
} else if metricSpec.Counter.Add != 0.0 {
148+
if metricSpec.Counter.Add < 0 {
149+
return errors.New("counter metrics cannot decrease in value")
150+
}
151+
counter.Add(metricSpec.Counter.Add)
152+
}
153+
return nil
154+
}
155+
156+
func handleGauge(metricSpec UserMetric, gauge prometheus.Gauge) error {
157+
if metricSpec.Gauge == nil {
158+
return fmt.Errorf("cannot change metric type of %s, which is a gauge", metricSpec.Name)
159+
}
160+
if metricSpec.Gauge.Inc {
161+
gauge.Inc()
162+
} else if metricSpec.Gauge.Dec {
163+
gauge.Dec()
164+
} else if metricSpec.Gauge.Add != 0.0 {
165+
gauge.Add(metricSpec.Gauge.Add)
166+
} else if metricSpec.Gauge.Sub != 0.0 {
167+
gauge.Sub(metricSpec.Gauge.Sub)
168+
} else if metricSpec.Gauge.Set != 0.0 {
169+
gauge.Set(metricSpec.Gauge.Set)
170+
} else if metricSpec.Gauge.SetToCurrentTime {
171+
gauge.SetToCurrentTime()
172+
}
173+
return nil
174+
}
175+
176+
func handleSummaryOrHistogram(metricSpec UserMetric, summary prometheus.Summary) error {
177+
if metricSpec.Histogram == nil && metricSpec.Summary == nil {
178+
return fmt.Errorf("cannot change metric type of %s, which is a histogram or summary", metricSpec.Name)
179+
}
180+
if metricSpec.Histogram != nil {
181+
summary.Observe(metricSpec.Histogram.Observe)
182+
} else if metricSpec.Summary != nil {
183+
summary.Observe(metricSpec.Summary.Observe)
184+
}
185+
return nil
186+
}
187+
188+
func ensureMetric(r prometheus.Registerer, metricSpec UserMetric) {
189+
if _, ok := userMetrics[metricSpec.Name]; !ok {
190+
// This is the first time we've seen this metric
191+
logf.Log.WithName("metrics").Info("Registering", "metric", metricSpec.Name)
192+
if metricSpec.Counter != nil {
193+
userMetrics[metricSpec.Name] = prometheus.NewCounter(prometheus.CounterOpts{
194+
Name: metricSpec.Name,
195+
Help: metricSpec.Help,
196+
})
197+
}
198+
if metricSpec.Gauge != nil {
199+
userMetrics[metricSpec.Name] = prometheus.NewGauge(prometheus.GaugeOpts{
200+
Name: metricSpec.Name,
201+
Help: metricSpec.Help,
202+
})
203+
}
204+
if metricSpec.Histogram != nil {
205+
userMetrics[metricSpec.Name] = prometheus.NewHistogram(prometheus.HistogramOpts{
206+
Name: metricSpec.Name,
207+
Help: metricSpec.Help,
208+
})
209+
}
210+
if metricSpec.Summary != nil {
211+
userMetrics[metricSpec.Name] = prometheus.NewSummary(prometheus.SummaryOpts{
212+
Name: metricSpec.Name,
213+
Help: metricSpec.Help,
214+
})
215+
}
216+
if err := r.Register(userMetrics[metricSpec.Name]); err != nil {
217+
logf.Log.WithName("metrics").Info("Unable to register %s metric with prometheus.", metricSpec.Name)
218+
}
219+
}
220+
}
221+
222+
func HandleUserMetric(r prometheus.Registerer, metricSpec UserMetric) error {
223+
if err := validateMetricSpec(metricSpec); err != nil {
224+
return err
225+
}
226+
ensureMetric(r, metricSpec)
227+
collector := userMetrics[metricSpec.Name]
228+
switch v := collector.(type) {
229+
// Gauge must be first, because a Counter is a Gauge, but a Gauge is not a Counter.
230+
case prometheus.Gauge:
231+
if err := handleGauge(metricSpec, v); err != nil {
232+
return err
233+
}
234+
case prometheus.Counter:
235+
if err := handleCounter(metricSpec, v); err != nil {
236+
return err
237+
}
238+
// Histogram and Summary interfaces are identical, so we accept either case.
239+
case prometheus.Histogram:
240+
if err := handleSummaryOrHistogram(metricSpec, v); err != nil {
241+
return err
242+
}
243+
}
244+
return nil
245+
}
246+
85247
func ReconcileSucceeded(gvk string) {
86248
defer recoverMetricPanic()
87249
reconcileResults.WithLabelValues(gvk, "succeeded").Inc()

internal/cmd/ansible-operator/run/cmd.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
3939
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
4040

41+
"github.com/operator-framework/operator-sdk/internal/ansible/apiserver"
4142
"github.com/operator-framework/operator-sdk/internal/ansible/controller"
4243
"github.com/operator-framework/operator-sdk/internal/ansible/events"
4344
"github.com/operator-framework/operator-sdk/internal/ansible/flags"
@@ -262,6 +263,14 @@ func run(cmd *cobra.Command, f *flags.Flags) {
262263
log.Error(err, "Error starting proxy.")
263264
os.Exit(1)
264265
}
266+
// start the ansible-operator api server
267+
go func() {
268+
err = apiserver.Run(apiserver.Options{
269+
Address: "localhost",
270+
Port: 5050,
271+
})
272+
done <- err
273+
}()
265274

266275
// start the operator
267276
go func() {

0 commit comments

Comments
 (0)