Skip to content

Commit c6bd492

Browse files
committed
Disable prometheus port when not allowed (#1078) and add unit test for opentelemetry (#1088)
1 parent 66402db commit c6bd492

File tree

2 files changed

+260
-9
lines changed

2 files changed

+260
-9
lines changed

pkg/observability/opentelemetry.go

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ package observability
1616
import (
1717
"context"
1818
"fmt"
19+
"net"
1920
"net/http"
21+
"strconv"
2022
"time"
2123

2224
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -36,6 +38,7 @@ var (
3638
labelNodeStatusKey = attribute.Key("node/status")
3739
labelNodeNameKey = attribute.Key("node/name")
3840
labelEventIDKey = attribute.Key("node/event-id")
41+
metricsEndpoint = "/metrics"
3942
)
4043

4144
// Metrics represents the stats for observability
@@ -62,13 +65,14 @@ func InitMetrics(enabled bool, port int) (Metrics, error) {
6265

6366
// Starts an async process to collect golang runtime stats
6467
// go.opentelemetry.io/contrib/instrumentation/runtime
65-
if err = runtime.Start(
66-
runtime.WithMeterProvider(provider),
67-
runtime.WithMinimumReadMemStatsInterval(1*time.Second)); err != nil {
68+
err = runtime.Start(runtime.WithMeterProvider(provider), runtime.WithMinimumReadMemStatsInterval(1*time.Second))
69+
if err != nil {
6870
return Metrics{}, fmt.Errorf("failed to start Go runtime metrics collection: %w", err)
6971
}
7072

71-
go serveMetrics(port)
73+
if enabled {
74+
serveMetrics(port)
75+
}
7276

7377
return metrics, nil
7478
}
@@ -135,10 +139,19 @@ func registerMetricsWith(provider *metric.MeterProvider) (Metrics, error) {
135139
}, nil
136140
}
137141

138-
func serveMetrics(port int) {
139-
log.Info().Msgf("Starting to serve handler /metrics, port %d", port)
140-
http.Handle("/metrics", promhttp.Handler())
141-
if err := http.ListenAndServe(fmt.Sprintf(":%d", port), nil); err != nil {
142-
log.Err(err).Msg("Failed to listen and serve http server")
142+
func serveMetrics(port int) *http.Server {
143+
http.Handle(metricsEndpoint, promhttp.Handler())
144+
145+
server := &http.Server{
146+
Addr: net.JoinHostPort("", strconv.Itoa(port)),
143147
}
148+
149+
go func() {
150+
log.Info().Msgf("Starting to serve handler %s, port %d", metricsEndpoint, port)
151+
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
152+
log.Err(err).Msg("Failed to listen and serve http server")
153+
}
154+
}()
155+
156+
return server
144157
}
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
// Copyright 2016-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
// not use this file except in compliance with the License. A copy of the
5+
// License is located at
6+
//
7+
// http://aws.amazon.com/apache2.0/
8+
//
9+
// or in the "license" file accompanying this file. This file is distributed
10+
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
// express or implied. See the License for the specific language governing
12+
// permissions and limitations under the License.
13+
14+
package observability
15+
16+
import (
17+
"context"
18+
"errors"
19+
"fmt"
20+
"net"
21+
"net/http"
22+
"net/http/httptest"
23+
"strconv"
24+
"strings"
25+
"testing"
26+
"time"
27+
28+
"github.com/prometheus/client_golang/prometheus/promhttp"
29+
"go.opentelemetry.io/otel/attribute"
30+
"go.opentelemetry.io/otel/exporters/prometheus"
31+
api "go.opentelemetry.io/otel/metric"
32+
"go.opentelemetry.io/otel/sdk/metric"
33+
34+
h "github.com/aws/aws-node-termination-handler/pkg/test"
35+
)
36+
37+
// Fixture values shared by the tests in this file. They are never reassigned,
// so they are declared as constants rather than mutable package-level state.
const (
	// mockNth is the value expected in the otel_scope_name label of scraped metrics.
	mockNth        = "aws.node.termination.handler"
	mockErrorEvent = "mockErrorEvent"
	mockAction     = "cordon-and-drain"
	mockNodeName1  = "nodeName1"
	mockNodeName2  = "nodeName2"
	mockNodeName3  = "nodeName3"
	mockEventID1   = "eventID1"
	mockEventID2   = "eventID2"
	mockEventID3   = "eventID3"
	successStatus  = "success"
	errorStatus    = "error"
	// mockDefaultPort is the port the metrics server is started on in TestServeMetrics.
	mockDefaultPort = 9092
	// mockClosedPort is a port on which nothing is expected to be listening.
	mockClosedPort = 9093
)
52+
53+
func TestInitMetrics(t *testing.T) {
54+
getMetrics(t)
55+
56+
rr := mockMetricsRequest()
57+
58+
validateStatus(t, rr)
59+
60+
metricsMap := getMetricsMap(rr.Body.String())
61+
62+
runtimeMetrics := []string{
63+
"go_gc_gogc_percent",
64+
"go_memstats_frees_total",
65+
"go_goroutines",
66+
}
67+
68+
for _, metricName := range runtimeMetrics {
69+
_, exists := metricsMap[metricName]
70+
h.Assert(t, exists, fmt.Sprintf("%v metric should be present", metricName))
71+
}
72+
}
73+
74+
func TestErrorEventsInc(t *testing.T) {
75+
metrics := getMetrics(t)
76+
77+
metrics.ErrorEventsInc(mockErrorEvent)
78+
79+
rr := mockMetricsRequest()
80+
81+
validateStatus(t, rr)
82+
83+
metricsMap := getMetricsMap(rr.Body.String())
84+
85+
validateEventErrorTotal(t, metricsMap, 1)
86+
validateActionTotalV2(t, metricsMap, 0, successStatus)
87+
validateActionTotalV2(t, metricsMap, 0, errorStatus)
88+
}
89+
90+
func TestNodeActionsInc(t *testing.T) {
91+
metrics := getMetrics(t)
92+
93+
metrics.NodeActionsInc(mockAction, mockNodeName1, mockEventID1, nil)
94+
metrics.NodeActionsInc(mockAction, mockNodeName2, mockEventID2, nil)
95+
metrics.NodeActionsInc(mockAction, mockNodeName3, mockEventID3, errors.New("mockError"))
96+
97+
rr := mockMetricsRequest()
98+
99+
validateStatus(t, rr)
100+
101+
metricsMap := getMetricsMap(rr.Body.String())
102+
103+
validateEventErrorTotal(t, metricsMap, 0)
104+
validateActionTotalV2(t, metricsMap, 2, successStatus)
105+
validateActionTotalV2(t, metricsMap, 1, errorStatus)
106+
}
107+
108+
func TestRegisterMetricsWith(t *testing.T) {
109+
const errorEventMetricsTotal = 23
110+
const successActionMetricsTotal = 31
111+
const errorActionMetricsTotal = 97
112+
113+
metrics := getMetrics(t)
114+
115+
errorEventLables := []attribute.KeyValue{labelEventErrorWhereKey.String(mockErrorEvent)}
116+
successActionLables := []attribute.KeyValue{labelNodeActionKey.String(mockAction), labelNodeStatusKey.String(successStatus)}
117+
errorActionLables := []attribute.KeyValue{labelNodeActionKey.String(mockAction), labelNodeStatusKey.String(errorStatus)}
118+
119+
for i := 0; i < errorEventMetricsTotal; i++ {
120+
metrics.errorEventsCounter.Add(context.Background(), 1, api.WithAttributes(errorEventLables...))
121+
}
122+
for i := 0; i < successActionMetricsTotal; i++ {
123+
metrics.actionsCounterV2.Add(context.Background(), 1, api.WithAttributes(successActionLables...))
124+
}
125+
for i := 0; i < errorActionMetricsTotal; i++ {
126+
metrics.actionsCounterV2.Add(context.Background(), 1, api.WithAttributes(errorActionLables...))
127+
}
128+
129+
rr := mockMetricsRequest()
130+
131+
validateStatus(t, rr)
132+
133+
metricsMap := getMetricsMap(rr.Body.String())
134+
135+
validateEventErrorTotal(t, metricsMap, errorEventMetricsTotal)
136+
validateActionTotalV2(t, metricsMap, successActionMetricsTotal, successStatus)
137+
validateActionTotalV2(t, metricsMap, errorActionMetricsTotal, errorStatus)
138+
}
139+
140+
func TestServeMetrics(t *testing.T) {
141+
server := serveMetrics(mockDefaultPort)
142+
143+
defer func() {
144+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
145+
defer cancel()
146+
server.Shutdown(ctx)
147+
}()
148+
149+
time.Sleep(100 * time.Millisecond)
150+
151+
conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", mockDefaultPort), time.Second)
152+
if err != nil {
153+
t.Errorf("server not listening on port %d: %v", mockDefaultPort, err)
154+
}
155+
conn.Close()
156+
157+
conn, err = net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", mockClosedPort), time.Second)
158+
if err == nil {
159+
conn.Close()
160+
t.Errorf("server should not listening on port %d: %v", mockClosedPort, err)
161+
}
162+
}
163+
164+
func getMetrics(t *testing.T) *Metrics {
165+
exporter, err := prometheus.New()
166+
if err != nil {
167+
t.Errorf("failed to create Prometheus exporter: %v", err)
168+
}
169+
provider := metric.NewMeterProvider(metric.WithReader(exporter))
170+
metrics, err := registerMetricsWith(provider)
171+
if err != nil {
172+
t.Errorf("failed to register metrics with Prometheus provider: %v", err)
173+
}
174+
metrics.enabled = true
175+
176+
t.Cleanup(func() {
177+
if provider != nil {
178+
provider.Shutdown(context.Background())
179+
}
180+
if exporter != nil {
181+
exporter.Shutdown(context.Background())
182+
}
183+
})
184+
185+
return &metrics
186+
}
187+
188+
func mockMetricsRequest() *httptest.ResponseRecorder {
189+
handler := promhttp.Handler()
190+
req := httptest.NewRequest("GET", metricsEndpoint, nil)
191+
rr := httptest.NewRecorder()
192+
handler.ServeHTTP(rr, req)
193+
return rr
194+
}
195+
196+
func validateStatus(t *testing.T, rr *httptest.ResponseRecorder) {
197+
status := rr.Code
198+
h.Equals(t, http.StatusOK, status)
199+
}
200+
201+
// getMetricsMap parses a metrics response body into a map from the sample
// identifier (metric name plus any label set, i.e. everything before the first
// space) to the remainder of the line (the sample value).
//
// Each line is trimmed once before it is inspected, so indented lines and
// CRLF line endings do not leak whitespace into keys or values. Blank lines,
// comment lines (first non-blank character '#') and lines without a value are
// skipped. If the same identifier appears more than once, the last one wins.
func getMetricsMap(body string) map[string]string {
	metricsMap := make(map[string]string)
	for _, rawLine := range strings.Split(body, "\n") {
		line := strings.TrimSpace(rawLine)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		key, value, found := strings.Cut(line, " ")
		if !found {
			// No separator means there is no value to record.
			continue
		}
		// Tolerate more than one blank between the identifier and the value.
		metricsMap[key] = strings.TrimSpace(value)
	}
	return metricsMap
}
221+
222+
func validateEventErrorTotal(t *testing.T, metricsMap map[string]string, expectedTotal int) {
223+
eventErrorTotalKey := fmt.Sprintf("events_error_total{event_error_where=\"%v\",otel_scope_name=\"%v\",otel_scope_version=\"\"}", mockErrorEvent, mockNth)
224+
actualValue, exists := metricsMap[eventErrorTotalKey]
225+
if !exists {
226+
actualValue = "0"
227+
}
228+
h.Equals(t, strconv.Itoa(expectedTotal), actualValue)
229+
}
230+
231+
func validateActionTotalV2(t *testing.T, metricsMap map[string]string, expectedTotal int, nodeStatus string) {
232+
actionTotalKey := fmt.Sprintf("actions_total{node_action=\"%v\",node_status=\"%v\",otel_scope_name=\"%v\",otel_scope_version=\"\"}", mockAction, nodeStatus, mockNth)
233+
actualValue, exists := metricsMap[actionTotalKey]
234+
if !exists {
235+
actualValue = "0"
236+
}
237+
h.Equals(t, strconv.Itoa(expectedTotal), actualValue)
238+
}

0 commit comments

Comments
 (0)