Skip to content

Commit 9d673ea

Browse files
authored
RHOAIENG-13916: chore(odh-notebook-controller/tests): introduce oTel-based testing (#570)
1 parent c0772a3 commit 9d673ea

File tree

6 files changed

+263
-41
lines changed

6 files changed

+263
-41
lines changed

components/odh-notebook-controller/controllers/notebook_webhook.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,19 @@ import (
1919
"context"
2020
"encoding/json"
2121
"fmt"
22-
configv1 "github.com/openshift/api/config/v1"
2322
"net/http"
2423
"sort"
2524
"strings"
25+
"sync"
26+
27+
"go.opentelemetry.io/otel"
28+
"go.opentelemetry.io/otel/attribute"
29+
"go.opentelemetry.io/otel/trace"
2630

2731
"github.com/go-logr/logr"
2832
nbv1 "github.com/kubeflow/kubeflow/components/notebook-controller/api/v1"
2933
"github.com/kubeflow/kubeflow/components/notebook-controller/pkg/culler"
34+
configv1 "github.com/openshift/api/config/v1"
3035
admissionv1 "k8s.io/api/admission/v1"
3136
corev1 "k8s.io/api/core/v1"
3237
"k8s.io/apimachinery/pkg/api/equality"
@@ -57,6 +62,12 @@ type NotebookWebhook struct {
5762

5863
var proxyEnvVars = make(map[string]string, 3)
5964

65+
// https://github.com/open-telemetry/opentelemetry-go/pull/1674#issuecomment-793558199
66+
// https://github.com/open-telemetry/opentelemetry-go/issues/4291#issuecomment-1629797725
67+
var getWebhookTracer func() trace.Tracer = sync.OnceValue(func() trace.Tracer {
68+
return otel.GetTracerProvider().Tracer("opendatahub.io/kubeflow/components/odh-notebook-controller/controllers/notebook_webhook.go")
69+
})
70+
6071
// InjectReconciliationLock injects the kubeflow notebook controller culling
6172
// stop annotation to explicitly start the notebook pod when the ODH notebook
6273
// controller finishes the reconciliation. Otherwise, a race condition may happen
@@ -264,6 +275,15 @@ func (w *NotebookWebhook) Handle(ctx context.Context, req admission.Request) adm
264275
log := w.Log.WithValues("notebook", req.Name, "namespace", req.Namespace)
265276
ctx = logr.NewContext(ctx, log)
266277

278+
// Initialize OpenTelemetry tracer.
279+
// This is a noop in production code and is (so far) only used for testing.
280+
ctx, span := getWebhookTracer().Start(ctx, "handleFunc", trace.WithNewRoot(), trace.WithAttributes(
281+
attribute.String("notebook", req.Name),
282+
attribute.String("namespace", req.Namespace),
283+
attribute.String("operation", string(req.Operation)),
284+
))
285+
defer span.End()
286+
267287
notebook := &nbv1.Notebook{}
268288

269289
err := w.Decoder.Decode(req, notebook)
@@ -350,6 +370,9 @@ func (w *NotebookWebhook) maybeRestartRunningNotebook(ctx context.Context, req a
350370
var err error
351371
log := logr.FromContextOrDiscard(ctx)
352372

373+
ctx, span := getWebhookTracer().Start(ctx, "maybeRestartRunningNotebook")
374+
defer span.End()
375+
353376
// Notebook that was just created can be updated
354377
if req.Operation == admissionv1.Create {
355378
log.Info("Not blocking update, notebook is being newly created")
@@ -625,6 +648,8 @@ func InjectCertConfig(notebook *nbv1.Notebook, configMapName string) error {
625648
// Otherwise, it checks the last-image-selection annotation to find the image stream and fetches the image from status.dockerImageReference,
626649
// assigning it to the container.image value.
627650
func SetContainerImageFromRegistry(ctx context.Context, config *rest.Config, notebook *nbv1.Notebook, log logr.Logger, namespace string) error {
651+
span := trace.SpanFromContext(ctx)
652+
628653
// Create a dynamic client
629654
dynamicClient, err := dynamic.NewForConfig(config)
630655
if err != nil {
@@ -717,6 +742,7 @@ func SetContainerImageFromRegistry(ctx context.Context, config *rest.Config, not
717742
}
718743
}
719744
if !imagestreamFound {
745+
span.AddEvent("imagestream-not-found")
720746
log.Error(nil, "ImageStream not found in main controller namespace, or the ImageStream is present but does not contain a dockerImageReference for the specified tag",
721747
"imageSelected", imageSelected[0], "tag", imageSelected[1], "namespace", namespace)
722748
}

components/odh-notebook-controller/controllers/notebook_webhook_test.go

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package controllers
1818
import (
1919
"context"
2020
"fmt"
21+
2122
nbv1 "github.com/kubeflow/kubeflow/components/notebook-controller/api/v1"
2223
corev1 "k8s.io/api/core/v1"
2324
apierrs "k8s.io/apimachinery/pkg/api/errors"
@@ -53,7 +54,9 @@ var _ = Describe("The Openshift Notebook webhook", func() {
5354
// currently we expect that Notebook CR is always created,
5455
// and when unable to resolve imagestream, image: is left alone
5556
expectedImage string
56-
// todo(jdanek): also consider Observing for the log message, https://www.youtube.com/watch?v=prLRI3VEVq4
57+
// see https://www.youtube.com/watch?v=prLRI3VEVq4 for Observability Driven Development intro
58+
expectedEvents []string
59+
unexpectedEvents []string
5760
}{
5861
{
5962
name: "ImageStream with all that is needful",
@@ -109,6 +112,9 @@ var _ = Describe("The Openshift Notebook webhook", func() {
109112
},
110113
},
111114
expectedImage: "quay.io/modh/odh-generic-data-science-notebook@sha256:76e6af79c601a323f75a58e7005de0beac66b8cccc3d2b67efb6d11d85f0cfa1",
115+
unexpectedEvents: []string{
116+
"imagestream-not-found",
117+
},
112118
},
113119
{
114120
name: "ImageStream with a tag without items (RHOAIENG-13916)",
@@ -158,23 +164,47 @@ var _ = Describe("The Openshift Notebook webhook", func() {
158164
},
159165
// there is no update to the Notebook
160166
expectedImage: ":some-tag",
167+
expectedEvents: []string{
168+
"imagestream-not-found",
169+
},
161170
},
162171
}
163172

173+
BeforeEach(func() {
174+
Expect(tracings.TraceProvider.ForceFlush(context.Background())).To(Succeed())
175+
tracings.SpanExporter.Reset()
176+
})
177+
164178
for _, testCase := range testCases {
165-
testCase := testCase // create a copy to get correct capture in the `It` closure, https://go.dev/blog/loopvar-preview
166-
It(fmt.Sprintf("Should create a Notebook resource successfully: %s", testCase.name), func() {
167-
By("Creating a Notebook resource successfully")
168-
Expect(cli.Create(ctx, testCase.imageStream, &client.CreateOptions{})).To(Succeed())
169-
// if our webhook panics, then cli.Create will err
170-
Expect(cli.Create(ctx, testCase.notebook, &client.CreateOptions{})).To(Succeed())
179+
Context(fmt.Sprintf("The Notebook webhook test case: %s", testCase.name), func() {
180+
It("Should create a Notebook resource successfully", func() {
181+
By("Creating a Notebook resource successfully")
182+
Expect(cli.Create(ctx, testCase.imageStream, &client.CreateOptions{})).To(Succeed())
183+
// if our webhook panics, then cli.Create will err
184+
Expect(cli.Create(ctx, testCase.notebook, &client.CreateOptions{})).To(Succeed())
171185

172-
By("Checking that the webhook modified the notebook CR with the expected image")
173-
Expect(testCase.notebook.Spec.Template.Spec.Containers[0].Image).To(Equal(testCase.expectedImage))
186+
By("Checking that the webhook modified the notebook CR with the expected image")
187+
Expect(testCase.notebook.Spec.Template.Spec.Containers[0].Image).To(Equal(testCase.expectedImage))
174188

175-
By("Deleting the created resources")
176-
Expect(cli.Delete(ctx, testCase.notebook, &client.DeleteOptions{})).To(Succeed())
177-
Expect(cli.Delete(ctx, testCase.imageStream, &client.DeleteOptions{})).To(Succeed())
189+
By("Checking telemetry events")
190+
Expect(tracings.TraceProvider.ForceFlush(context.Background())).To(Succeed())
191+
spans := tracings.SpanExporter.GetSpans()
192+
events := make([]string, 0)
193+
for _, span := range spans {
194+
for _, event := range span.Events {
195+
events = append(events, event.Name)
196+
}
197+
}
198+
Expect(events).To(ContainElements(testCase.expectedEvents))
199+
for _, unexpectedEvent := range testCase.unexpectedEvents {
200+
Expect(events).ToNot(ContainElement(unexpectedEvent))
201+
}
202+
})
203+
AfterEach(func() {
204+
By("Deleting the created resources")
205+
Expect(cli.Delete(ctx, testCase.notebook, &client.DeleteOptions{})).To(Succeed())
206+
Expect(cli.Delete(ctx, testCase.imageStream, &client.DeleteOptions{})).To(Succeed())
207+
})
178208
})
179209
}
180210
})
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
package controllers
2+
3+
import (
4+
"context"
5+
"errors"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
10+
"go.opentelemetry.io/otel"
11+
"go.opentelemetry.io/otel/propagation"
12+
"go.opentelemetry.io/otel/sdk/trace"
13+
"go.opentelemetry.io/otel/sdk/trace/tracetest"
14+
)
15+
16+
var tracings struct {
17+
SpanExporter *tracetest.InMemoryExporter
18+
TraceProvider *trace.TracerProvider
19+
}
20+
21+
// context examples
22+
// https://github.com/go-logr/logr/pull/27
23+
24+
// setupOTelSDK bootstraps the OpenTelemetry pipeline.
25+
// If it does not return an error, make sure to call shutdown for proper cleanup.
26+
func setupOTelSDK(ctx context.Context) (shutdown func(context.Context) error, err error) {
27+
var shutdownFuncs []func(context.Context) error
28+
29+
// shutdown() calls cleanup functions registered via shutdownFuncs.
30+
// The errors from the calls are joined.
31+
// Each registered cleanup will be invoked once.
32+
shutdown = func(ctx context.Context) error {
33+
var err error
34+
for _, fn := range shutdownFuncs {
35+
err = errors.Join(err, fn(ctx))
36+
}
37+
shutdownFuncs = nil
38+
return err
39+
}
40+
41+
// handleErr calls shutdown for cleanup and makes sure that all errors are returned.
42+
handleErr := func(inErr error) {
43+
err = errors.Join(inErr, shutdown(ctx))
44+
}
45+
46+
// Set up propagator.
47+
prop := newPropagator()
48+
otel.SetTextMapPropagator(prop)
49+
50+
// Set up trace provider.
51+
tracerProvider, err := newTraceProvider()
52+
if err != nil {
53+
handleErr(err)
54+
return
55+
}
56+
shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown)
57+
otel.SetTracerProvider(tracerProvider)
58+
59+
return
60+
}
61+
62+
func newPropagator() propagation.TextMapPropagator {
63+
return propagation.NewCompositeTextMapPropagator(
64+
propagation.TraceContext{},
65+
propagation.Baggage{},
66+
)
67+
}
68+
69+
func newTraceProvider() (*trace.TracerProvider, error) {
70+
tracings.SpanExporter = tracetest.NewInMemoryExporter()
71+
72+
tracings.TraceProvider = trace.NewTracerProvider(
73+
trace.WithBatcher(tracings.SpanExporter),
74+
)
75+
76+
return tracings.TraceProvider, nil
77+
}
78+
79+
// Example tests to show how to work with oTel
80+
81+
const name = "opendatahub.io/kubeflow/components/odh-notebook-controller/controllers/opentelemetry_test.go"
82+
83+
var (
84+
tracer = otel.Tracer(name)
85+
)
86+
87+
func TestHelloOTel(t *testing.T) {
88+
// Set up OpenTelemetry.
89+
otelShutdown, err := setupOTelSDK(ctx)
90+
if err != nil {
91+
return
92+
}
93+
// Handle shutdown properly so nothing leaks.
94+
defer func() {
95+
err = errors.Join(err, otelShutdown(context.Background()))
96+
}()
97+
98+
func() {
99+
_, span := tracer.Start(context.Background(), "roll")
100+
//defer span.End()
101+
span.AddEvent("do_stuff")
102+
span.End()
103+
}()
104+
105+
assert.Nil(t, tracings.TraceProvider.ForceFlush(context.Background()))
106+
spans := tracings.SpanExporter.GetSpans()
107+
assert.Len(t, spans, 1)
108+
}
109+
110+
// TestOpenTelemetryInMemoryExporterReadout demonstrates accessing spans from test code.
111+
// Code is copied from https://github.com/open-telemetry/opentelemetry-go/issues/2080
112+
func TestOpenTelemetryInMemoryExporterReadout(t *testing.T) {
113+
ctx := context.Background()
114+
115+
exp := tracetest.NewInMemoryExporter()
116+
117+
tp := trace.NewTracerProvider(
118+
trace.WithBatcher(exp),
119+
)
120+
121+
tracer := tp.Tracer("tracer")
122+
123+
_, span := tracer.Start(ctx, "span")
124+
span.End()
125+
126+
err := tp.ForceFlush(ctx)
127+
assert.NoError(t, err)
128+
129+
// Expect well-defined behavior due to calling ForceFlush
130+
assert.Len(t, exp.GetSpans(), 1)
131+
}

components/odh-notebook-controller/controllers/suite_test.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,29 @@ import (
1818
"context"
1919
"crypto/tls"
2020
"fmt"
21-
"k8s.io/utils/ptr"
2221
"net"
2322
"os"
2423
"path/filepath"
2524
"testing"
2625
"time"
2726

28-
v1 "k8s.io/api/core/v1"
29-
netv1 "k8s.io/api/networking/v1"
30-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
. "github.com/onsi/ginkgo"
28+
. "github.com/onsi/gomega"
29+
30+
"go.opentelemetry.io/otel"
3131

3232
"go.uber.org/zap/zapcore"
3333
"k8s.io/apimachinery/pkg/runtime"
3434
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
35+
"k8s.io/utils/ptr"
3536
ctrl "sigs.k8s.io/controller-runtime"
3637

3738
nbv1 "github.com/kubeflow/kubeflow/components/notebook-controller/api/v1"
38-
39-
. "github.com/onsi/ginkgo"
40-
. "github.com/onsi/gomega"
4139
routev1 "github.com/openshift/api/route/v1"
40+
v1 "k8s.io/api/core/v1"
41+
netv1 "k8s.io/api/networking/v1"
42+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
43+
4244
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
4345
"k8s.io/client-go/rest"
4446
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -63,6 +65,7 @@ var (
6365

6466
ctx context.Context
6567
cancel context.CancelFunc
68+
otelShutdown func(context.Context) error
6669
managerStopped = make(chan struct{})
6770

6871
testNamespaces = []string{}
@@ -80,6 +83,7 @@ func TestAPIs(t *testing.T) {
8083
}
8184

8285
var _ = BeforeSuite(func() {
86+
var err error
8387
ctx, cancel = context.WithCancel(context.Background())
8488

8589
// Initialize logger
@@ -89,6 +93,12 @@ var _ = BeforeSuite(func() {
8993
}
9094
logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseFlagOptions(&opts)))
9195

96+
// initialize tracer
97+
// Set up OpenTelemetry.
98+
otelShutdown, err = setupOTelSDK(ctx)
99+
Expect(err).ToNot(HaveOccurred())
100+
tracer = otel.Tracer("odh-notebook-controller/controllers/suite_test.go")
101+
92102
// Initialize test environment:
93103
// https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/envtest#Environment.Start
94104
By("Bootstrapping test environment")
@@ -120,7 +130,6 @@ var _ = BeforeSuite(func() {
120130
GinkgoT().Logf("DEBUG_WRITE_AUDITLOG environment variable was not provided")
121131
}
122132

123-
var err error
124133
cfg, err = envTest.Start()
125134
Expect(err).NotTo(HaveOccurred())
126135
Expect(cfg).NotTo(BeNil())
@@ -240,4 +249,7 @@ var _ = AfterSuite(func() {
240249
// TODO: Stop cert controller-runtime.certwatcher before manager
241250
err := envTest.Stop()
242251
Expect(err).NotTo(HaveOccurred())
252+
253+
err = otelShutdown(context.Background())
254+
Expect(err).NotTo(HaveOccurred())
243255
})

0 commit comments

Comments
 (0)