Skip to content

Commit 4e4b8c4

Browse files
authored
Refine AppSignals AutoMonitor and SDK Injection checks (#342)
* Add validation to skip ADOT SDK injection for incompatible configurations Skip ADOT SDK injection in the following scenarios: - runAsNonRoot=true without runAsUser (prevents init container failures) - Custom OTLP endpoints without Application Signals enabled (avoids conflicts) - Application Signals explicitly disabled (allows custom OTel configuration) Key changes: - Add envFrom ConfigMap resolution for validation (N+1 query optimization) - Add comprehensive unit tests (33 scenarios in helper_test.go) - Skip injection when OTel Collector container already exists - Expand excluded namespaces for auto-monitor (40+ common system namespaces) - Check for OpenTelemetry operator installation before enabling auto-monitor - Protocol-aware CloudWatch agent endpoint detection Files modified: - pkg/instrumentation/helper.go: Core validation logic - pkg/instrumentation/helper_test.go: Comprehensive unit tests - pkg/instrumentation/sdk.go: envFrom caching, OTC detection - pkg/instrumentation/sdk_test.go: Integration tests - pkg/instrumentation/{java,python,nodejs,dotnet}.go: Pass container params - pkg/instrumentation/auto/monitor.go: Expanded excluded namespaces - pkg/instrumentation/auto/util.go: OpenTelemetry operator check * Update logging for configure AutoMonitor functions * Replace -a to -r for cp cmd on Python and NodeJs sdk injection * fix: skip auto-monitor when api-resource fetching returns uncaptured errors * nit refactoring - use consts for otel env vars
1 parent 0fda7f9 commit 4e4b8c4

File tree

16 files changed

+1009
-67
lines changed

16 files changed

+1009
-67
lines changed

pkg/instrumentation/auto/monitor.go

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,50 @@ import (
2626
"github.com/aws/amazon-cloudwatch-agent-operator/pkg/instrumentation"
2727
)
2828

29-
var excludedNamespaces = []string{"kube-system", "amazon-cloudwatch"}
29+
// List of namespaces excluded from ApplicationSignals auto-monitoring by default (best-effort basis).
30+
// Customers must explicitly opt in to enable ApplicationSignals for workloads in these namespaces.
31+
var excludedNamespaces = []string{
32+
// --- Monitoring & Observability ---
33+
"monitoring", // Prometheus, kube-prometheus-stack, Grafana
34+
"loki", // Loki, Promtail
35+
"observability", // Tempo, Jaeger
36+
"opentelemetry-operator-system", // OpenTelemetry Collector / Operator
37+
"amazon-cloudwatch", // Amazon CloudWatch Agent
38+
"tracing", // Zipkin or similar tracing tools
39+
40+
// --- Ingress & Networking ---
41+
"ingress-nginx", // NGINX Ingress Controller
42+
"traefik", // Traefik Ingress Controller
43+
"istio-system", // Istio control plane
44+
"linkerd", // Linkerd service mesh
45+
"cert-manager", // Cert-Manager for TLS certificates
46+
"external-dns", // ExternalDNS controller
47+
48+
// --- Cloud Integrations ---
49+
"aws-load-balancer-controller", // AWS Load Balancer Controller
50+
"kube-system", // Cluster Autoscaler, CSI Drivers, Metrics Server
51+
"external-dns", // ExternalDNS (sometimes deployed here too)
52+
53+
// --- Security & Policy ---
54+
"kyverno", // Kyverno policy engine
55+
"gatekeeper-system", // OPA Gatekeeper
56+
"falco", // Falco runtime security
57+
"vault", // HashiCorp Vault for secrets
58+
59+
// --- CI/CD & Management ---
60+
"argocd", // Argo CD
61+
"flux-system", // Flux CD
62+
"jenkins", // Jenkins CI/CD
63+
"tekton-pipelines", // Tekton pipelines
64+
"spinnaker", // Spinnaker
65+
"harbor", // Harbor container registry
66+
"reloader", // Reloader for configmap/secret reloads
67+
68+
// --- Other Utilities ---
69+
"keda", // KEDA event-driven autoscaler
70+
"velero", // Velero backup/restore
71+
"minio", // MinIO object storage
72+
}
3073

3174
const (
3275
ByLabel = "IndexByLabel"

pkg/instrumentation/auto/util.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"os"
1111

1212
"github.com/go-logr/logr"
13+
"k8s.io/apimachinery/pkg/api/errors"
1314
"k8s.io/client-go/kubernetes"
1415
"k8s.io/client-go/rest"
1516
ctrl "sigs.k8s.io/controller-runtime"
@@ -62,6 +63,21 @@ func configureAutoMonitor(ctx context.Context, autoMonitorConfigStr string, clie
6263
return nil, fmt.Errorf("unable to unmarshal auto-monitor config: %w", err)
6364
}
6465

66+
resources, err := clientSet.Discovery().ServerResourcesForGroupVersion("opentelemetry.io/v1alpha1")
67+
if err == nil {
68+
for _, r := range resources.APIResources {
69+
if r.Name == "instrumentations" {
70+
setupLog.Info("W! auto-monitor is disabled due to the presence of opentelemetry.io/v1alpha1 group version")
71+
return nil, nil
72+
}
73+
}
74+
} else {
75+
if !errors.IsNotFound(err) {
76+
setupLog.Info(fmt.Sprintf("W! auto-monitor is disabled due to failures in retrieving server groups: %v", err))
77+
return nil, nil
78+
}
79+
}
80+
6581
logger := ctrl.Log.WithName("auto_monitor")
6682
return NewMonitor(ctx, *autoMonitorConfig, clientSet, client, reader, logger), nil
6783
}

pkg/instrumentation/auto/util_test.go

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ import (
1111

1212
"github.com/go-logr/logr/testr"
1313
"github.com/stretchr/testify/assert"
14+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15+
discoveryfake "k8s.io/client-go/discovery/fake"
1416
"k8s.io/client-go/kubernetes/fake"
15-
1617
fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake"
1718
)
1819

@@ -28,6 +29,7 @@ func TestCreateInstrumentationAnnotator(t *testing.T) {
2829
envDisableMonitor bool
2930
autoAnnotationConfig string
3031
autoMonitorConfig string
32+
otelSetupExists bool
3133
expectNilAnnotator bool
3234
expectedType string
3335
}{
@@ -94,6 +96,16 @@ func TestCreateInstrumentationAnnotator(t *testing.T) {
9496
expectNilAnnotator: false,
9597
expectedType: "*auto.AnnotationMutators",
9698
},
99+
{
100+
name: "Disable auto monitor with otel setup found",
101+
envDisableAnnotation: false,
102+
envDisableMonitor: false,
103+
otelSetupExists: true,
104+
autoAnnotationConfig: ``,
105+
autoMonitorConfig: `{"monitorAllServices":true}`,
106+
expectNilAnnotator: true,
107+
expectedType: "",
108+
},
97109
}
98110

99111
for _, tt := range tests {
@@ -111,8 +123,29 @@ func TestCreateInstrumentationAnnotator(t *testing.T) {
111123
os.Unsetenv("DISABLE_AUTO_MONITOR")
112124
}
113125

126+
fakeClientset := fake.NewSimpleClientset()
127+
if tt.otelSetupExists {
128+
fakeDiscovery, ok := fakeClientset.Discovery().(*discoveryfake.FakeDiscovery)
129+
if !ok {
130+
t.Fatal("couldn't convert Discovery() to *FakeDiscovery")
131+
}
132+
133+
fakeDiscovery.Resources = []*metav1.APIResourceList{
134+
{
135+
GroupVersion: "opentelemetry.io/v1alpha1",
136+
APIResources: []metav1.APIResource{
137+
{
138+
Name: "instrumentations",
139+
Namespaced: true,
140+
Kind: "Instrumentation",
141+
Verbs: []string{"get", "list", "watch", "create", "update", "patch", "delete"},
142+
},
143+
},
144+
},
145+
}
146+
}
114147
// Call the function
115-
annotator := createInstrumentationAnnotatorWithClientset(tt.autoMonitorConfig, tt.autoAnnotationConfig, ctx, fake.NewSimpleClientset(), fakeClient, fakeClient, logger)
148+
annotator := createInstrumentationAnnotatorWithClientset(tt.autoMonitorConfig, tt.autoAnnotationConfig, ctx, fakeClientset, fakeClient, fakeClient, logger)
116149

117150
// Check results
118151
if tt.expectNilAnnotator {

pkg/instrumentation/dotnet.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,22 @@ var (
5454
dotNetCommandWindows = []string{"CMD", "/c", "xcopy", "/e", "autoinstrumentation\\*", dotnetInstrMountPathWindows}
5555
)
5656

57-
func injectDotNetSDK(dotNetSpec v1alpha1.DotNet, pod corev1.Pod, index int, runtime string) (corev1.Pod, error) {
58-
59-
// caller checks if there is at least one container.
57+
func injectDotNetSDK(dotNetSpec v1alpha1.DotNet, pod corev1.Pod, index int, runtime string, allEnvs []corev1.EnvVar) (corev1.Pod, error) {
6058
container := &pod.Spec.Containers[index]
6159

6260
err := validateContainerEnv(container.Env, envDotNetStartupHook, envDotNetAdditionalDeps, envDotNetSharedStore)
6361
if err != nil {
6462
return pod, err
6563
}
6664

67-
// check if OTEL_DOTNET_AUTO_HOME env var is already set in the container
65+
// Check if ADOT SDK should be injected based on all environment variables and security context
66+
if !shouldInjectADOTSDK(allEnvs, pod, container) {
67+
return pod, fmt.Errorf("ADOT DOTNET SDK injection skipped due to incompatible OTel configuration")
68+
}
69+
70+
// check if OTEL_DOTNET_AUTO_HOME env var is already set
6871
// if it is already set, then we assume that .NET Auto-instrumentation is already configured for this container
69-
if getIndexOfEnv(container.Env, envDotNetOTelAutoHome) > -1 {
72+
if getIndexOfEnv(allEnvs, envDotNetOTelAutoHome) > -1 {
7073
return pod, errors.New("OTEL_DOTNET_AUTO_HOME environment variable is already set in the container")
7174
}
7275

@@ -86,10 +89,9 @@ func injectDotNetSDK(dotNetSpec v1alpha1.DotNet, pod corev1.Pod, index int, runt
8689
return pod, fmt.Errorf("provided instrumentation.opentelemetry.io/dotnet-runtime annotation value '%s' is not supported", runtime)
8790
}
8891

89-
// inject .NET instrumentation spec env vars.
92+
// inject .NET instrumentation spec env vars with validation
9093
for _, env := range dotNetSpec.Env {
91-
idx := getIndexOfEnv(container.Env, env.Name)
92-
if idx == -1 {
94+
if shouldInjectEnvVar(allEnvs, env.Name) {
9395
container.Env = append(container.Env, env)
9496
}
9597
}

pkg/instrumentation/dotnet_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,12 @@ func TestInjectDotNetSDK(t *testing.T) {
539539

540540
for _, test := range tests {
541541
t.Run(test.name, func(t *testing.T) {
542-
pod, err := injectDotNetSDK(test.DotNet, test.pod, 0, test.runtime)
542+
// Pass container's env vars for validation
543+
allEnvs := []corev1.EnvVar{}
544+
if len(test.pod.Spec.Containers) > 0 {
545+
allEnvs = test.pod.Spec.Containers[0].Env
546+
}
547+
pod, err := injectDotNetSDK(test.DotNet, test.pod, 0, test.runtime, allEnvs)
543548
assert.Equal(t, test.expected, pod)
544549
assert.Equal(t, test.err, err)
545550
})

0 commit comments

Comments
 (0)