diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 62c97fa1088803..762183856eb69a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1207,6 +1207,15 @@ workflow: compare_to: $COMPARE_TO_BRANCH when: on_success +.on_admission_controller_or_e2e_changes: + - !reference [.on_e2e_main_release_or_rc] + - changes: + paths: + - pkg/clusteragent/admission/**/* + - pkg/util/kubernetes/cloudprovider/**/* + - test/new-e2e/tests/admission-controller/**/* + compare_to: $COMPARE_TO_BRANCH + .on_ssi_or_e2e_changes: - !reference [.on_e2e_main_release_or_rc] - changes: diff --git a/.gitlab/test/e2e/e2e.yml b/.gitlab/test/e2e/e2e.yml index 52cb46fa33fc2b..8d08ec47a12583 100644 --- a/.gitlab/test/e2e/e2e.yml +++ b/.gitlab/test/e2e/e2e.yml @@ -949,6 +949,37 @@ new-e2e-otel: TEAM: otel ON_NIGHTLY_FIPS: "true" +.new-e2e_admission_controller: + extends: .new_e2e_template + rules: + - !reference [.on_admission_controller_or_e2e_changes] + - !reference [.manual] + needs: + - !reference [.needs_new_e2e_template] + - qa_dca + - qa_agent_linux + variables: + TARGETS: ./tests/admission-controller + TEAM: container-integrations + +new-e2e-admission-controller-kind: + extends: .new-e2e_admission_controller + variables: + E2E_PROVISIONER: kind + E2E_STACK_NAME_SUFFIX: kind + +new-e2e-admission-controller-eks: + extends: .new-e2e_admission_controller + variables: + E2E_PROVISIONER: eks + E2E_STACK_NAME_SUFFIX: eks + +new-e2e-admission-controller-gke: + extends: .new-e2e_admission_controller + variables: + E2E_PROVISIONER: gke + E2E_STACK_NAME_SUFFIX: gke + .new-e2e_ssi: extends: .new_e2e_template rules: diff --git a/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar.go b/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar.go index 399edbf5fdcd46..4084e6daeab5cd 100644 --- a/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar.go +++ b/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar.go @@ -85,7 +85,7 @@ func NewWebhook(datadogConfig config.Component) 
*Webhook { nsSelector, objSelector := labelSelectors(datadogConfig, profileOverrides) - containerRegistry := mutatecommon.ContainerRegistry(datadogConfig, "admission_controller.agent_sidecar.container_registry") + containerRegistry := mutatecommon.ContainerRegistry(context.TODO(), datadogConfig, "admission_controller.agent_sidecar.container_registry") return &Webhook{ name: webhookName, diff --git a/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go b/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go index 3437419f8518d7..c35044854c0973 100644 --- a/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go +++ b/pkg/clusteragent/admission/mutate/agent_sidecar/agent_sidecar_test.go @@ -697,8 +697,8 @@ func TestDefaultSidecarTemplateAgentImage(t *testing.T) { { name: "no configuration set", setConfig: func() model.Config { return configmock.New(t) }, - containerRegistry: commonRegistry, - expectedImage: commonRegistry + "/agent:latest", + containerRegistry: "registry.datadoghq.com", + expectedImage: "registry.datadoghq.com/agent:latest", }, { name: "setting custom registry, image and tag", diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go index 51b78df660a1aa..98fa1da87849ec 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/auto_instrumentation_test.go @@ -1381,7 +1381,7 @@ func TestAutoinstrumentation(t *testing.T) { shouldMutate: true, expected: &expected{ initContainerImages: []string{ - "gcr.io/datadoghq/dd-lib-php-init:v1", + "registry.datadoghq.com/dd-lib-php-init:v1", "docker.io/library/apm-inject-package:v27", }, containerNames: defaultContainerNames, @@ -1409,7 +1409,7 @@ func TestAutoinstrumentation(t *testing.T) { shouldMutate: true, expected: &expected{ 
initContainerImages: []string{ - "gcr.io/datadoghq/apm-inject:0", + "registry.datadoghq.com/apm-inject:0", "foo/bar:1", }, containerNames: defaultContainerNames, diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go index 7be211b96c210d..b63ad697916c7a 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/config.go @@ -8,6 +8,7 @@ package autoinstrumentation import ( + "context" "encoding/json" "errors" "fmt" @@ -125,7 +126,7 @@ func NewConfig(datadogConfig config.Component) (*Config, error) { return nil, fmt.Errorf("unable to parse init-container's resources from configuration: %w", err) } - containerRegistry := mutatecommon.ContainerRegistry(datadogConfig, "admission_controller.auto_instrumentation.container_registry") + containerRegistry := mutatecommon.ContainerRegistry(context.TODO(), datadogConfig, "admission_controller.auto_instrumentation.container_registry") mutateUnlabelled := datadogConfig.GetBool("admission_controller.mutate_unlabelled") return &Config{ diff --git a/pkg/clusteragent/admission/mutate/autoinstrumentation/imageresolver/config_test.go b/pkg/clusteragent/admission/mutate/autoinstrumentation/imageresolver/config_test.go index 8566991d65f58c..eb5989283f8850 100644 --- a/pkg/clusteragent/admission/mutate/autoinstrumentation/imageresolver/config_test.go +++ b/pkg/clusteragent/admission/mutate/autoinstrumentation/imageresolver/config_test.go @@ -33,7 +33,7 @@ func TestNewConfig(t *testing.T) { }, expectedState: Config{ Site: "datadoghq.com", - DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}}, + DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}, "datadoghq.azurecr.io": {}, "registry.datadoghq.com": {}}, BucketID: "2", DigestCacheTTL: 1 * time.Hour, 
Enabled: true, @@ -64,7 +64,7 @@ func TestNewConfig(t *testing.T) { }, expectedState: Config{ Site: "datad0g.com", - DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}}, + DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}, "datadoghq.azurecr.io": {}, "registry.datadoghq.com": {}}, BucketID: "2", DigestCacheTTL: 1 * time.Hour, Enabled: true, @@ -80,7 +80,7 @@ func TestNewConfig(t *testing.T) { }, expectedState: Config{ Site: "datadoghq.com", - DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}}, + DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}, "datadoghq.azurecr.io": {}, "registry.datadoghq.com": {}}, BucketID: "0", DigestCacheTTL: 1 * time.Hour, Enabled: true, @@ -97,7 +97,7 @@ func TestNewConfig(t *testing.T) { }, expectedState: Config{ Site: "datadoghq.com", - DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}}, + DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}, "datadoghq.azurecr.io": {}, "registry.datadoghq.com": {}}, BucketID: "0", DigestCacheTTL: 1 * time.Hour, Enabled: false, @@ -114,7 +114,7 @@ func TestNewConfig(t *testing.T) { }, expectedState: Config{ Site: "datadoghq.com", - DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}}, + DDRegistries: map[string]struct{}{"gcr.io/datadoghq": {}, "docker.io/datadog": {}, "public.ecr.aws/datadog": {}, "datadoghq.azurecr.io": {}, "registry.datadoghq.com": {}}, BucketID: "0", DigestCacheTTL: 2 * time.Hour, Enabled: true, diff --git a/pkg/clusteragent/admission/mutate/common/common.go b/pkg/clusteragent/admission/mutate/common/common.go index 5225dfdb20b383..1d5ddd2948f6c4 100644 --- 
a/pkg/clusteragent/admission/mutate/common/common.go +++ b/pkg/clusteragent/admission/mutate/common/common.go @@ -9,6 +9,7 @@ package common import ( + "context" "encoding/json" "fmt" "slices" @@ -21,6 +22,7 @@ import ( "github.com/DataDog/datadog-agent/comp/core/config" "github.com/DataDog/datadog-agent/pkg/clusteragent/admission/metrics" + "github.com/DataDog/datadog-agent/pkg/util/kubernetes/cloudprovider" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -195,15 +197,40 @@ func containsVolumeMount(volumeMounts []corev1.VolumeMount, element corev1.Volum return false } +// defaultRegistryForProvider returns the preferred container registry for the +// given cloud provider. This follows the Datadog recommendation of using +// cloud-provider-specific registries for lower latency and reduced egress. +func defaultRegistryForProvider(provider string) string { + switch provider { + case "eks": + return "public.ecr.aws/datadog" + case "gke": + return "gcr.io/datadoghq" + case "aks": + return "datadoghq.azurecr.io" + default: + return "registry.datadoghq.com" + } +} + // ContainerRegistry gets the container registry config using the specified // config option, and falls back to the default container registry if no -// webhook-specific container registry is set. -func ContainerRegistry(datadogConfig config.Component, specificConfigOpt string) string { - if datadogConfig.IsSet(specificConfigOpt) { +// webhook-specific container registry is set. If no global registry is +// explicitly configured, it auto-selects based on the detected cloud provider, +// falling back to registry.datadoghq.com. 
+func ContainerRegistry(ctx context.Context, datadogConfig config.Component, specificConfigOpt string) string { + if datadogConfig.IsConfigured(specificConfigOpt) { return datadogConfig.GetString(specificConfigOpt) } - return datadogConfig.GetString("admission_controller.container_registry") + if datadogConfig.IsConfigured("admission_controller.container_registry") { + return datadogConfig.GetString("admission_controller.container_registry") + } + + provider := cloudprovider.DCAGetName(ctx) + registry := defaultRegistryForProvider(provider) + log.Infof("Auto-detected cloud provider %q, using container registry %q", provider, registry) + return registry } // MarkVolumeAsSafeToEvictForAutoscaler adds the Kubernetes cluster-autoscaler diff --git a/pkg/clusteragent/admission/mutate/common/common_test.go b/pkg/clusteragent/admission/mutate/common/common_test.go index f8cd9b327ecdee..9dcd1720dbb914 100644 --- a/pkg/clusteragent/admission/mutate/common/common_test.go +++ b/pkg/clusteragent/admission/mutate/common/common_test.go @@ -8,12 +8,15 @@ package common import ( + "context" "reflect" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" + + configmock "github.com/DataDog/datadog-agent/pkg/config/mock" ) func Test_contains(t *testing.T) { @@ -324,3 +327,66 @@ func TestMarkVolumeAsSafeToEvictForAutoscaler(t *testing.T) { } } + +func Test_defaultRegistryForProvider(t *testing.T) { + tests := []struct { + provider string + expected string + }{ + {"eks", "public.ecr.aws/datadog"}, + {"gke", "gcr.io/datadoghq"}, + {"aks", "datadoghq.azurecr.io"}, + {"", "registry.datadoghq.com"}, + {"unknown", "registry.datadoghq.com"}, + } + + for _, tt := range tests { + t.Run("provider_"+tt.provider, func(t *testing.T) { + assert.Equal(t, tt.expected, defaultRegistryForProvider(tt.provider)) + }) + } +} + +func Test_ContainerRegistry(t *testing.T) { + tests := []struct { + name string + specificKey string + specificValue 
string + globalValue string + expectedPrefix string + }{ + { + name: "webhook-specific config wins", + specificKey: "admission_controller.auto_instrumentation.container_registry", + specificValue: "my-custom-registry.io/datadog", + globalValue: "global-registry.io/datadog", + expectedPrefix: "my-custom-registry.io/datadog", + }, + { + name: "global config used when no specific config", + specificKey: "admission_controller.auto_instrumentation.container_registry", + globalValue: "global-registry.io/datadog", + expectedPrefix: "global-registry.io/datadog", + }, + { + name: "auto-detection when no config set", + specificKey: "admission_controller.auto_instrumentation.container_registry", + expectedPrefix: "registry.datadoghq.com", // no cloud provider detected in tests → default + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockConfig := configmock.New(t) + if tt.specificValue != "" { + mockConfig.SetWithoutSource(tt.specificKey, tt.specificValue) + } + if tt.globalValue != "" { + mockConfig.SetWithoutSource("admission_controller.container_registry", tt.globalValue) + } + + result := ContainerRegistry(context.TODO(), mockConfig, tt.specificKey) + assert.Equal(t, tt.expectedPrefix, result) + }) + } +} diff --git a/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go b/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go index 1bc9a79238d86c..57fa2fd0770749 100644 --- a/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go +++ b/pkg/clusteragent/admission/mutate/cwsinstrumentation/cws_instrumentation.go @@ -348,7 +348,7 @@ func NewCWSInstrumentation(wmeta workloadmeta.Component, datadogConfig config.Co cwsInjectorImageName := pkgconfigsetup.Datadog().GetString("admission_controller.cws_instrumentation.image_name") cwsInjectorImageTag := pkgconfigsetup.Datadog().GetString("admission_controller.cws_instrumentation.image_tag") - cwsInjectorContainerRegistry := 
mutatecommon.ContainerRegistry(datadogConfig, "admission_controller.cws_instrumentation.container_registry") + cwsInjectorContainerRegistry := mutatecommon.ContainerRegistry(context.TODO(), datadogConfig, "admission_controller.cws_instrumentation.container_registry") if len(cwsInjectorImageName) == 0 { return nil, errors.New("can't initialize CWS Instrumentation without an image_name") diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go index b30bb72d4c8deb..e9732c7fd6ac9c 100644 --- a/pkg/config/setup/config.go +++ b/pkg/config/setup/config.go @@ -951,7 +951,7 @@ func InitConfig(config pkgconfigmodel.Setup) { config.BindEnvAndSetDefault("admission_controller.mutation.enabled", true) config.BindEnvAndSetDefault("admission_controller.mutate_unlabelled", false) config.BindEnvAndSetDefault("admission_controller.port", 8000) - config.BindEnvAndSetDefault("admission_controller.container_registry", "gcr.io/datadoghq") + config.BindEnvAndSetDefault("admission_controller.container_registry", "registry.datadoghq.com") config.BindEnvAndSetDefault("admission_controller.timeout_seconds", 10) // in seconds (see kubernetes/kubernetes#71508) config.BindEnvAndSetDefault("admission_controller.service_name", "datadog-admission-controller") config.BindEnvAndSetDefault("admission_controller.certificate.validity_bound", 365*24) // validity bound of the certificate created by the controller (in hours, default 1 year) @@ -984,6 +984,8 @@ func InitConfig(config pkgconfigmodel.Setup) { "gcr.io/datadoghq", "docker.io/datadog", "public.ecr.aws/datadog", + "datadoghq.azurecr.io", + "registry.datadoghq.com", }) config.BindEnvAndSetDefault("admission_controller.auto_instrumentation.gradual_rollout.enabled", true) config.BindEnvAndSetDefault("admission_controller.auto_instrumentation.gradual_rollout.cache_ttl", "1h") diff --git a/releasenotes-dca/notes/admission-controller-auto-registry-detection-3e96f82298a87d08.yaml 
b/releasenotes-dca/notes/admission-controller-auto-registry-detection-3e96f82298a87d08.yaml new file mode 100644 index 00000000000000..c6b3a4bd4798cf --- /dev/null +++ b/releasenotes-dca/notes/admission-controller-auto-registry-detection-3e96f82298a87d08.yaml @@ -0,0 +1,13 @@ +--- +enhancements: + - | + The admission controller now auto-selects the container registry based on the + detected cloud provider (EKS uses ``public.ecr.aws/datadog``, GKE uses + ``gcr.io/datadoghq``, AKS uses ``datadoghq.azurecr.io``). The new default for + environments where no cloud provider is detected is ``registry.datadoghq.com`` + (previously ``gcr.io/datadoghq``). Explicit configuration via + ``admission_controller.container_registry`` or webhook-specific overrides still + takes precedence. To restore the previous default, set + ``admission_controller.container_registry`` to ``gcr.io/datadoghq``. See + `Changing your container registry <https://docs.datadoghq.com/containers/guide/changing_container_registry>`_ + for all available registries and configuration options. diff --git a/releasenotes/notes/admission-controller-auto-registry-detection-3e96f82298a87d08.yaml b/releasenotes/notes/admission-controller-auto-registry-detection-3e96f82298a87d08.yaml new file mode 100644 index 00000000000000..a3845fb91092f9 --- /dev/null +++ b/releasenotes/notes/admission-controller-auto-registry-detection-3e96f82298a87d08.yaml @@ -0,0 +1,11 @@ +--- +enhancements: + - | + The default container registry for the admission controller has changed from + ``gcr.io/datadoghq`` to ``registry.datadoghq.com``. On managed Kubernetes + environments, the cluster agent now auto-selects a cloud-provider-specific + registry (EKS: ``public.ecr.aws/datadog``, GKE: ``gcr.io/datadoghq``, AKS: + ``datadoghq.azurecr.io``). To restore the previous default, set + ``admission_controller.container_registry`` to ``gcr.io/datadoghq``. See + `Changing your container registry <https://docs.datadoghq.com/containers/guide/changing_container_registry>`_ + for all available registries and configuration options. 
diff --git a/test/new-e2e/tests/admission-controller/provisioner.go b/test/new-e2e/tests/admission-controller/provisioner.go new file mode 100644 index 00000000000000..aa6c980b798a79 --- /dev/null +++ b/test/new-e2e/tests/admission-controller/provisioner.go @@ -0,0 +1,127 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package admissioncontroller + +import ( + "strings" + + "github.com/DataDog/datadog-agent/test/e2e-framework/components/datadog/kubernetesagentparams" + kubeComp "github.com/DataDog/datadog-agent/test/e2e-framework/components/kubernetes" + scenarioeks "github.com/DataDog/datadog-agent/test/e2e-framework/scenarios/aws/eks" + "github.com/DataDog/datadog-agent/test/e2e-framework/scenarios/aws/kindvm" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/environments" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/provisioners" + proveks "github.com/DataDog/datadog-agent/test/e2e-framework/testing/provisioners/aws/kubernetes/eks" + provkindvm "github.com/DataDog/datadog-agent/test/e2e-framework/testing/provisioners/aws/kubernetes/kindvm" + provgke "github.com/DataDog/datadog-agent/test/e2e-framework/testing/provisioners/gcp/kubernetes" + localkind "github.com/DataDog/datadog-agent/test/e2e-framework/testing/provisioners/local/kubernetes" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/runner" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/runner/parameters" +) + +// ProvisionerType represents the type of Kubernetes provisioner to use. +type ProvisionerType string + +const ( + // ProvisionerKindAWS uses Kind running on an AWS VM (default). + ProvisionerKindAWS ProvisionerType = "kind" + // ProvisionerKindLocal uses Kind running locally. 
+ ProvisionerKindLocal ProvisionerType = "kind-local" + // ProvisionerEKS uses Amazon EKS. + ProvisionerEKS ProvisionerType = "eks" + // ProvisionerGKE uses Google Kubernetes Engine. + ProvisionerGKE ProvisionerType = "gke" +) + +// ProvisionerOptions contains common options for Kubernetes provisioners. +type ProvisionerOptions struct { + AgentOptions []kubernetesagentparams.Option + WorkloadAppFunc kubeComp.WorkloadAppFunc + AgentDependentWorkloadAppFunc kubeComp.AgentDependentWorkloadAppFunc +} + +// Provisioner returns a Kubernetes provisioner based on E2E_PROVISIONER and E2E_DEV_LOCAL parameters. +func Provisioner(opts ProvisionerOptions) provisioners.TypedProvisioner[environments.Kubernetes] { + switch getProvisionerType() { + case ProvisionerKindLocal: + return localKindProvisioner(opts) + case ProvisionerEKS: + return eksProvisioner(opts) + case ProvisionerGKE: + return gkeProvisioner(opts) + default: + return awsKindProvisioner(opts) + } +} + +func getProvisionerType() ProvisionerType { + provisioner, err := runner.GetProfile().ParamStore().GetWithDefault(parameters.Provisioner, "") + if err == nil && provisioner != "" { + return ProvisionerType(strings.ToLower(provisioner)) + } + devLocal, err := runner.GetProfile().ParamStore().GetBoolWithDefault(parameters.DevLocal, false) + if err == nil && devLocal { + return ProvisionerKindLocal + } + return ProvisionerKindAWS +} + +func localKindProvisioner(opts ProvisionerOptions) provisioners.TypedProvisioner[environments.Kubernetes] { + var localOpts []localkind.ProvisionerOption + if len(opts.AgentOptions) > 0 { + localOpts = append(localOpts, localkind.WithAgentOptions(opts.AgentOptions...)) + } + if opts.WorkloadAppFunc != nil { + localOpts = append(localOpts, localkind.WithWorkloadApp(opts.WorkloadAppFunc)) + } + if opts.AgentDependentWorkloadAppFunc != nil { + localOpts = append(localOpts, localkind.WithAgentDependentWorkloadApp(opts.AgentDependentWorkloadAppFunc)) + } + return 
localkind.Provisioner(localOpts...) +} + +func awsKindProvisioner(opts ProvisionerOptions) provisioners.TypedProvisioner[environments.Kubernetes] { + var runOpts []kindvm.RunOption + if len(opts.AgentOptions) > 0 { + runOpts = append(runOpts, kindvm.WithAgentOptions(opts.AgentOptions...)) + } + if opts.WorkloadAppFunc != nil { + runOpts = append(runOpts, kindvm.WithWorkloadApp(opts.WorkloadAppFunc)) + } + if opts.AgentDependentWorkloadAppFunc != nil { + runOpts = append(runOpts, kindvm.WithAgentDependentWorkloadApp(opts.AgentDependentWorkloadAppFunc)) + } + return provkindvm.Provisioner(provkindvm.WithRunOptions(runOpts...)) +} + +func eksProvisioner(opts ProvisionerOptions) provisioners.TypedProvisioner[environments.Kubernetes] { + var runOpts []scenarioeks.RunOption + runOpts = append(runOpts, scenarioeks.WithEKSOptions(scenarioeks.WithLinuxNodeGroup())) + if len(opts.AgentOptions) > 0 { + runOpts = append(runOpts, scenarioeks.WithAgentOptions(opts.AgentOptions...)) + } + if opts.WorkloadAppFunc != nil { + runOpts = append(runOpts, scenarioeks.WithWorkloadApp(opts.WorkloadAppFunc)) + } + if opts.AgentDependentWorkloadAppFunc != nil { + runOpts = append(runOpts, scenarioeks.WithAgentDependentWorkloadApp(opts.AgentDependentWorkloadAppFunc)) + } + return proveks.Provisioner(proveks.WithRunOptions(runOpts...)) +} + +func gkeProvisioner(opts ProvisionerOptions) provisioners.TypedProvisioner[environments.Kubernetes] { + var gkeOpts []provgke.ProvisionerOption + if len(opts.AgentOptions) > 0 { + gkeOpts = append(gkeOpts, provgke.WithAgentOptions(opts.AgentOptions...)) + } + if opts.WorkloadAppFunc != nil { + gkeOpts = append(gkeOpts, provgke.WithWorkloadApp(provgke.WorkloadAppFunc(opts.WorkloadAppFunc))) + } + if opts.AgentDependentWorkloadAppFunc != nil { + gkeOpts = append(gkeOpts, provgke.WithAgentDependentWorkloadApp(opts.AgentDependentWorkloadAppFunc)) + } + return provgke.GKEProvisioner(gkeOpts...) 
+} diff --git a/test/new-e2e/tests/admission-controller/registry_test.go b/test/new-e2e/tests/admission-controller/registry_test.go new file mode 100644 index 00000000000000..fdcb8a649e071d --- /dev/null +++ b/test/new-e2e/tests/admission-controller/registry_test.go @@ -0,0 +1,160 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package admissioncontroller contains E2E tests for admission controller features. +package admissioncontroller + +import ( + "context" + _ "embed" + "strings" + "testing" + "time" + + "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes" + "github.com/pulumi/pulumi/sdk/v3/go/pulumi" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/DataDog/datadog-agent/test/e2e-framework/common/config" + "github.com/DataDog/datadog-agent/test/e2e-framework/components/datadog/apps/singlestep" + "github.com/DataDog/datadog-agent/test/e2e-framework/components/datadog/kubernetesagentparams" + compkube "github.com/DataDog/datadog-agent/test/e2e-framework/components/kubernetes" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/e2e" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/environments" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/runner" + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/runner/parameters" +) + +//go:embed testdata/helm_values.yaml +var helmValues string + +// expectedRegistry returns the container registry the admission controller +// should auto-select for the current E2E provisioner. 
+func expectedRegistry() string { + provisioner, err := runner.GetProfile().ParamStore().GetWithDefault(parameters.Provisioner, "") + if err == nil { + switch strings.ToLower(provisioner) { + case "eks": + return "public.ecr.aws/datadog" + case "gke": + return "gcr.io/datadoghq" + case "aks": + return "datadoghq.azurecr.io" + } + } + // Kind (local or AWS) — no cloud provider detected. + return "registry.datadoghq.com" +} + +// registrySuite verifies that the admission controller uses the correct +// container registry based on the detected cloud provider when no explicit +// registry is configured. +type registrySuite struct { + e2e.BaseSuite[environments.Kubernetes] +} + +func TestRegistryAutoDetection(t *testing.T) { + t.Parallel() + e2e.Run(t, &registrySuite{}, e2e.WithProvisioner(Provisioner(ProvisionerOptions{ + AgentOptions: []kubernetesagentparams.Option{ + kubernetesagentparams.WithHelmValues(helmValues), + }, + AgentDependentWorkloadAppFunc: func(e config.Env, kubeProvider *kubernetes.Provider, dependsOnAgent pulumi.ResourceOption) (*compkube.Workload, error) { + return singlestep.Scenario(e, kubeProvider, "registry-test", []singlestep.Namespace{ + { + Name: "registry-test", + Apps: []singlestep.App{ + { + Name: "registry-test-app", + Image: "registry.datadoghq.com/injector-dev/python", + Version: "16ad9d4b", + Port: 8080, + }, + }, + }, + }, dependsOnAgent) + }, + }))) +} + +// TestInitContainerRegistry verifies that init containers injected by the +// admission controller use the expected cloud-provider-specific registry. 
+func (s *registrySuite) TestInitContainerRegistry() { + k8s := s.Env().KubernetesCluster.Client() + expected := expectedRegistry() + s.T().Logf("Expecting init container images from registry %q", expected) + + require.EventuallyWithT(s.T(), func(c *assert.CollectT) { + pods, err := k8s.CoreV1().Pods("registry-test").List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=registry-test-app", + }) + require.NoError(c, err, "failed to list pods") + require.NotEmpty(c, pods.Items, "no pods found for registry-test-app") + + pod := pods.Items[0] + require.NotEmpty(c, pod.Spec.InitContainers, "pod has no init containers — admission controller may not have mutated it") + + for _, ic := range pod.Spec.InitContainers { + if !isDatadogInitContainer(ic.Name) { + continue + } + require.True(c, strings.HasPrefix(ic.Image, expected+"/"), + "init container %q image %q does not use expected registry %q", ic.Name, ic.Image, expected) + } + }, 3*time.Minute, 10*time.Second) +} + +// TestInitContainersRunning verifies that all Datadog-injected init containers +// complete successfully and the pod is not stuck in ImagePullBackOff or +// ErrImagePull. +func (s *registrySuite) TestInitContainersRunning() { + k8s := s.Env().KubernetesCluster.Client() + + require.EventuallyWithT(s.T(), func(c *assert.CollectT) { + pods, err := k8s.CoreV1().Pods("registry-test").List(context.Background(), metav1.ListOptions{ + LabelSelector: "app=registry-test-app", + }) + require.NoError(c, err, "failed to list pods") + require.NotEmpty(c, pods.Items, "no pods found for registry-test-app") + + pod := pods.Items[0] + + // Check that no init container is in a waiting state with an image pull error. 
+ for _, cs := range pod.Status.InitContainerStatuses { + if !isDatadogInitContainer(cs.Name) { + continue + } + if cs.State.Waiting != nil { + require.NotContains(c, cs.State.Waiting.Reason, "ImagePullBackOff", + "init container %q is in ImagePullBackOff: %s", cs.Name, cs.State.Waiting.Message) + require.NotContains(c, cs.State.Waiting.Reason, "ErrImagePull", + "init container %q has ErrImagePull: %s", cs.Name, cs.State.Waiting.Message) + } + } + + // Verify the pod reaches Running or Succeeded phase (init containers completed). + require.Contains(c, []corev1.PodPhase{corev1.PodRunning, corev1.PodSucceeded}, pod.Status.Phase, + "pod phase is %q, expected Running or Succeeded", pod.Status.Phase) + }, 5*time.Minute, 10*time.Second) +} + +// isDatadogInitContainer returns true for init container names that are +// typically injected by the Datadog admission controller. +func isDatadogInitContainer(name string) bool { + prefixes := []string{ + "datadog-lib-", + "datadog-init-", + "dd-lib-", + } + for _, p := range prefixes { + if strings.HasPrefix(name, p) { + return true + } + } + return false +} diff --git a/test/new-e2e/tests/admission-controller/testdata/helm_values.yaml b/test/new-e2e/tests/admission-controller/testdata/helm_values.yaml new file mode 100644 index 00000000000000..77c6c1383b897e --- /dev/null +++ b/test/new-e2e/tests/admission-controller/testdata/helm_values.yaml @@ -0,0 +1,19 @@ +--- +clusterAgent: + admissionController: + configMode: "hostip" + +datadog: + apm: + instrumentation: + enabled: true + injector: + imageTag: "0.54.0" + enabledNamespaces: [] + targets: + - name: "apps" + namespaceSelector: + matchNames: + - "registry-test" + ddTraceVersions: + python: "v3.18.1"