Skip to content

Commit 7dac202

Browse files
authored
add 'report all images' functionality, enable by default in embedded cluster installations (#329)
* add 'report all images' functionality, enable by default in embedded cluster installations * test report all images functionality * f? * Revert "f?" This reverts commit 41251b4. * use fully qualified images in test chart * make image filtering more permissive - ignore domains * retry image check * tests f * cross namespace pod image reporting test * track images in all namespaces when appropriate * mock fix * only watch pods in namespaces we have permissions for * Apply suggestion from @laverya * fix image name generation * make image name handling more robust * detect image names with tags from pod spec * remove digest from spec image if present to avoid doubling it
1 parent 1f380a1 commit 7dac202

File tree

14 files changed

+617
-37
lines changed

14 files changed

+617
-37
lines changed

chart/templates/replicated-secret.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ stringData:
4545
replicatedID: {{ .Values.replicatedID | default "" | quote }}
4646
appID: {{ .Values.appID | default "" | quote }}
4747
tlsCertSecretName: {{ .Values.tlsCertSecretName | default "" | quote }}
48+
{{- if hasKey .Values "reportAllImages" }}
49+
reportAllImages: {{ .Values.reportAllImages }}
50+
{{- end }}
4851
{{- if (.Values.integration).licenseID }}
4952
integration-license-id: {{ .Values.integration.licenseID }}
5053
{{- end }}

chart/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,8 @@ tlsCertSecretName: ""
300300
# Use a more restrictive RBAC policy for the Replicated SDK. This requires setting statusInformers directly
301301
# in order to generate the correct RBAC rules.
302302
minimalRBAC: false
303+
304+
# Report all images running in the cluster, not just those in the releaseImages list
305+
# When true, the SDK will report every image+digest in the cluster regardless of the releaseImages filter
306+
# When false (default), only images matching the releaseImages list will be reported
307+
reportAllImages: false

cmd/replicated/api.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ func APICmd() *cobra.Command {
7575
ReplicatedID: replicatedConfig.ReplicatedID,
7676
AppID: replicatedConfig.AppID,
7777
TlsCertSecretName: replicatedConfig.TlsCertSecretName,
78+
ReportAllImages: replicatedConfig.ReportAllImages,
7879
Namespace: namespace,
7980
}
8081
apiserver.Start(params)

dagger/e2e.go

Lines changed: 180 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -475,16 +475,16 @@ spec:
475475

476476
fmt.Printf("E2E test for distribution %s and version %s passed\n", distribution, version)
477477

478-
// Validate running images via vendor API
478+
// Validate running images via vendor API (with default filtering)
479479
// 1. Call vendor API to get running images for this instance
480480
imagesSet, err := getRunningImages(ctx, appID, customerID, instanceAppID, tokenPlaintext)
481481
if err != nil {
482482
return fmt.Errorf("failed to get running images: %w", err)
483483
}
484484

485-
// 2. Validate expected images
485+
// 2. Validate expected images (should only contain images from releaseImages list)
486486
required := []string{"docker.io/library/nginx:latest", "docker.io/library/nginx:alpine", strings.TrimSpace(sdkImage)}
487-
forbidden := []string{"docker.io/alpine/curl:latest"}
487+
forbidden := []string{"docker.io/alpine/curl:latest"} // This is from replicated-ssl-test, should be filtered out
488488
missing := []string{}
489489
for _, img := range required {
490490
if img == "" {
@@ -504,10 +504,186 @@ spec:
504504
}
505505
for _, img := range forbidden {
506506
if _, ok := imagesSet[img]; ok {
507-
return fmt.Errorf("running images contains forbidden entry: %s", img)
507+
return fmt.Errorf("running images contains forbidden entry: %s (should be filtered out)", img)
508508
}
509509
}
510510

511+
fmt.Println("Default image filtering test passed - only releaseImages are reported")
512+
513+
// Test reportAllImages functionality
514+
fmt.Println("Testing reportAllImages=true functionality...")
515+
516+
// Create a ClusterRole and ClusterRoleBinding to allow the SDK to read pods across all namespaces
517+
fmt.Println("Creating ClusterRole for cross-namespace pod access...")
518+
clusterRoleYaml := `apiVersion: rbac.authorization.k8s.io/v1
519+
kind: ClusterRole
520+
metadata:
521+
name: replicated-cross-namespace-reader
522+
rules:
523+
- apiGroups:
524+
- ""
525+
resources:
526+
- "namespaces"
527+
verbs:
528+
- "list"
529+
- apiGroups:
530+
- ""
531+
resources:
532+
- "pods"
533+
verbs:
534+
- "list"
535+
- "watch"`
536+
clusterRoleSource := source.WithNewFile("/clusterrole.yaml", clusterRoleYaml)
537+
538+
ctr = dag.Container().From("bitnami/kubectl:latest").
539+
WithFile(kubeconfigPath, kubeconfigSource.File("/kubeconfig")).
540+
WithEnvVariable("KUBECONFIG", kubeconfigPath).
541+
WithFile("/tmp/clusterrole.yaml", clusterRoleSource.File("/clusterrole.yaml")).
542+
WithExec([]string{"kubectl", "apply", "-f", "/tmp/clusterrole.yaml"})
543+
out, err = ctr.Stdout(ctx)
544+
if err != nil {
545+
stderr, _ := ctr.Stderr(ctx)
546+
return fmt.Errorf("failed to apply clusterrole: %w\n\nStderr: %s\n\nStdout: %s", err, stderr, out)
547+
}
548+
fmt.Println(out)
549+
550+
// Create ClusterRoleBinding
551+
clusterRoleBindingYaml := `apiVersion: rbac.authorization.k8s.io/v1
552+
kind: ClusterRoleBinding
553+
metadata:
554+
name: replicated-cross-namespace-reader-binding
555+
roleRef:
556+
apiGroup: rbac.authorization.k8s.io
557+
kind: ClusterRole
558+
name: replicated-cross-namespace-reader
559+
subjects:
560+
- kind: ServiceAccount
561+
name: replicated
562+
namespace: default`
563+
clusterRoleBindingSource := source.WithNewFile("/clusterrolebinding.yaml", clusterRoleBindingYaml)
564+
565+
ctr = dag.Container().From("bitnami/kubectl:latest").
566+
WithFile(kubeconfigPath, kubeconfigSource.File("/kubeconfig")).
567+
WithEnvVariable("KUBECONFIG", kubeconfigPath).
568+
WithFile("/tmp/clusterrolebinding.yaml", clusterRoleBindingSource.File("/clusterrolebinding.yaml")).
569+
WithExec([]string{"kubectl", "apply", "-f", "/tmp/clusterrolebinding.yaml"})
570+
out, err = ctr.Stdout(ctx)
571+
if err != nil {
572+
stderr, _ := ctr.Stderr(ctx)
573+
return fmt.Errorf("failed to apply clusterrolebinding: %w\n\nStderr: %s\n\nStdout: %s", err, stderr, out)
574+
}
575+
fmt.Println(out)
576+
577+
// Deploy a test pod in kube-system namespace to verify cross-namespace image reporting
578+
fmt.Println("Deploying test pod in kube-system namespace...")
579+
kubeSystemPodYaml := `apiVersion: v1
580+
kind: Pod
581+
metadata:
582+
name: replicated-test-pod
583+
namespace: kube-system
584+
labels:
585+
app: replicated-test
586+
spec:
587+
containers:
588+
- name: busybox
589+
image: docker.io/library/busybox:latest
590+
command: ["sleep", "500d"]`
591+
kubeSystemPodSource := source.WithNewFile("/kube-system-pod.yaml", kubeSystemPodYaml)
592+
593+
ctr = dag.Container().From("bitnami/kubectl:latest").
594+
WithFile(kubeconfigPath, kubeconfigSource.File("/kubeconfig")).
595+
WithEnvVariable("KUBECONFIG", kubeconfigPath).
596+
WithFile("/tmp/kube-system-pod.yaml", kubeSystemPodSource.File("/kube-system-pod.yaml")).
597+
WithExec([]string{"kubectl", "apply", "-f", "/tmp/kube-system-pod.yaml"})
598+
out, err = ctr.Stdout(ctx)
599+
if err != nil {
600+
stderr, _ := ctr.Stderr(ctx)
601+
return fmt.Errorf("failed to apply kube-system pod: %w\n\nStderr: %s\n\nStdout: %s", err, stderr, out)
602+
}
603+
fmt.Println(out)
604+
605+
// Wait for the kube-system pod to be ready
606+
ctr = dag.Container().From("bitnami/kubectl:latest").
607+
WithFile(kubeconfigPath, kubeconfigSource.File("/kubeconfig")).
608+
WithEnvVariable("KUBECONFIG", kubeconfigPath).
609+
WithExec([]string{"kubectl", "wait", "--for=condition=ready", "pod/replicated-test-pod", "-n", "kube-system", "--timeout=1m"})
610+
out, err = ctr.Stdout(ctx)
611+
if err != nil {
612+
return fmt.Errorf("failed to wait for kube-system pod to be ready: %w", err)
613+
}
614+
fmt.Println(out)
615+
616+
// Upgrade the chart to enable reportAllImages
617+
err = upgradeChartAndRestart(ctx, kubeconfigSource, licenseID, channelSlug, []string{
618+
"--set", "replicated.tlsCertSecretName=test-tls",
619+
"--set", "replicated.reportAllImages=true",
620+
})
621+
if err != nil {
622+
return fmt.Errorf("failed to upgrade chart enabling reportAllImages: %w", err)
623+
}
624+
625+
err = waitForResourcesReady(ctx, newResourceNames, 30, 5*time.Second, tokenPlaintext, instanceAppID, distribution)
626+
if err != nil {
627+
return fmt.Errorf("failed to wait for resources to be ready: %w", err)
628+
}
629+
630+
// Get running images again with retries, to allow server to refresh image reporting
631+
var allImagesSet map[string]struct{}
632+
// Now we expect both the alpine/curl from default namespace AND busybox from kube-system
633+
nowRequired := []string{
634+
"docker.io/alpine/curl:latest", // from replicated-ssl-test in default namespace
635+
"docker.io/library/busybox:latest", // from kube-system namespace
636+
}
637+
maxAttempts := 5
638+
retryDelay := 5 * time.Second
639+
for attempt := 1; attempt <= maxAttempts; attempt++ {
640+
var getErr error
641+
allImagesSet, getErr = getRunningImages(ctx, appID, customerID, instanceAppID, tokenPlaintext)
642+
if getErr != nil {
643+
if attempt == maxAttempts {
644+
return fmt.Errorf("failed to get running images after enabling reportAllImages (after %d attempts): %w", maxAttempts, getErr)
645+
}
646+
fmt.Printf("attempt %d/%d: failed to get running images: %v\n", attempt, maxAttempts, getErr)
647+
time.Sleep(retryDelay)
648+
continue
649+
}
650+
651+
// Validate that previously excluded images are now present
652+
// The alpine/curl image from replicated-ssl-test should now be reported
653+
missing = []string{}
654+
for _, img := range nowRequired {
655+
if _, ok := allImagesSet[img]; !ok {
656+
missing = append(missing, img)
657+
}
658+
}
659+
if len(missing) == 0 {
660+
break
661+
}
662+
663+
if attempt == maxAttempts {
664+
seen := make([]string, 0, len(allImagesSet))
665+
for k := range allImagesSet {
666+
seen = append(seen, k)
667+
}
668+
return fmt.Errorf("with reportAllImages=true, missing expected images after %d attempts: %v. Seen: %v", maxAttempts, missing, seen)
669+
}
670+
671+
fmt.Printf("attempt %d/%d: still missing expected images with reportAllImages=true: %v (retrying)\n", attempt, maxAttempts, missing)
672+
time.Sleep(retryDelay)
673+
}
674+
675+
// Should still have the original required images
676+
for _, img := range required {
677+
if img == "" {
678+
continue
679+
}
680+
if _, ok := allImagesSet[img]; !ok {
681+
return fmt.Errorf("with reportAllImages=true, missing original required image: %s", img)
682+
}
683+
}
684+
685+
fmt.Printf("reportAllImages test passed - found %d total images (vs %d with filtering)\n", len(allImagesSet), len(imagesSet))
686+
511687
return nil
512688
}
513689

pkg/apiserver/bootstrap.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"github.com/replicatedhq/replicated-sdk/pkg/k8sutil"
1515
sdklicense "github.com/replicatedhq/replicated-sdk/pkg/license"
1616
"github.com/replicatedhq/replicated-sdk/pkg/logger"
17+
"github.com/replicatedhq/replicated-sdk/pkg/report"
18+
reporttypes "github.com/replicatedhq/replicated-sdk/pkg/report/types"
1719
"github.com/replicatedhq/replicated-sdk/pkg/store"
1820
"github.com/replicatedhq/replicated-sdk/pkg/upstream"
1921
upstreamtypes "github.com/replicatedhq/replicated-sdk/pkg/upstream/types"
@@ -44,6 +46,16 @@ func bootstrap(params APIServerParams) error {
4446
log.Println("replicatedID:", replicatedID)
4547
log.Println("appID:", appID)
4648

49+
// In Embedded Cluster installations, automatically enable reporting all images
50+
reportAllImages := params.ReportAllImages
51+
if !reportAllImages {
52+
distribution := report.GetDistribution(clientset)
53+
if distribution == reporttypes.EmbeddedCluster {
54+
reportAllImages = true
55+
log.Println("Detected Embedded Cluster installation, enabling reportAllImages")
56+
}
57+
}
58+
4759
var unverifiedLicense *kotsv1beta1.License
4860
if len(params.LicenseBytes) > 0 {
4961
l, err := sdklicense.LoadLicenseFromBytes(params.LicenseBytes)
@@ -111,6 +123,7 @@ func bootstrap(params APIServerParams) error {
111123
Namespace: params.Namespace,
112124
ReplicatedID: replicatedID,
113125
AppID: appID,
126+
ReportAllImages: reportAllImages,
114127
})
115128

116129
isIntegrationModeEnabled, err := integration.IsEnabled(params.Context, clientset, store.GetStore().GetNamespace(), store.GetStore().GetLicense())

pkg/apiserver/server.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ type APIServerParams struct {
4040
AppID string
4141
Namespace string
4242
TlsCertSecretName string
43+
ReportAllImages bool
4344
}
4445

4546
func Start(params APIServerParams) {

0 commit comments

Comments
 (0)