Skip to content

Commit e407646

Browse files
fix(templates): e2e fixes for RPM-based distros (Rocky 9)
Fixes discovered during end-to-end testing of Rocky Linux 9 with all three container runtimes (Docker, containerd, CRI-O) + Kubernetes.

Fixes:
- CRI-O: add crun and containers-common dependencies for RHEL-family (opensuse package doesn't pull OCI runtime deps on RPM)
- CRI-O: fix repo file name format (isv:cri-o:stable:vX.Y.repo)
- Kubernetes/Calico: patch Tigera operator with hostNetwork + node IP to resolve CNI bootstrap chicken-and-egg problem (operator can't reach API server via cluster IP before CNI is installed)
- Containerd: set sandbox_image to pause:3.10 for K8s 1.33+ compat
- Common: add /usr/local/bin to sudo secure_path on RHEL-family
- AMI resolver: per-OS arch mapping instead of hardcoded Ubuntu logic

Also adds Rocky 9 + AL2023 e2e test data files and test entries for all three container runtimes.

Tested: Rocky 9 + Docker (PASS), Rocky 9 + CRI-O (PASS), Rocky 9 + Containerd (PASS)

Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
1 parent cf6ea28 commit e407646

17 files changed

+412
-35
lines changed

internal/ami/registry.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ var registry = map[string]OSImage{
3333
NamePattern: "ubuntu/images/hvm-ssd-gp3/ubuntu-noble-24.04-%s-server-*",
3434
SSMPath: "/aws/service/canonical/ubuntu/server/24.04/stable/" +
3535
"current/%s/hvm/ebs-gp3/ami-id",
36+
NameArchMap: map[string]string{"x86_64": "amd64"},
3637
Architectures: []string{"x86_64", "arm64"},
3738
},
3839
"ubuntu-22.04": {
@@ -46,6 +47,7 @@ var registry = map[string]OSImage{
4647
NamePattern: "ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-%s-server-*",
4748
SSMPath: "/aws/service/canonical/ubuntu/server/22.04/stable/" +
4849
"current/%s/hvm/ebs-gp3/ami-id",
50+
NameArchMap: map[string]string{"x86_64": "amd64"},
4951
Architectures: []string{"x86_64", "arm64"},
5052
},
5153
"amazon-linux-2023": {
@@ -69,7 +71,7 @@ var registry = map[string]OSImage{
6971
PackageManager: PackageManagerDNF,
7072
MinRootVolumeGB: 20,
7173
OwnerID: "792107900819",
72-
NamePattern: "Rocky-9-EC2-Base-*.%s-*",
74+
NamePattern: "Rocky-9-EC2-Base-*.%s",
7375
SSMPath: "", // No SSM support for Rocky Linux
7476
Architectures: []string{"x86_64", "arm64"},
7577
},
@@ -84,6 +86,7 @@ var registry = map[string]OSImage{
8486
NamePattern: "ubuntu/images/hvm-ssd/ubuntu-focal-20.04-%s-server-*",
8587
SSMPath: "/aws/service/canonical/ubuntu/server/20.04/stable/" +
8688
"current/%s/hvm/ebs-gp2/ami-id",
89+
NameArchMap: map[string]string{"x86_64": "amd64"},
8790
Architectures: []string{"x86_64", "arm64"},
8891
},
8992
}

internal/ami/resolver.go

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,14 @@ func (r *Resolver) resolveViaDescribeImages(
147147
osImage *OSImage,
148148
arch string,
149149
) (string, error) {
150-
// Convert architecture for AMI name pattern (Ubuntu uses "amd64" not "x86_64")
151-
nameArch := archToAMINameArch(arch)
150+
// Convert architecture for AMI name pattern if the OS has a custom mapping
151+
// (e.g., Ubuntu uses "amd64" instead of "x86_64" in AMI names)
152+
nameArch := arch
153+
if osImage.NameArchMap != nil {
154+
if mapped, ok := osImage.NameArchMap[arch]; ok {
155+
nameArch = mapped
156+
}
157+
}
152158
namePattern := fmt.Sprintf(osImage.NamePattern, nameArch)
153159

154160
filters := []types.Filter{
@@ -231,17 +237,6 @@ func archToSSMArch(arch string) string {
231237
}
232238
}
233239

234-
// archToAMINameArch converts EC2 architecture names to AMI name pattern format.
235-
// Some vendors (like Ubuntu) use "amd64" in AMI names instead of "x86_64".
236-
func archToAMINameArch(arch string) string {
237-
switch arch {
238-
case "x86_64":
239-
return "amd64"
240-
default:
241-
return arch
242-
}
243-
}
244-
245240
// contains checks if a string slice contains a specific item.
246241
func contains(slice []string, item string) bool {
247242
for _, s := range slice {

internal/ami/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ type OSImage struct {
7979
// Use %s as a placeholder for architecture. Empty if SSM is not supported.
8080
SSMPath string
8181

82+
// NameArchMap maps EC2 architecture names to the format used in AMI name
83+
// patterns. If nil, EC2 arch names are used directly (x86_64, arm64).
84+
// Ubuntu uses "amd64" instead of "x86_64", so it needs: {"x86_64": "amd64"}.
85+
NameArchMap map[string]string
86+
8287
// Architectures lists the supported CPU architectures (x86_64, arm64).
8388
Architectures []string
8489
}

pkg/provisioner/templates/common.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,14 @@ case "${HOLODECK_OS_FAMILY}" in
108108
if ! command -v dnf &>/dev/null && command -v yum &>/dev/null; then
109109
export HOLODECK_PKG_MGR="yum"
110110
fi
111+
# Ensure /usr/local/bin is in sudo's secure_path (RHEL-family excludes it by default).
112+
# Without this, 'sudo kubeadm' and similar commands fail with "command not found"
113+
# because kubeadm/kubelet/kubectl are installed to /usr/local/bin.
114+
if ! sudo grep -qr '/usr/local/bin' /etc/sudoers /etc/sudoers.d 2>/dev/null; then
115+
echo 'Defaults secure_path = /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin' | \
116+
sudo tee /etc/sudoers.d/holodeck-path > /dev/null
117+
sudo chmod 0440 /etc/sudoers.d/holodeck-path
118+
fi
111119
;;
112120
*)
113121
export HOLODECK_PKG_MGR="unknown"

pkg/provisioner/templates/containerd.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,10 @@ containerd config default | sudo tee /etc/containerd/config.toml > /dev/null
160160
# Set systemd as the cgroup driver
161161
sudo sed -i 's/SystemdCgroup \= false/SystemdCgroup \= true/g' /etc/containerd/config.toml
162162
163+
# Use the sandbox image version that matches kubeadm expectations.
164+
# containerd 1.7.x defaults to pause:3.8, but Kubernetes 1.33+ expects pause:3.10.
165+
sudo sed -i 's|sandbox_image = .*|sandbox_image = "registry.k8s.io/pause:3.10"|g' /etc/containerd/config.toml
166+
163167
# Ensure CNI paths are configured correctly
164168
sudo sed -i 's|conf_dir = .*|conf_dir = "/etc/cni/net.d"|g' /etc/containerd/config.toml
165169
sudo sed -i 's|bin_dir = .*|bin_dir = "/opt/cni/bin"|g' /etc/containerd/config.toml

pkg/provisioner/templates/crio.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,11 @@ case "${HOLODECK_OS_FAMILY}" in
9595
9696
amazon|rhel)
9797
if [[ ! -f /etc/yum.repos.d/cri-o.repo ]]; then
98+
# Repo file name format: isv:cri-o:stable:vX.Y.repo
99+
# See: https://download.opensuse.org/repositories/isv:/cri-o:/stable:/vX.Y/rpm/
100+
CRIO_REPO_FILE="isv:cri-o:stable:${CRIO_VERSION}.repo"
98101
holodeck_retry 3 "$COMPONENT" sudo curl -fsSL -o /etc/yum.repos.d/cri-o.repo \
99-
"${CRIO_REPO_URL}/rpm/cri-o.repo"
102+
"${CRIO_REPO_URL}/rpm/${CRIO_REPO_FILE}"
100103
else
101104
holodeck_log "INFO" "$COMPONENT" "CRI-O repository already configured"
102105
fi
@@ -112,7 +115,17 @@ esac
112115
holodeck_progress "$COMPONENT" 3 4 "Installing CRI-O"
113116
114117
holodeck_retry 3 "$COMPONENT" pkg_update
115-
holodeck_retry 3 "$COMPONENT" pkg_install cri-o
118+
119+
# The opensuse CRI-O package does not pull OCI runtime dependencies on RHEL-family.
120+
# Install crun (OCI runtime) and containers-common (registry/storage config) explicitly.
121+
case "${HOLODECK_OS_FAMILY}" in
122+
amazon|rhel)
123+
holodeck_retry 3 "$COMPONENT" pkg_install cri-o crun containers-common
124+
;;
125+
*)
126+
holodeck_retry 3 "$COMPONENT" pkg_install cri-o
127+
;;
128+
esac
116129
117130
# Start and enable Service
118131
sudo systemctl daemon-reload

pkg/provisioner/templates/kubernetes.go

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,49 @@ else
201201
holodeck_log "INFO" "$COMPONENT" "Tigera operator already installed"
202202
fi
203203
204-
# Wait for Tigera operator
205-
holodeck_log "INFO" "$COMPONENT" "Waiting for Tigera operator"
206-
holodeck_retry 10 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" wait \
207-
--for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
204+
# Patch Tigera operator to use host networking and reach the API server directly.
205+
# Without CNI, pods cannot reach the Kubernetes API server via cluster IP
206+
# (10.96.0.1:443) because kube-proxy iptables rules may not be functional yet.
207+
# The operator IS the CNI installer, so it must bypass cluster networking entirely.
208+
# - hostNetwork: true — use the node's network stack
209+
# - KUBERNETES_SERVICE_HOST=<node-ip> — reach API server via the node's IP
210+
# (must match a SAN in the kubeadm TLS cert; localhost is NOT in SANs)
211+
# - KUBERNETES_SERVICE_PORT=6443 — use the real API server port, not the service port
212+
# This is harmless for runtimes like Docker where cri-dockerd bridges service IPs.
213+
NODE_IP=$(hostname -I | awk '{print $1}')
214+
holodeck_log "INFO" "$COMPONENT" "Patching Tigera operator for host networking (API: ${NODE_IP}:6443)"
215+
holodeck_retry 3 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" patch deployment \
216+
tigera-operator -n tigera-operator --type=strategic -p "{
217+
\"spec\": {\"template\": {\"spec\": {
218+
\"hostNetwork\": true,
219+
\"dnsPolicy\": \"ClusterFirstWithHostNet\"
220+
}}}
221+
}"
222+
holodeck_retry 3 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" set env \
223+
deployment/tigera-operator -n tigera-operator \
224+
KUBERNETES_SERVICE_HOST="${NODE_IP}" KUBERNETES_SERVICE_PORT="6443"
225+
226+
# Wait for the patched rollout to complete
227+
holodeck_retry 10 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" rollout status \
228+
deployment/tigera-operator -n tigera-operator --timeout=300s
229+
230+
# Wait for Tigera operator CRDs to be established before applying custom resources.
231+
# The operator deployment becomes "available" before it has registered all its CRDs
232+
# (Installation, APIServer, etc.), causing "no matches for kind" errors.
233+
holodeck_log "INFO" "$COMPONENT" "Waiting for Tigera operator CRDs"
234+
if ! holodeck_retry 30 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" wait \
235+
--for=condition=established --timeout=10s crd/installations.operator.tigera.io; then
236+
# Diagnostic dump on failure
237+
holodeck_log "ERROR" "$COMPONENT" "CRD wait failed - collecting diagnostics"
238+
kubectl --kubeconfig "$KUBECONFIG" get pods -n tigera-operator -o wide 2>&1 || true
239+
kubectl --kubeconfig "$KUBECONFIG" describe pod -n tigera-operator 2>&1 | tail -40 || true
240+
kubectl --kubeconfig "$KUBECONFIG" logs -n tigera-operator -l name=tigera-operator --tail=30 2>&1 || true
241+
kubectl --kubeconfig "$KUBECONFIG" get events -n tigera-operator --sort-by='.lastTimestamp' 2>&1 | tail -20 || true
242+
kubectl --kubeconfig "$KUBECONFIG" get crd 2>&1 | grep -i tigera || true
243+
holodeck_error 6 "$COMPONENT" \
244+
"Tigera operator CRDs not registered after retries" \
245+
"The operator pod may be crashing. Check diagnostics above."
246+
fi
208247
209248
# Install Calico custom resources (idempotent)
210249
if ! kubectl --kubeconfig "$KUBECONFIG" get installations.operator.tigera.io default \
@@ -1117,9 +1156,28 @@ if ! kubectl --kubeconfig "$KUBECONFIG" get namespace tigera-operator &>/dev/nul
11171156
"https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml"
11181157
fi
11191158
1159+
# Patch Tigera operator for host networking (see comment in package template above)
1160+
NODE_IP=$(hostname -I | awk '{print $1}')
1161+
holodeck_log "INFO" "$COMPONENT" "Patching Tigera operator for host networking (API: ${NODE_IP}:6443)"
1162+
holodeck_retry 3 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" patch deployment \
1163+
tigera-operator -n tigera-operator --type=strategic -p "{
1164+
\"spec\": {\"template\": {\"spec\": {
1165+
\"hostNetwork\": true,
1166+
\"dnsPolicy\": \"ClusterFirstWithHostNet\"
1167+
}}}
1168+
}"
1169+
holodeck_retry 3 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" set env \
1170+
deployment/tigera-operator -n tigera-operator \
1171+
KUBERNETES_SERVICE_HOST="${NODE_IP}" KUBERNETES_SERVICE_PORT="6443"
1172+
11201173
holodeck_log "INFO" "$COMPONENT" "Waiting for Tigera operator..."
1121-
holodeck_retry 10 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" wait \
1122-
--for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
1174+
holodeck_retry 10 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" rollout status \
1175+
deployment/tigera-operator -n tigera-operator --timeout=300s
1176+
1177+
# Wait for Tigera operator CRDs before applying custom resources
1178+
holodeck_log "INFO" "$COMPONENT" "Waiting for Tigera operator CRDs..."
1179+
holodeck_retry 30 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" wait \
1180+
--for=condition=established --timeout=10s crd/installations.operator.tigera.io
11231181
11241182
if ! kubectl --kubeconfig "$KUBECONFIG" get installations.operator.tigera.io default \
11251183
-n tigera-operator &>/dev/null; then
@@ -1418,8 +1476,26 @@ if ! kubectl --kubeconfig "$KUBECONFIG" get namespace tigera-operator &>/dev/nul
14181476
"https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/tigera-operator.yaml"
14191477
fi
14201478
1421-
holodeck_retry 10 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" wait \
1422-
--for=condition=available --timeout=300s deployment/tigera-operator -n tigera-operator
1479+
# Patch Tigera operator for host networking (see comment in package template above)
1480+
NODE_IP=$(hostname -I | awk '{print $1}')
1481+
holodeck_log "INFO" "$COMPONENT" "Patching Tigera operator for host networking (API: ${NODE_IP}:6443)"
1482+
holodeck_retry 3 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" patch deployment \
1483+
tigera-operator -n tigera-operator --type=strategic -p "{
1484+
\"spec\": {\"template\": {\"spec\": {
1485+
\"hostNetwork\": true,
1486+
\"dnsPolicy\": \"ClusterFirstWithHostNet\"
1487+
}}}
1488+
}"
1489+
holodeck_retry 3 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" set env \
1490+
deployment/tigera-operator -n tigera-operator \
1491+
KUBERNETES_SERVICE_HOST="${NODE_IP}" KUBERNETES_SERVICE_PORT="6443"
1492+
1493+
holodeck_retry 10 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" rollout status \
1494+
deployment/tigera-operator -n tigera-operator --timeout=300s
1495+
1496+
# Wait for Tigera operator CRDs before applying custom resources
1497+
holodeck_retry 30 "$COMPONENT" kubectl --kubeconfig "$KUBECONFIG" wait \
1498+
--for=condition=established --timeout=10s crd/installations.operator.tigera.io
14231499
14241500
if ! kubectl --kubeconfig "$KUBECONFIG" get installations.operator.tigera.io default \
14251501
-n tigera-operator &>/dev/null; then

tests/aws_cluster_test.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,11 @@ var _ = DescribeTable("AWS Cluster E2E",
119119
})
120120

121121
state.opts.cfg.Spec.PrivateKey = sshKey
122-
state.opts.cfg.Spec.Username = "ubuntu"
122+
// Default to ubuntu for tests without OS specification;
123+
// when OS is set, the provider resolves the SSH username automatically
124+
if state.opts.cfg.Spec.Cluster == nil || state.opts.cfg.Spec.Cluster.ControlPlane.OS == "" {
125+
state.opts.cfg.Spec.Username = "ubuntu"
126+
}
123127
Expect(state.provider.Create()).To(Succeed(), "Failed to create cluster infrastructure")
124128

125129
By("Verifying cluster status in cache")
@@ -222,4 +226,14 @@ var _ = DescribeTable("AWS Cluster E2E",
222226
filePath: filepath.Join(packagePath, "data", "test_cluster_minimal.yaml"),
223227
description: "Tests smallest valid multinode cluster configuration",
224228
}, Label("cluster", "multinode", "minimal")),
229+
Entry("RPM Rocky 9 Cluster", clusterTestConfig{
230+
name: "rpm-rocky9-cluster",
231+
filePath: filepath.Join(packagePath, "data", "test_cluster_rpm_rocky9.yaml"),
232+
description: "Tests Rocky 9 RPM cluster with 1 CP + 1 GPU worker",
233+
}, Label("cluster", "multinode", "rpm", "post-merge")),
234+
Entry("RPM Amazon Linux 2023 Cluster", clusterTestConfig{
235+
name: "rpm-al2023-cluster",
236+
filePath: filepath.Join(packagePath, "data", "test_cluster_rpm_al2023.yaml"),
237+
description: "Tests Amazon Linux 2023 cluster with 1 CP + 1 GPU worker",
238+
}, Label("cluster", "multinode", "rpm", "post-merge")),
225239
)

tests/aws_test.go

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,12 @@ var _ = DescribeTable("AWS Environment E2E",
111111
})
112112

113113
state.opts.cfg.Spec.PrivateKey = sshKey
114-
//nolint:staticcheck // Auth is embedded but explicit access is clearer
115-
state.opts.cfg.Spec.Auth.Username = "ubuntu"
114+
// Default to ubuntu for tests without OS specification;
115+
// when OS is set, the provider resolves the SSH username automatically
116+
if state.opts.cfg.Spec.OS == "" {
117+
//nolint:staticcheck // Auth is embedded but explicit access is clearer
118+
state.opts.cfg.Spec.Auth.Username = "ubuntu"
119+
}
116120
Expect(state.provider.Create()).To(Succeed(), "Failed to create environment")
117121
Expect(state.opts.cfg.Name).NotTo(BeEmpty(), "Environment name should not be empty")
118122

@@ -127,7 +131,13 @@ var _ = DescribeTable("AWS Environment E2E",
127131
}
128132
}
129133
Expect(hostUrl).NotTo(BeEmpty(), "Host URL should not be empty")
130-
p, err := provisioner.New(state.log, state.opts.cfg.Spec.PrivateKey, state.opts.cfg.Spec.Username, hostUrl)
134+
// Use username from cache file (resolved by provider during Create)
135+
// rather than the original config which may be empty for OS-based provisioning
136+
sshUsername := env.Spec.Username
137+
if sshUsername == "" {
138+
sshUsername = state.opts.cfg.Spec.Username
139+
}
140+
p, err := provisioner.New(state.log, state.opts.cfg.Spec.PrivateKey, sshUsername, hostUrl)
131141
Expect(err).NotTo(HaveOccurred(), "Failed to create provisioner")
132142
defer func() {
133143
if p.Client != nil {
@@ -141,8 +151,8 @@ var _ = DescribeTable("AWS Environment E2E",
141151
p.Client = nil
142152
}
143153
}()
144-
_, runErr := p.Run(env)
145-
Expect(runErr).NotTo(HaveOccurred(), "Failed to provision environment")
154+
_, provisionErr := p.Run(env)
155+
Expect(provisionErr).NotTo(HaveOccurred(), "Failed to provision environment")
146156

147157
By("Kubernetes Configuration")
148158
k8s := state.opts.cfg.Spec.Kubernetes
@@ -210,11 +220,36 @@ var _ = DescribeTable("AWS Environment E2E",
210220
filePath: filepath.Join(packagePath, "data", "test_aws_k8s_latest.yml"),
211221
description: "Tests AWS environment with Kubernetes tracking master branch",
212222
}, Label("k8s-latest")),
213-
Entry("ARM64 GPU Test", testConfig{
214-
name: "ARM64 GPU Test",
215-
filePath: filepath.Join(packagePath, "data", "test_aws_arm64.yml"),
216-
description: "Tests full GPU stack on ARM64 (g5g Graviton) with architecture inferred from instance type",
217-
}, Label("arm64")),
223+
Entry("RPM Default Test (Rocky 9)", testConfig{
224+
name: "RPM Default Test",
225+
filePath: filepath.Join(packagePath, "data", "test_aws_rpm.yml"),
226+
description: "Tests RPM-based distro (Rocky 9) with Docker, full GPU stack, and Kubernetes",
227+
}, Label("default", "rpm")),
228+
Entry("RPM Rocky 9 Containerd", testConfig{
229+
name: "RPM Rocky 9 Containerd",
230+
filePath: filepath.Join(packagePath, "data", "test_rpm_rocky9_containerd.yml"),
231+
description: "Tests Rocky 9 with containerd runtime",
232+
}, Label("rpm", "post-merge")),
233+
Entry("RPM Rocky 9 CRI-O", testConfig{
234+
name: "RPM Rocky 9 CRI-O",
235+
filePath: filepath.Join(packagePath, "data", "test_rpm_rocky9_crio.yml"),
236+
description: "Tests Rocky 9 with CRI-O runtime",
237+
}, Label("rpm", "post-merge")),
238+
Entry("RPM Amazon Linux 2023 Docker", testConfig{
239+
name: "RPM AL2023 Docker",
240+
filePath: filepath.Join(packagePath, "data", "test_rpm_al2023_docker.yml"),
241+
description: "Tests Amazon Linux 2023 with Docker runtime",
242+
}, Label("rpm", "post-merge")),
243+
Entry("RPM Amazon Linux 2023 Containerd", testConfig{
244+
name: "RPM AL2023 Containerd",
245+
filePath: filepath.Join(packagePath, "data", "test_rpm_al2023_containerd.yml"),
246+
description: "Tests Amazon Linux 2023 with containerd runtime",
247+
}, Label("rpm", "post-merge")),
248+
Entry("RPM Amazon Linux 2023 CRI-O", testConfig{
249+
name: "RPM AL2023 CRI-O",
250+
filePath: filepath.Join(packagePath, "data", "test_rpm_al2023_crio.yml"),
251+
description: "Tests Amazon Linux 2023 with CRI-O runtime",
252+
}, Label("rpm", "post-merge")),
218253
)
219254

220255
// Note: To run tests in parallel, use: ginkgo -p or --procs=N

tests/data/test_aws_rpm.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
apiVersion: holodeck.nvidia.com/v1alpha1
2+
kind: Environment
3+
metadata:
4+
name: holodeck-rpm-e2e
5+
description: "RPM E2E: Rocky 9 + Docker + full GPU stack"
6+
spec:
7+
provider: aws
8+
auth:
9+
keyName: cnt-ci
10+
privateKey: /home/runner/.cache/key
11+
instance:
12+
type: g4dn.xlarge
13+
region: us-west-1
14+
os: rocky-9
15+
image:
16+
architecture: amd64
17+
containerRuntime:
18+
install: true
19+
name: docker
20+
nvidiaContainerToolkit:
21+
install: true
22+
nvidiaDriver:
23+
install: true
24+
kubernetes:
25+
install: true
26+
installer: kubeadm

0 commit comments

Comments (0)