Skip to content

Commit ef081f9

Browse files
Merge remote-tracking branch 'upstream/main'
2 parents 28a02a0 + 3658620 commit ef081f9

File tree

11 files changed

+135
-21
lines changed

11 files changed

+135
-21
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ VERSION ?= v0.0.0-dev
1212
BUNDLE_VERSION ?= $(VERSION:v%=%)
1313

1414
# APPWRAPPER_VERSION defines the default version of the AppWrapper controller
15-
APPWRAPPER_VERSION ?= v0.21.1
15+
APPWRAPPER_VERSION ?= v0.22.0
1616
APPWRAPPER_REPO ?= github.com/project-codeflare/appwrapper
1717
APPWRAPPER_CRD ?= ${APPWRAPPER_REPO}/config/crd?ref=${APPWRAPPER_VERSION}
1818

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ CodeFlare Stack Compatibility Matrix
88

99
| Component | Version |
1010
|------------------------------|---------------------------------------------------------------------------------------------------|
11-
| CodeFlare Operator | [v1.5.0](https://github.com/project-codeflare/codeflare-operator/releases/tag/v1.5.0) |
12-
| CodeFlare-SDK | [v0.17.0](https://github.com/project-codeflare/codeflare-sdk/releases/tag/v0.17.0) |
13-
| AppWrapper | [v0.20.2](https://github.com/project-codeflare/appwrapper/releases/tag/v0.20.2) |
11+
| CodeFlare Operator | [v1.6.0](https://github.com/project-codeflare/codeflare-operator/releases/tag/v1.6.0) |
12+
| CodeFlare-SDK | [v0.18.0](https://github.com/project-codeflare/codeflare-sdk/releases/tag/v0.18.0) |
13+
| AppWrapper | [v0.22.0](https://github.com/project-codeflare/appwrapper/releases/tag/v0.22.0) |
1414
| KubeRay | [v1.1.0](https://github.com/opendatahub-io/kuberay/releases/tag/v1.1.0) |
1515
| Kueue | [v0.7.0](https://github.com/opendatahub-io/kueue/releases/tag/v0.7.0) |
1616
<!-- Compatibility Matrix end -->
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
apiVersion: kustomize.config.k8s.io/v1beta1
22
kind: Kustomization
33
resources:
4-
- github.com/project-codeflare/appwrapper/config/crd?ref=v0.21.1
4+
- github.com/project-codeflare/appwrapper/config/crd?ref=v0.22.0

config/manager/params.env

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
codeflare-operator-controller-image=quay.io/opendatahub/codeflare-operator:v1.5.0
1+
codeflare-operator-controller-image=quay.io/opendatahub/codeflare-operator:v1.6.0
22
namespace=opendatahub

config/rbac/role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ rules:
1414
- patch
1515
- update
1616
- watch
17+
- apiGroups:
18+
- ""
19+
resources:
20+
- nodes
21+
verbs:
22+
- get
23+
- list
24+
- watch
1725
- apiGroups:
1826
- ""
1927
resources:

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ require (
99
github.com/opendatahub-io/opendatahub-operator/v2 v2.10.0
1010
github.com/openshift/api v0.0.0-20230823114715-5fdd7511b790
1111
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c
12-
github.com/project-codeflare/appwrapper v0.21.1
12+
github.com/project-codeflare/appwrapper v0.22.0
1313
github.com/project-codeflare/codeflare-common v0.0.0-20240628111341-56c962a09b7e
1414
github.com/ray-project/kuberay/ray-operator v1.1.1
1515
go.uber.org/zap v1.27.0
@@ -35,7 +35,7 @@ replace go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp => go.open
3535
replace github.com/jackc/pgx/v4 => github.com/jackc/pgx/v5 v5.5.4
3636

3737
// These replace directives support the backlevel go version required by ODH build
38-
replace github.com/project-codeflare/appwrapper v0.21.1 => github.com/project-codeflare/appwrapper v0.21.2-0.20240712173553-5b007c947b37
38+
replace github.com/project-codeflare/appwrapper v0.22.0 => github.com/project-codeflare/appwrapper v0.22.1-0.20240719212005-aab106b2126e
3939

4040
replace sigs.k8s.io/kueue v0.7.1 => github.com/opendatahub-io/kueue v0.7.0-odh-test
4141

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,8 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
246246
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
247247
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
248248
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
249-
github.com/project-codeflare/appwrapper v0.21.2-0.20240712173553-5b007c947b37 h1:x4qdbN98B9gtaU7pseJWABZzwoDawXLC5QMlx0idXxc=
250-
github.com/project-codeflare/appwrapper v0.21.2-0.20240712173553-5b007c947b37/go.mod h1:gKjO+iRtMIdBvIBYmN+VciL9kzWmkfwgk/+24wCLhSM=
249+
github.com/project-codeflare/appwrapper v0.22.1-0.20240719212005-aab106b2126e h1:cIsCTtAZaT2fsQG/QGUm4/wvJnobYawCPZwTwVE2DGo=
250+
github.com/project-codeflare/appwrapper v0.22.1-0.20240719212005-aab106b2126e/go.mod h1:gKjO+iRtMIdBvIBYmN+VciL9kzWmkfwgk/+24wCLhSM=
251251
github.com/project-codeflare/codeflare-common v0.0.0-20240628111341-56c962a09b7e h1:juFd1dQyioeMxbVE6F0YD25ozm/jiqJE+MpDhu8p22k=
252252
github.com/project-codeflare/codeflare-common v0.0.0-20240628111341-56c962a09b7e/go.mod h1:unKTw+XoMANTES3WieG016im7rxZ7IR2/ph++L5Vp1Y=
253253
github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=

pkg/controllers/appwrapper_controller.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,6 @@ package controllers
4141
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/finalizers,verbs=update
4242
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors,verbs=get;list;watch
4343
// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloadpriorityclasses,verbs=get;list;watch
44+
45+
// permission to watch nodes for Autopilot integration
46+
//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch

pkg/controllers/raycluster_controller.go

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,10 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
213213
return ctrl.Result{RequeueAfter: requeueTime}, err
214214
}
215215

216+
if err := r.deleteHeadPodIfMissingImagePullSecrets(ctx, cluster); err != nil {
217+
return ctrl.Result{RequeueAfter: requeueTime}, err
218+
}
219+
216220
_, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true})
217221
if err != nil {
218222
logger.Error(err, "Failed to update OAuth ClusterRoleBinding")
@@ -470,6 +474,7 @@ func generateCACertificate() ([]byte, []byte, error) {
470474

471475
return privateKeyPem, certPem, nil
472476
}
477+
473478
func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.NetworkPolicyApplyConfiguration {
474479
return networkingv1ac.NetworkPolicy(cluster.Name+"-workers", cluster.Namespace).
475480
WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}).
@@ -486,6 +491,7 @@ func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.Netw
486491
metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true),
487492
)
488493
}
494+
489495
func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConfiguration, kubeRayNamespaces []string) *networkingv1ac.NetworkPolicyApplyConfiguration {
490496
allSecuredPorts := []*networkingv1ac.NetworkPolicyPortApplyConfiguration{
491497
networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8443)),
@@ -544,6 +550,49 @@ func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConf
544550
)
545551
}
546552

553+
func (r *RayClusterReconciler) deleteHeadPodIfMissingImagePullSecrets(ctx context.Context, cluster *rayv1.RayCluster) error {
554+
serviceAccount, err := r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Get(ctx, oauthServiceAccountNameFromCluster(cluster), metav1.GetOptions{})
555+
if err != nil {
556+
return fmt.Errorf("failed to get OAuth ServiceAccount: %w", err)
557+
}
558+
559+
headPod, err := getHeadPod(ctx, r, cluster)
560+
if err != nil {
561+
return fmt.Errorf("failed to get head pod: %w", err)
562+
}
563+
564+
if headPod == nil {
565+
return nil
566+
}
567+
568+
missingSecrets := map[string]bool{}
569+
for _, secret := range serviceAccount.ImagePullSecrets {
570+
missingSecrets[secret.Name] = true
571+
}
572+
for _, secret := range headPod.Spec.ImagePullSecrets {
573+
delete(missingSecrets, secret.Name)
574+
}
575+
if len(missingSecrets) > 0 {
576+
if err := r.kubeClient.CoreV1().Pods(headPod.Namespace).Delete(ctx, headPod.Name, metav1.DeleteOptions{}); err != nil {
577+
return fmt.Errorf("failed to delete head pod: %w", err)
578+
}
579+
}
580+
return nil
581+
}
582+
583+
func getHeadPod(ctx context.Context, r *RayClusterReconciler, cluster *rayv1.RayCluster) (*corev1.Pod, error) {
584+
podList, err := r.kubeClient.CoreV1().Pods(cluster.Namespace).List(ctx, metav1.ListOptions{
585+
LabelSelector: fmt.Sprintf("ray.io/node-type=head,ray.io/cluster=%s", cluster.Name),
586+
})
587+
if err != nil {
588+
return nil, err
589+
}
590+
if len(podList.Items) > 0 {
591+
return &podList.Items[0], nil
592+
}
593+
return nil, nil
594+
}
595+
547596
// SetupWithManager sets up the controller with the Manager.
548597
func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
549598
r.kubeClient = kubernetes.NewForConfigOrDie(mgr.GetConfig())
@@ -577,7 +626,8 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
577626
NamespacedName: client.ObjectKey{
578627
Name: name,
579628
Namespace: namespace,
580-
}}}
629+
},
630+
}}
581631
}),
582632
)
583633
if r.IsOpenShift {

pkg/controllers/raycluster_controller_test.go

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import (
3333

3434
var _ = Describe("RayCluster controller", func() {
3535
Context("RayCluster controller test", func() {
36-
var rayClusterName = "test-raycluster"
36+
rayClusterName := "test-raycluster"
3737
var namespaceName string
3838
BeforeEach(func(ctx SpecContext) {
3939
By("Creating a namespace for running the tests.")
@@ -145,6 +145,53 @@ var _ = Describe("RayCluster controller", func() {
145145
}).WithTimeout(time.Second * 10).Should(WithTransform(OwnerReferenceName, Equal(foundRayCluster.Name)))
146146
})
147147

148+
It("should delete the head pod if missing image pull secrets", func(ctx SpecContext) {
149+
foundRayCluster, err := rayClient.RayV1().RayClusters(namespaceName).Get(ctx, rayClusterName, metav1.GetOptions{})
150+
Expect(err).To(Not(HaveOccurred()))
151+
152+
Eventually(func() (*corev1.ServiceAccount, error) {
153+
return k8sClient.CoreV1().ServiceAccounts(namespaceName).Get(ctx, oauthServiceAccountNameFromCluster(foundRayCluster), metav1.GetOptions{})
154+
}).WithTimeout(time.Second * 10).Should(WithTransform(OwnerReferenceKind, Equal("RayCluster")))
155+
156+
headPodName := "head-pod"
157+
headPod := &corev1.Pod{
158+
ObjectMeta: metav1.ObjectMeta{
159+
Name: headPodName,
160+
Namespace: namespaceName,
161+
Labels: map[string]string{
162+
"ray.io/node-type": "head",
163+
"ray.io/cluster": foundRayCluster.Name,
164+
},
165+
},
166+
Spec: corev1.PodSpec{
167+
Containers: []corev1.Container{
168+
{
169+
Name: "head-container",
170+
Image: "busybox",
171+
},
172+
},
173+
},
174+
}
175+
_, err = k8sClient.CoreV1().Pods(namespaceName).Create(ctx, headPod, metav1.CreateOptions{})
176+
Expect(err).To(Not(HaveOccurred()))
177+
178+
Eventually(func() (*corev1.Pod, error) {
179+
return k8sClient.CoreV1().Pods(namespaceName).Get(ctx, headPodName, metav1.GetOptions{})
180+
}).WithTimeout(time.Second * 10).ShouldNot(BeNil())
181+
182+
sa, err := k8sClient.CoreV1().ServiceAccounts(namespaceName).Get(ctx, oauthServiceAccountNameFromCluster(foundRayCluster), metav1.GetOptions{})
183+
Expect(err).To(Not(HaveOccurred()))
184+
185+
sa.ImagePullSecrets = append(sa.ImagePullSecrets, corev1.LocalObjectReference{Name: "test-image-pull-secret"})
186+
_, err = k8sClient.CoreV1().ServiceAccounts(namespaceName).Update(ctx, sa, metav1.UpdateOptions{})
187+
Expect(err).To(Not(HaveOccurred()))
188+
189+
Eventually(func() error {
190+
_, err := k8sClient.CoreV1().Pods(namespaceName).Get(ctx, headPodName, metav1.GetOptions{})
191+
return err
192+
}).WithTimeout(time.Second * 10).Should(Satisfy(errors.IsNotFound))
193+
})
194+
148195
It("should remove CRB when the RayCluster is deleted", func(ctx SpecContext) {
149196
foundRayCluster, err := rayClient.RayV1().RayClusters(namespaceName).Get(ctx, rayClusterName, metav1.GetOptions{})
150197
Expect(err).To(Not(HaveOccurred()))
@@ -157,7 +204,6 @@ var _ = Describe("RayCluster controller", func() {
157204
return err
158205
}).WithTimeout(time.Second * 10).Should(Satisfy(errors.IsNotFound))
159206
})
160-
161207
})
162208
})
163209

0 commit comments

Comments
 (0)