Skip to content

Commit 4364afd

Browse files
committed
[SPARK-49389] Support master|worker container templates
### What changes were proposed in this pull request?

This PR aims to support `master` container and `worker` container templates.

### Why are the changes needed?

Previously, the pod template threw exceptions on containers whose name is `master` or `worker` because two `master|worker` containers were created. After this PR, the problem is resolved.

- apache#80

### Does this PR introduce _any_ user-facing change?

No, this is a new unreleased feature.

### How was this patch tested?

For now, we need to run manual tests.

```
$ ./gradlew build buildDockerImage spark-operator-api:relocateGeneratedCRD -x check
$ helm install spark-kubernetes-operator -f build-tools/helm/spark-kubernetes-operator/values.yaml build-tools/helm/spark-kubernetes-operator/
$ kubectl apply -f examples/cluster-with-template.yaml
$ kubectl get pod -l spark-role=master -oyaml | yq '.items[0].spec.containers[0].resources'
limits:
  cpu: "2"
  memory: 2Gi
requests:
  cpu: "2"
  memory: 2Gi
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#104 from dongjoon-hyun/SPARK-49389.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 6629764 commit 4364afd

File tree

2 files changed

+70
-46
lines changed

2 files changed

+70
-46
lines changed

examples/cluster-with-template.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ spec:
3535
securityContext:
3636
runAsUser: 0
3737
containers:
38+
- name: master
39+
resources:
40+
requests:
41+
cpu: "2"
42+
memory: "2Gi"
43+
limits:
44+
cpu: "2"
45+
memory: "2Gi"
3846
- name: sidecar
3947
image: registry.k8s.io/pause
4048
resources:
@@ -58,6 +66,14 @@ spec:
5866
securityContext:
5967
runAsUser: 0
6068
containers:
69+
- name: worker
70+
resources:
71+
requests:
72+
cpu: "2"
73+
memory: "2Gi"
74+
limits:
75+
cpu: "2"
76+
memory: "2Gi"
6177
- name: sidecar
6278
image: registry.k8s.io/pause
6379
resources:

spark-submission-worker/src/main/java/org/apache/spark/k8s/operator/SparkClusterResourceSpec.java

Lines changed: 54 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -153,27 +153,31 @@ private static StatefulSet buildMasterStatefulSet(
153153
String options,
154154
ObjectMeta objectMeta,
155155
StatefulSetSpec statefulSetSpec) {
156-
return new StatefulSetBuilder()
157-
.withNewMetadataLike(objectMeta)
158-
.withName(name + "-master")
159-
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_MASTER_VALUE)
160-
.withNamespace(namespace)
161-
.endMetadata()
162-
.withNewSpecLike(statefulSetSpec)
163-
.withPodManagementPolicy("Parallel")
164-
.withReplicas(1)
165-
.editOrNewSelector()
166-
.addToMatchLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_MASTER_VALUE)
167-
.endSelector()
168-
.editOrNewTemplate()
169-
.editOrNewMetadata()
170-
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_MASTER_VALUE)
171-
.endMetadata()
172-
.editOrNewSpec()
173-
.withSchedulerName(scheduler)
174-
.withTerminationGracePeriodSeconds(0L)
175-
.addNewContainer()
176-
.withName("master")
156+
var partialStatefulSet =
157+
new StatefulSetBuilder()
158+
.withNewMetadataLike(objectMeta)
159+
.withName(name + "-master")
160+
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_MASTER_VALUE)
161+
.withNamespace(namespace)
162+
.endMetadata()
163+
.withNewSpecLike(statefulSetSpec)
164+
.withPodManagementPolicy("Parallel")
165+
.withReplicas(1)
166+
.editOrNewSelector()
167+
.addToMatchLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_MASTER_VALUE)
168+
.endSelector()
169+
.editOrNewTemplate()
170+
.editOrNewMetadata()
171+
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_MASTER_VALUE)
172+
.endMetadata()
173+
.editOrNewSpec()
174+
.withSchedulerName(scheduler)
175+
.withTerminationGracePeriodSeconds(0L);
176+
if (!partialStatefulSet.hasMatchingContainer(p -> "master".equals(p.getName()))) {
177+
partialStatefulSet = partialStatefulSet.addNewContainer().withName("master").endContainer();
178+
}
179+
return partialStatefulSet
180+
.editMatchingContainer(p -> "master".equals(p.getName()))
177181
.withImage(image)
178182
.addNewEnv()
179183
.withName("SPARK_MASTER_OPTS")
@@ -212,31 +216,35 @@ private static StatefulSet buildWorkerStatefulSet(
212216
String options,
213217
ObjectMeta metadata,
214218
StatefulSetSpec statefulSetSpec) {
215-
return new StatefulSetBuilder()
216-
.withNewMetadataLike(metadata)
217-
.withName(name + "-worker")
218-
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_WORKER_VALUE)
219-
.withNamespace(namespace)
220-
.endMetadata()
221-
.withNewSpecLike(statefulSetSpec)
222-
.withPodManagementPolicy("Parallel")
223-
.withReplicas(initWorkers)
224-
.withServiceName(name + "-worker-svc")
225-
.editOrNewSelector()
226-
.addToMatchLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_WORKER_VALUE)
227-
.endSelector()
228-
.editOrNewTemplate()
229-
.editOrNewMetadata()
230-
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_WORKER_VALUE)
231-
.endMetadata()
232-
.editOrNewSpec()
233-
.withSchedulerName(scheduler)
234-
.withTerminationGracePeriodSeconds(0L)
235-
.withNewDnsConfig()
236-
.withSearches(String.format("%s-worker-svc.%s.svc.cluster.local", name, namespace))
237-
.endDnsConfig()
238-
.addNewContainer()
239-
.withName("worker")
219+
var partialStatefulSet =
220+
new StatefulSetBuilder()
221+
.withNewMetadataLike(metadata)
222+
.withName(name + "-worker")
223+
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_WORKER_VALUE)
224+
.withNamespace(namespace)
225+
.endMetadata()
226+
.withNewSpecLike(statefulSetSpec)
227+
.withPodManagementPolicy("Parallel")
228+
.withReplicas(initWorkers)
229+
.withServiceName(name + "-worker-svc")
230+
.editOrNewSelector()
231+
.addToMatchLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_WORKER_VALUE)
232+
.endSelector()
233+
.editOrNewTemplate()
234+
.editOrNewMetadata()
235+
.addToLabels(LABEL_SPARK_ROLE_NAME, LABEL_SPARK_ROLE_WORKER_VALUE)
236+
.endMetadata()
237+
.editOrNewSpec()
238+
.withSchedulerName(scheduler)
239+
.withTerminationGracePeriodSeconds(0L)
240+
.withNewDnsConfig()
241+
.withSearches(String.format("%s-worker-svc.%s.svc.cluster.local", name, namespace))
242+
.endDnsConfig();
243+
if (!partialStatefulSet.hasMatchingContainer(p -> "worker".equals(p.getName()))) {
244+
partialStatefulSet = partialStatefulSet.addNewContainer().withName("worker").endContainer();
245+
}
246+
return partialStatefulSet
247+
.editMatchingContainer(p -> "worker".equals(p.getName()))
240248
.withImage(image)
241249
.addNewEnv()
242250
.withName("SPARK_LOG_DIR")

0 commit comments

Comments
 (0)