
Commit 60e1bd6

dvogelbacher authored and Robert Kruszewski committed
[SPARK-23825][K8S] Requesting memory + memory overhead for pod memory
## What changes were proposed in this pull request?

Kubernetes driver and executor pods should request `memory + memoryOverhead` as their resources instead of just `memory`, see https://issues.apache.org/jira/browse/SPARK-23825

## How was this patch tested?

Existing unit tests were adapted.

Author: David Vogelbacher <[email protected]>

Closes apache#20943 from dvogelbacher/spark-23825.
1 parent 7b12f63 commit 60e1bd6
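
In essence, the patch builds one memory quantity from the JVM memory plus its overhead and uses it for both the container's memory request and its memory limit. Below is a minimal sketch of that pattern with the fabric8 builders the code already uses; the 10% overhead factor is an assumption here, the 384 MiB floor is the "minimum overhead constant" the executor suite mentions, and all names and values are illustrative rather than taken from the patch.

```scala
import io.fabric8.kubernetes.api.model.{ContainerBuilder, QuantityBuilder}

// Illustrative values; Spark derives these from configuration.
val memoryMiB = 1024L
// Assumed overhead rule: 10% of the container memory, but at least 384 MiB.
val memoryOverheadMiB = math.max((0.10 * memoryMiB).toLong, 384L)
val memoryWithOverheadMiB = memoryMiB + memoryOverheadMiB // 1408 for the values above

// One quantity now backs both the request and the limit.
val memoryQuantity = new QuantityBuilder(false)
  .withAmount(s"${memoryWithOverheadMiB}Mi")
  .build()

val container = new ContainerBuilder()
  .withName("example") // hypothetical container name
  .withNewResources()
    .addToRequests("memory", memoryQuantity)
    .addToLimits("memory", memoryQuantity)
    .endResources()
  .build()
```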

File tree

4 files changed: +7 additions, -11 deletions


resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStep.scala

Lines changed: 1 addition & 4 deletions
```diff
@@ -93,9 +93,6 @@ private[spark] class BasicDriverConfigurationStep(
         .withAmount(driverCpuCores)
         .build()
     val driverMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${driverMemoryMiB}Mi")
-      .build()
-    val driverMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${driverMemoryWithOverheadMiB}Mi")
       .build()
     val maybeCpuLimitQuantity = driverLimitCores.map { limitCores =>
@@ -117,7 +114,7 @@
         .withNewResources()
         .addToRequests("cpu", driverCpuQuantity)
         .addToRequests("memory", driverMemoryQuantity)
-        .addToLimits("memory", driverMemoryLimitQuantity)
+        .addToLimits("memory", driverMemoryQuantity)
         .addToLimits(maybeCpuLimitQuantity.toMap.asJava)
         .endResources()
         .addToArgs("driver")
```
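
Kubernetes schedules pods by their requests, not their limits, so requesting only the heap while the JVM's real footprint is heap plus overhead lets a node be overcommitted and the pod later OOM-killed; with this change the memory request always matches the memory limit. Here is a hypothetical helper (not part of the patch) stating the invariant that both this driver step and the executor factory below now satisfy, and that the adapted suites assert:

```scala
import io.fabric8.kubernetes.api.model.Container

// Hypothetical check: the container's memory request equals its memory limit.
def memoryRequestMatchesLimit(container: Container): Boolean = {
  val resources = container.getResources
  resources.getRequests.get("memory").getAmount == resources.getLimits.get("memory").getAmount
}
```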

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactory.scala

Lines changed: 1 addition & 4 deletions
```diff
@@ -110,9 +110,6 @@ private[spark] class ExecutorPodFactory(
       SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++
       executorLabels
     val executorMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${executorMemoryMiB}Mi")
-      .build()
-    val executorMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${executorMemoryWithOverhead}Mi")
       .build()
     val executorCpuQuantity = new QuantityBuilder(false)
@@ -169,7 +166,7 @@
       .withImagePullPolicy(imagePullPolicy)
       .withNewResources()
         .addToRequests("memory", executorMemoryQuantity)
-        .addToLimits("memory", executorMemoryLimitQuantity)
+        .addToLimits("memory", executorMemoryQuantity)
         .addToRequests("cpu", executorCpuQuantity)
       .endResources()
       .addAllToEnv(executorEnv.asJava)
```

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStepSuite.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -91,7 +91,7 @@ class BasicDriverConfigurationStepSuite extends SparkFunSuite {
     val resourceRequirements = preparedDriverSpec.driverContainer.getResources
     val requests = resourceRequirements.getRequests.asScala
     assert(requests("cpu").getAmount === "2")
-    assert(requests("memory").getAmount === "256Mi")
+    assert(requests("memory").getAmount === "456Mi")
     val limits = resourceRequirements.getLimits.asScala
     assert(limits("memory").getAmount === "456Mi")
     assert(limits("cpu").getAmount === "4")
```

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactorySuite.scala

Lines changed: 4 additions & 2 deletions
```diff
@@ -67,12 +67,14 @@ class ExecutorPodFactorySuite extends SparkFunSuite with BeforeAndAfter with Bef
     assert(executor.getMetadata.getLabels.size() === 3)
     assert(executor.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL) === "1")
 
-    // There is exactly 1 container with no volume mounts and default memory limits.
-    // Default memory limit is 1024M + 384M (minimum overhead constant).
+    // There is exactly 1 container with no volume mounts and default memory limits and requests.
+    // Default memory limit/request is 1024M + 384M (minimum overhead constant).
     assert(executor.getSpec.getContainers.size() === 1)
     assert(executor.getSpec.getContainers.get(0).getImage === executorImage)
     assert(executor.getSpec.getContainers.get(0).getVolumeMounts.isEmpty)
     assert(executor.getSpec.getContainers.get(0).getResources.getLimits.size() === 1)
+    assert(executor.getSpec.getContainers.get(0).getResources
+      .getRequests.get("memory").getAmount === "1408Mi")
     assert(executor.getSpec.getContainers.get(0).getResources
       .getLimits.get("memory").getAmount === "1408Mi")
```
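
The 1408Mi asserted above comes straight from the suite's comment: the default 1024 MiB of executor memory plus the 384 MiB minimum overhead, now applied to the request as well as the limit. Spelled out:

```scala
// Defaults cited in the suite's comment: 1024 MiB executor memory + 384 MiB minimum overhead.
val executorMemoryMiB = 1024L
val minimumOverheadMiB = 384L
val expectedAmount = s"${executorMemoryMiB + minimumOverheadMiB}Mi" // "1408Mi"
```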
