
Commit 60e1bd6

dvogelbacher authored and Robert Kruszewski committed
[SPARK-23825][K8S] Requesting memory + memory overhead for pod memory
## What changes were proposed in this pull request?

Kubernetes driver and executor pods should request `memory + memoryOverhead` as their resources instead of just `memory`, see https://issues.apache.org/jira/browse/SPARK-23825

## How was this patch tested?

Existing unit tests were adapted.

Author: David Vogelbacher <[email protected]>

Closes apache#20943 from dvogelbacher/spark-23825.
1 parent 7b12f63 commit 60e1bd6
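
In essence, the patch builds one memory quantity from the JVM memory plus its overhead and uses it for both the container's memory request and its memory limit. Below is a minimal sketch of that pattern with the fabric8 builders the code already uses; the 10% overhead factor is an assumption here, the 384 MiB floor is the "minimum overhead constant" the executor suite mentions, and all names and values are illustrative rather than taken from the patch.

```scala
import io.fabric8.kubernetes.api.model.{ContainerBuilder, QuantityBuilder}

// Illustrative values; Spark derives these from configuration.
val memoryMiB = 1024L
// Assumed overhead rule: 10% of the container memory, but at least 384 MiB.
val memoryOverheadMiB = math.max((0.10 * memoryMiB).toLong, 384L)
val memoryWithOverheadMiB = memoryMiB + memoryOverheadMiB // 1408 for the values above

// One quantity now backs both the request and the limit.
val memoryQuantity = new QuantityBuilder(false)
  .withAmount(s"${memoryWithOverheadMiB}Mi")
  .build()

val container = new ContainerBuilder()
  .withName("example") // hypothetical container name
  .withNewResources()
    .addToRequests("memory", memoryQuantity)
    .addToLimits("memory", memoryQuantity)
    .endResources()
  .build()
```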

File tree

4 files changed: +7 additions, -11 deletions


resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStep.scala

Lines changed: 1 addition & 4 deletions
```diff
@@ -93,9 +93,6 @@ private[spark] class BasicDriverConfigurationStep(
         .withAmount(driverCpuCores)
         .build()
     val driverMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${driverMemoryMiB}Mi")
-      .build()
-    val driverMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${driverMemoryWithOverheadMiB}Mi")
       .build()
     val maybeCpuLimitQuantity = driverLimitCores.map { limitCores =>
@@ -117,7 +114,7 @@
         .withNewResources()
         .addToRequests("cpu", driverCpuQuantity)
         .addToRequests("memory", driverMemoryQuantity)
-        .addToLimits("memory", driverMemoryLimitQuantity)
+        .addToLimits("memory", driverMemoryQuantity)
         .addToLimits(maybeCpuLimitQuantity.toMap.asJava)
         .endResources()
         .addToArgs("driver")
```
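
Kubernetes schedules pods by their requests, not their limits, so requesting only the heap while the JVM's real footprint is heap plus overhead lets a node be overcommitted and the pod later OOM-killed; with this change the memory request always matches the memory limit. Here is a hypothetical helper (not part of the patch) stating the invariant that both this driver step and the executor factory below now satisfy, and that the adapted suites assert:

```scala
import io.fabric8.kubernetes.api.model.Container

// Hypothetical check: the container's memory request equals its memory limit.
def memoryRequestMatchesLimit(container: Container): Boolean = {
  val resources = container.getResources
  resources.getRequests.get("memory").getAmount == resources.getLimits.get("memory").getAmount
}
```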

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactory.scala

Lines changed: 1 addition & 4 deletions
```diff
@@ -110,9 +110,6 @@ private[spark] class ExecutorPodFactory(
       SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++
       executorLabels
     val executorMemoryQuantity = new QuantityBuilder(false)
-      .withAmount(s"${executorMemoryMiB}Mi")
-      .build()
-    val executorMemoryLimitQuantity = new QuantityBuilder(false)
       .withAmount(s"${executorMemoryWithOverhead}Mi")
       .build()
     val executorCpuQuantity = new QuantityBuilder(false)
@@ -169,7 +166,7 @@
       .withImagePullPolicy(imagePullPolicy)
       .withNewResources()
         .addToRequests("memory", executorMemoryQuantity)
-        .addToLimits("memory", executorMemoryLimitQuantity)
+        .addToLimits("memory", executorMemoryQuantity)
         .addToRequests("cpu", executorCpuQuantity)
       .endResources()
       .addAllToEnv(executorEnv.asJava)
```

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/steps/BasicDriverConfigurationStepSuite.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -91,7 +91,7 @@ class BasicDriverConfigurationStepSuite extends SparkFunSuite {
     val resourceRequirements = preparedDriverSpec.driverContainer.getResources
     val requests = resourceRequirements.getRequests.asScala
     assert(requests("cpu").getAmount === "2")
-    assert(requests("memory").getAmount === "256Mi")
+    assert(requests("memory").getAmount === "456Mi")
     val limits = resourceRequirements.getLimits.asScala
     assert(limits("memory").getAmount === "456Mi")
     assert(limits("cpu").getAmount === "4")
```

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodFactorySuite.scala

Lines changed: 4 additions & 2 deletions
```diff
@@ -67,12 +67,14 @@ class ExecutorPodFactorySuite extends SparkFunSuite with BeforeAndAfter with Bef
     assert(executor.getMetadata.getLabels.size() === 3)
     assert(executor.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL) === "1")
 
-    // There is exactly 1 container with no volume mounts and default memory limits.
-    // Default memory limit is 1024M + 384M (minimum overhead constant).
+    // There is exactly 1 container with no volume mounts and default memory limits and requests.
+    // Default memory limit/request is 1024M + 384M (minimum overhead constant).
     assert(executor.getSpec.getContainers.size() === 1)
     assert(executor.getSpec.getContainers.get(0).getImage === executorImage)
     assert(executor.getSpec.getContainers.get(0).getVolumeMounts.isEmpty)
     assert(executor.getSpec.getContainers.get(0).getResources.getLimits.size() === 1)
+    assert(executor.getSpec.getContainers.get(0).getResources
+      .getRequests.get("memory").getAmount === "1408Mi")
     assert(executor.getSpec.getContainers.get(0).getResources
       .getLimits.get("memory").getAmount === "1408Mi")
```
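
The 1408Mi asserted above comes straight from the suite's comment: the default 1024 MiB of executor memory plus the 384 MiB minimum overhead, now applied to the request as well as the limit. Spelled out:

```scala
// Defaults cited in the suite's comment: 1024 MiB executor memory + 384 MiB minimum overhead.
val executorMemoryMiB = 1024L
val minimumOverheadMiB = 384L
val expectedAmount = s"${executorMemoryMiB + minimumOverheadMiB}Mi" // "1408Mi"
```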
