Skip to content

Commit 0114c89

Browse files
foxish authored and Marcelo Vanzin committed
[SPARK-22845][SCHEDULER] Modify spark.kubernetes.allocation.batch.delay to take time instead of int
## What changes were proposed in this pull request?

Fixes the `spark.kubernetes.allocation.batch.delay` configuration, which was taking an integer but should take a time value. See the discussion in #19946 (comment). The granularity is now milliseconds rather than seconds, since there is a use case for sub-second reactions in order to scale up rapidly, especially with dynamic allocation.

## How was this patch tested?

TODO: manual run of integration tests against this PR.

PTAL cc/ mccheah liyinan926 kimoonkim vanzin mridulm jiangxb1987 ueshin

Author: foxish <[email protected]>

Closes #20032 from foxish/fix-time-conf.
1 parent b176014 commit 0114c89

File tree

3 files changed

+9
-9
lines changed

3 files changed

+9
-9
lines changed

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -102,10 +102,10 @@ private[spark] object Config extends Logging {
102102

103103
val KUBERNETES_ALLOCATION_BATCH_DELAY =
104104
ConfigBuilder("spark.kubernetes.allocation.batch.delay")
105-
.doc("Number of seconds to wait between each round of executor allocation.")
106-
.longConf
107-
.checkValue(value => value > 0, "Allocation batch delay should be a positive integer")
108-
.createWithDefault(1)
105+
.doc("Time to wait between each round of executor allocation.")
106+
.timeConf(TimeUnit.MILLISECONDS)
107+
.checkValue(value => value > 0, "Allocation batch delay must be a positive time value.")
108+
.createWithDefaultString("1s")
109109

110110
val KUBERNETES_EXECUTOR_LOST_REASON_CHECK_MAX_ATTEMPTS =
111111
ConfigBuilder("spark.kubernetes.executor.lostCheck.maxAttempts")

resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
217217
.watch(new ExecutorPodsWatcher()))
218218

219219
allocatorExecutor.scheduleWithFixedDelay(
220-
allocatorRunnable, 0L, podAllocationInterval, TimeUnit.SECONDS)
220+
allocatorRunnable, 0L, podAllocationInterval, TimeUnit.MILLISECONDS)
221221

222222
if (!Utils.isDynamicAllocationEnabled(conf)) {
223223
doRequestTotalExecutors(initialExecutors)

resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackendSuite.scala

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
4646
private val NAMESPACE = "test-namespace"
4747
private val SPARK_DRIVER_HOST = "localhost"
4848
private val SPARK_DRIVER_PORT = 7077
49-
private val POD_ALLOCATION_INTERVAL = 60L
49+
private val POD_ALLOCATION_INTERVAL = "1m"
5050
private val DRIVER_URL = RpcEndpointAddress(
5151
SPARK_DRIVER_HOST, SPARK_DRIVER_PORT, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString
5252
private val FIRST_EXECUTOR_POD = new PodBuilder()
@@ -144,7 +144,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
144144
.set(KUBERNETES_NAMESPACE, NAMESPACE)
145145
.set("spark.driver.host", SPARK_DRIVER_HOST)
146146
.set("spark.driver.port", SPARK_DRIVER_PORT.toString)
147-
.set(KUBERNETES_ALLOCATION_BATCH_DELAY, POD_ALLOCATION_INTERVAL)
147+
.set(KUBERNETES_ALLOCATION_BATCH_DELAY.key, POD_ALLOCATION_INTERVAL)
148148
executorPodsWatcherArgument = ArgumentCaptor.forClass(classOf[Watcher[Pod]])
149149
allocatorRunnable = ArgumentCaptor.forClass(classOf[Runnable])
150150
requestExecutorRunnable = ArgumentCaptor.forClass(classOf[Runnable])
@@ -162,8 +162,8 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
162162
when(allocatorExecutor.scheduleWithFixedDelay(
163163
allocatorRunnable.capture(),
164164
mockitoEq(0L),
165-
mockitoEq(POD_ALLOCATION_INTERVAL),
166-
mockitoEq(TimeUnit.SECONDS))).thenReturn(null)
165+
mockitoEq(TimeUnit.MINUTES.toMillis(1)),
166+
mockitoEq(TimeUnit.MILLISECONDS))).thenReturn(null)
167167
// Creating Futures in Scala backed by a Java executor service resolves to running
168168
// ExecutorService#execute (as opposed to submit)
169169
doNothing().when(requestExecutorsService).execute(requestExecutorRunnable.capture())

0 commit comments

Comments
 (0)