Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ private[spark] object Config extends Logging {

val KUBERNETES_ALLOCATION_BATCH_DELAY =
ConfigBuilder("spark.kubernetes.allocation.batch.delay")
.doc("Number of seconds to wait between each round of executor allocation.")
.longConf
.checkValue(value => value > 0, "Allocation batch delay should be a positive integer")
.createWithDefault(1)
.doc("Time to wait between each round of executor allocation.")
.timeConf(TimeUnit.MILLISECONDS)
.checkValue(value => value > 0, "Allocation batch delay must be a positive time value.")
.createWithDefaultString("1s")

val KUBERNETES_EXECUTOR_LOST_REASON_CHECK_MAX_ATTEMPTS =
ConfigBuilder("spark.kubernetes.executor.lostCheck.maxAttempts")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ private[spark] class KubernetesClusterSchedulerBackend(

private val initialExecutors = SchedulerBackendUtils.getInitialTargetExecutorNumber(conf)

private val podAllocationInterval = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY)
private val podAllocationInterval = conf.getTimeAsMs(KUBERNETES_ALLOCATION_BATCH_DELAY.key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be just conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY). The config's unit is already ms.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was the previous state, actually. Not sure what the best practice is here. @mridulm, thoughts? getTimeAsMs seemed better because it would protect us if the config's unit ever changes: it enforces the contract that the value must be in milliseconds, which is essential for the allocator to function correctly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) returns a Long if it's a time conf. That's how time configs are expected to be used.

You don't need podAllocationInterval.toLong later on like you had before.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I (incorrectly) assumed KUBERNETES_ALLOCATION_BATCH_DELAY was a String and not a ConfigEntry.
@vanzin's suggestion is much more elegant in comparison.


private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE)

Expand Down Expand Up @@ -217,7 +217,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
.watch(new ExecutorPodsWatcher()))

allocatorExecutor.scheduleWithFixedDelay(
allocatorRunnable, 0L, podAllocationInterval, TimeUnit.SECONDS)
allocatorRunnable, 0L, podAllocationInterval, TimeUnit.MILLISECONDS)

if (!Utils.isDynamicAllocationEnabled(conf)) {
doRequestTotalExecutors(initialExecutors)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
private val NAMESPACE = "test-namespace"
private val SPARK_DRIVER_HOST = "localhost"
private val SPARK_DRIVER_PORT = 7077
private val POD_ALLOCATION_INTERVAL = 60L
private val POD_ALLOCATION_INTERVAL = "1m"
private val DRIVER_URL = RpcEndpointAddress(
SPARK_DRIVER_HOST, SPARK_DRIVER_PORT, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString
private val FIRST_EXECUTOR_POD = new PodBuilder()
Expand Down Expand Up @@ -144,7 +144,7 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
.set(KUBERNETES_NAMESPACE, NAMESPACE)
.set("spark.driver.host", SPARK_DRIVER_HOST)
.set("spark.driver.port", SPARK_DRIVER_PORT.toString)
.set(KUBERNETES_ALLOCATION_BATCH_DELAY, POD_ALLOCATION_INTERVAL)
.set(KUBERNETES_ALLOCATION_BATCH_DELAY.key, POD_ALLOCATION_INTERVAL)
executorPodsWatcherArgument = ArgumentCaptor.forClass(classOf[Watcher[Pod]])
allocatorRunnable = ArgumentCaptor.forClass(classOf[Runnable])
requestExecutorRunnable = ArgumentCaptor.forClass(classOf[Runnable])
Expand All @@ -162,8 +162,8 @@ class KubernetesClusterSchedulerBackendSuite extends SparkFunSuite with BeforeAn
when(allocatorExecutor.scheduleWithFixedDelay(
allocatorRunnable.capture(),
mockitoEq(0L),
mockitoEq(POD_ALLOCATION_INTERVAL),
mockitoEq(TimeUnit.SECONDS))).thenReturn(null)
mockitoEq(TimeUnit.MINUTES.toMillis(1)),
mockitoEq(TimeUnit.MILLISECONDS))).thenReturn(null)
// Creating Futures in Scala backed by a Java executor service resolves to running
// ExecutorService#execute (as opposed to submit)
doNothing().when(requestExecutorsService).execute(requestExecutorRunnable.capture())
Expand Down