Skip to content

Commit 72d0f1e

Browse files
dansanduleac authored and robert3005 committed
Allow setting a custom CondaEnvironment (manually or automatically) without having to run PythonRunner (apache-spark-on-k8s#260)
1 parent a72151f commit 72d0f1e

File tree

1 file changed

+32
-16
lines changed

1 file changed

+32
-16
lines changed

core/src/main/scala/org/apache/spark/deploy/CondaRunner.scala

Lines changed: 32 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ package org.apache.spark.deploy
2020
import java.util.concurrent.atomic.AtomicReference
2121

2222
import org.apache.spark.SparkConf
23+
import org.apache.spark.SparkContext
2324
import org.apache.spark.api.conda.CondaEnvironment
2425
import org.apache.spark.api.conda.CondaEnvironmentManager
2526
import org.apache.spark.internal.Logging
@@ -33,31 +34,46 @@ import org.apache.spark.util.Utils
3334
abstract class CondaRunner extends Logging {
3435
final def main(args: Array[String]): Unit = {
3536
val sparkConf = new SparkConf()
37+
run(args, CondaRunner.setupCondaEnvironmentAutomatically(sparkConf))
38+
}
3639

40+
def run(args: Array[String], maybeConda: Option[CondaEnvironment]): Unit
41+
}
42+
43+
object CondaRunner {
44+
private[spark] val condaEnvironment: AtomicReference[Option[CondaEnvironment]] =
45+
new AtomicReference(None)
46+
47+
/**
48+
* Sets up a conda environment if [[CondaEnvironmentManager.isConfigured]] returns true.
49+
* Once an environment has been set up, calling this method again (or the [[main]] method)
50+
* will throw a [[RuntimeException]].
51+
*/
52+
def setupCondaEnvironmentAutomatically(sparkConf: SparkConf): Option[CondaEnvironment] = {
3753
if (CondaEnvironmentManager.isConfigured(sparkConf)) {
3854
val condaBootstrapDeps = sparkConf.get(CONDA_BOOTSTRAP_PACKAGES)
3955
val condaChannelUrls = sparkConf.get(CONDA_CHANNEL_URLS)
4056
val condaBaseDir = Utils.createTempDir(Utils.getLocalDir(sparkConf), "conda").getAbsolutePath
4157
val condaEnvironmentManager = CondaEnvironmentManager.fromConf(sparkConf)
4258
val environment = condaEnvironmentManager
43-
.create(condaBaseDir, condaBootstrapDeps, condaChannelUrls)
44-
45-
// Save this as a global in order for SparkContext to be able to access it later, in case we
46-
// are shelling out, but providing a bridge back into this JVM.
47-
require(CondaRunner.condaEnvironment.compareAndSet(None, Some(environment)),
48-
"Couldn't set condaEnvironment to the newly created environment, it was already set to: "
49-
+ CondaRunner.condaEnvironment.get())
50-
51-
run(args, Some(environment))
59+
.create(condaBaseDir, condaBootstrapDeps, condaChannelUrls)
60+
setCondaEnvironment(environment)
61+
Some(environment)
5262
} else {
53-
run(args, None)
63+
None
5464
}
5565
}
5666

57-
def run(args: Array[String], maybeConda: Option[CondaEnvironment]): Unit
58-
}
59-
60-
object CondaRunner {
61-
private[spark] val condaEnvironment: AtomicReference[Option[CondaEnvironment]] =
62-
new AtomicReference(None)
67+
/**
68+
* Sets the given environment as the global environment, which will be accessible by calling
69+
* [[SparkContext.condaEnvironment]]. This method can only be called once! If an environment
70+
* has already been set, calling this method again will throw a [[RuntimeException]].
71+
*/
72+
def setCondaEnvironment(environment: CondaEnvironment): Unit = {
73+
// Save this as a global in order for SparkContext to be able to access it later, in case we
74+
// are shelling out, but providing a bridge back into this JVM.
75+
require(CondaRunner.condaEnvironment.compareAndSet(None, Some(environment)),
76+
"Couldn't set condaEnvironment to the newly created environment, it was already set to: "
77+
+ CondaRunner.condaEnvironment.get())
78+
}
6379
}

0 commit comments

Comments
 (0)