Commit 95cf5f7

Merge pull request apache-spark-on-k8s#324 from palantir/use-upstream-kubernetes
Rebase to upstream's version of Kubernetes support.
2 parents: 4e7f4f0 + 1d60e38 · commit 95cf5f7

File tree: 153 files changed (+2616, -12775 lines)

bin/docker-image-tool.sh

Lines changed: 7 additions & 2 deletions

@@ -63,9 +63,11 @@ function build {
     error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
   fi

+  local DOCKERFILE=${DOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
+
   docker build "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
-    -f "$IMG_PATH/spark/Dockerfile" .
+    -f "$DOCKERFILE" .
 }

 function push {
@@ -83,6 +85,7 @@ Commands:
   push        Push a pre-built image to a registry. Requires a repository address to be provided.

 Options:
+  -f file     Dockerfile to build. By default builds the Dockerfile shipped with Spark.
   -r repo     Repository address.
   -t tag      Tag to apply to the built image, or to identify the image to be pushed.
   -m          Use minikube's Docker daemon.
@@ -112,10 +115,12 @@ fi

 REPO=
 TAG=
-while getopts mr:t: option
+DOCKERFILE=
+while getopts f:mr:t: option
 do
  case "${option}"
  in
+ f) DOCKERFILE=${OPTARG};;
  r) REPO=${OPTARG};;
  t) TAG=${OPTARG};;
  m)
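
With the new -f option, a custom Dockerfile can stand in for the one bundled with the distribution, and omitting -f keeps the old behavior because DOCKERFILE falls back to "$IMG_PATH/spark/Dockerfile". A minimal usage sketch; the repository, tag, and Dockerfile path below are illustrative values, not taken from this commit:

  # Build the Spark image from a custom Dockerfile (all concrete values are examples).
  ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 \
    -f /path/to/custom/Dockerfile build

  # Push the image built above, identified by the same repository and tag.
  ./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.3.0 push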

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 12 additions & 27 deletions

@@ -18,13 +18,13 @@
 package org.apache.spark.deploy

 import java.io._
-import java.lang.reflect.{InvocationTargetException, UndeclaredThrowableException}
+import java.lang.reflect.{InvocationTargetException, Modifier, UndeclaredThrowableException}
 import java.net.URL
 import java.security.PrivilegedExceptionAction
 import java.text.ParseException

 import scala.annotation.tailrec
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
 import scala.util.{Properties, Try}

 import org.apache.commons.lang3.StringUtils
@@ -99,7 +99,7 @@ object SparkSubmit extends CommandLineUtils with Logging {
   private[deploy] val REST_CLUSTER_SUBMIT_CLASS = classOf[RestSubmissionClientApp].getName()
   private[deploy] val STANDALONE_CLUSTER_SUBMIT_CLASS = classOf[ClientApp].getName()
   private[deploy] val KUBERNETES_CLUSTER_SUBMIT_CLASS =
-    "org.apache.spark.deploy.k8s.submit.Client"
+    "org.apache.spark.deploy.k8s.submit.KubernetesClientApplication"

   // scalastyle:off println
   private[spark] def printVersionAndExit(): Unit = {
@@ -310,10 +310,6 @@ object SparkSubmit extends CommandLineUtils with Logging {

     // Fail fast, the following modes are not supported or applicable
     (clusterManager, deployMode) match {
-      case (KUBERNETES, CLIENT) =>
-        printErrorAndExit("Client mode is currently not supported for Kubernetes.")
-      case (KUBERNETES, CLUSTER) if args.isR =>
-        printErrorAndExit("Kubernetes does not currently support R applications.")
       case (STANDALONE, CLUSTER) if args.isPython =>
         printErrorAndExit("Cluster deploy mode is currently not supported for python " +
           "applications on standalone clusters.")
@@ -343,8 +339,8 @@ object SparkSubmit extends CommandLineUtils with Logging {
     }
     val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER
     val isMesosCluster = clusterManager == MESOS && deployMode == CLUSTER
-    val isKubernetesCluster = clusterManager == KUBERNETES && deployMode == CLUSTER
     val isStandAloneCluster = clusterManager == STANDALONE && deployMode == CLUSTER
+    val isKubernetesCluster = clusterManager == KUBERNETES && deployMode == CLUSTER

     if (!isMesosCluster && !isStandAloneCluster) {
       // Resolve maven dependencies if there are any and add classpath to jars. Add them to py-files
@@ -579,9 +575,6 @@ object SparkSubmit extends CommandLineUtils with Logging {
       OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.principal"),
       OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.keytab"),

-      OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES,
-        confKey = "spark.kubernetes.namespace"),
-
       // Other options
       OptionAssigner(args.executorCores, STANDALONE | YARN | KUBERNETES, ALL_DEPLOY_MODES,
         confKey = "spark.executor.cores"),
@@ -649,9 +642,8 @@ object SparkSubmit extends CommandLineUtils with Logging {

     // Add the application jar automatically so the user doesn't have to call sc.addJar
     // For YARN cluster mode, the jar is already distributed on each node as "app.jar"
-    // In Kubernetes cluster mode, the jar will be uploaded by the client separately.
     // For python and R files, the primary resource is already distributed as a regular file
-    if (!isYarnCluster && !isKubernetesCluster && !args.isPython && !args.isR) {
+    if (!isYarnCluster && !args.isPython && !args.isR) {
       var jars = sparkConf.getOption("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq.empty)
       if (isUserJar(args.primaryResource)) {
         jars = jars ++ Seq(args.primaryResource)
@@ -733,21 +725,14 @@ object SparkSubmit extends CommandLineUtils with Logging {

     if (isKubernetesCluster) {
       childMainClass = KUBERNETES_CLUSTER_SUBMIT_CLASS
-      if (args.isPython) {
-        childArgs ++= Array("--primary-py-file", args.primaryResource)
-        childArgs ++= Array("--main-class", "org.apache.spark.deploy.PythonRunner")
-        if (args.pyFiles != null) {
-          childArgs ++= Array("--other-py-files", args.pyFiles)
-        }
-      } else {
-        if (args.primaryResource != SparkLauncher.NO_RESOURCE) {
-          childArgs ++= Array("--primary-java-resource", args.primaryResource)
-        }
-        childArgs ++= Array("--main-class", args.mainClass)
+      if (args.primaryResource != SparkLauncher.NO_RESOURCE) {
+        childArgs ++= Array("--primary-java-resource", args.primaryResource)
       }
-      args.childArgs.foreach { arg =>
-        childArgs += "--arg"
-        childArgs += arg
+      childArgs ++= Array("--main-class", args.mainClass)
+      if (args.childArgs != null) {
+        args.childArgs.foreach { arg =>
+          childArgs += ("--arg", arg)
+        }
       }
     }

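After this change, a Kubernetes cluster-mode submission is handed to org.apache.spark.deploy.k8s.submit.KubernetesClientApplication, which receives the primary jar as --primary-java-resource, the main class as --main-class, and each application argument as a separate --arg (now guarded against a null args.childArgs). A hedged end-to-end sketch; the API server URL, image name, jar path, and argument are illustrative, and the spark.kubernetes.container.image property name is an assumption based on the upstream configuration of this era:

  # Submit SparkPi in cluster mode against Kubernetes (all concrete values are examples).
  ./bin/spark-submit \
    --master k8s://https://kubernetes.example.com:443 \
    --deploy-mode cluster \
    --class org.apache.spark.examples.SparkPi \
    --conf spark.kubernetes.container.image=docker.io/myrepo/spark:v2.3.0 \
    local:///opt/spark/examples/jars/spark-examples_2.11-2.3.0.jar 1000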

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 0 additions & 9 deletions

@@ -74,9 +74,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
   var principal: String = null
   var keytab: String = null

-  // Kubernetes only
-  var kubernetesNamespace: String = null
-
   // Standalone cluster mode only
   var supervise: Boolean = false
   var driverCores: String = null
@@ -201,9 +198,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull
     keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
     principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
-    kubernetesNamespace = Option(kubernetesNamespace)
-      .orElse(sparkProperties.get("spark.kubernetes.namespace"))
-      .orNull

     // Try to set main class from JAR if no --class argument is given
     if (mainClass == null && !isPython && !isR && primaryResource != null) {
@@ -460,9 +454,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       case KEYTAB =>
         keytab = value

-      case KUBERNETES_NAMESPACE =>
-        kubernetesNamespace = value
-
       case HELP =>
         printUsageAndExit(0)

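With kubernetesNamespace removed from SparkSubmitArguments, the namespace is no longer a dedicated submit-time field; it is read like any other Spark property. A sketch of the equivalent invocation, where the namespace "spark-jobs" and the other concrete values are hypothetical:

  # spark.kubernetes.namespace is the property the removed code used to resolve;
  # it is now supplied directly via --conf (the namespace value is an example).
  ./bin/spark-submit \
    --master k8s://https://kubernetes.example.com:443 \
    --deploy-mode cluster \
    --conf spark.kubernetes.namespace=spark-jobs \
    --class org.apache.spark.examples.SparkPi \
    local:///opt/spark/examples/jars/spark-examples_2.11-2.3.0.jar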

dev/deps/spark-deps-hadoop-palantir

Lines changed: 0 additions & 8 deletions

@@ -20,7 +20,6 @@ aws-java-sdk-core-1.11.45.jar
 aws-java-sdk-kms-1.11.45.jar
 aws-java-sdk-s3-1.11.45.jar
 base64-2.3.8.jar
-bcpkix-jdk15on-1.58.jar
 bcprov-jdk15on-1.58.jar
 breeze-macros_2.11-0.13.2.jar
 breeze_2.11-0.13.2.jar
@@ -46,8 +45,6 @@ commons-math3-3.4.1.jar
 commons-net-2.2.jar
 commons-pool-1.6.jar
 compress-lzf-1.0.3.jar
-converter-jackson-2.3.0.jar
-converter-scalars-2.3.0.jar
 core-1.1.2.jar
 curator-client-2.7.1.jar
 curator-framework-2.7.1.jar
@@ -102,8 +99,6 @@ jackson-datatype-guava-2.6.7.jar
 jackson-datatype-jdk7-2.6.7.jar
 jackson-datatype-joda-2.6.7.jar
 jackson-jaxrs-1.9.13.jar
-jackson-jaxrs-base-2.6.7.jar
-jackson-jaxrs-json-provider-2.6.7.jar
 jackson-mapper-asl-1.9.13.jar
 jackson-module-afterburner-2.6.7.jar
 jackson-module-jaxb-annotations-2.6.7.jar
@@ -129,7 +124,6 @@ jersey-container-servlet-2.25.1.jar
 jersey-container-servlet-core-2.25.1.jar
 jersey-guava-2.25.1.jar
 jersey-media-jaxb-2.25.1.jar
-jersey-media-multipart-2.25.1.jar
 jersey-server-2.25.1.jar
 jets3t-0.9.4.jar
 jetty-6.1.26.jar
@@ -160,7 +154,6 @@ metrics-graphite-3.2.5.jar
 metrics-influxdb-1.1.8.jar
 metrics-json-3.2.5.jar
 metrics-jvm-3.2.5.jar
-mimepull-1.9.6.jar
 minlog-1.3.0.jar
 netty-3.10.6.Final.jar
 netty-all-4.1.17.Final.jar
@@ -184,7 +177,6 @@ parquet-jackson-1.9.1-palantir3.jar
 protobuf-java-2.5.0.jar
 py4j-0.10.6.jar
 pyrolite-4.13.jar
-retrofit-2.3.0.jar
 scala-compiler-2.11.8.jar
 scala-library-2.11.8.jar
 scala-parser-combinators_2.11-1.0.4.jar
