From 79ffa7294be7fc1cd664cacab47e6e1d45b7bc06 Mon Sep 17 00:00:00 2001 From: Paul Reimer Date: Wed, 18 Oct 2017 15:23:12 -0700 Subject: [PATCH 1/3] Allow specifying non-local files to spark-submit (python files, and R files) when isKubernetes is set (#527) --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 85b76013ba5f3..138b1a58ef482 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -325,7 +325,7 @@ object SparkSubmit extends CommandLineUtils { // Require all python files to be local, so we can add them to the PYTHONPATH // In YARN cluster mode, python files are distributed as regular files, which can be non-local. // In Mesos cluster mode, non-local python files are automatically downloaded by Mesos. - if (args.isPython && !isYarnCluster && !isMesosCluster) { + if (args.isPython && !isYarnCluster && !isMesosCluster && !isKubernetesCluster) { if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { printErrorAndExit(s"Only local python files are supported: ${args.primaryResource}") } @@ -336,7 +336,7 @@ object SparkSubmit extends CommandLineUtils { } // Require all R files to be local - if (args.isR && !isYarnCluster && !isMesosCluster) { + if (args.isR && !isYarnCluster && !isMesosCluster && !isKubernetesCluster) { if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { printErrorAndExit(s"Only local R files are supported: ${args.primaryResource}") } From ecfa6f22b5064142f13a110afa4c3c311a8ef7e3 Mon Sep 17 00:00:00 2001 From: Paul Reimer Date: Thu, 19 Oct 2017 09:44:35 -0700 Subject: [PATCH 2/3] Avoid formatting python path (when using --py-files) in kubernetes cluster mode for spark-submit --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 138b1a58ef482..0bd8d0c98b639 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -682,10 +682,10 @@ object SparkSubmit extends CommandLineUtils { // explicitly sets `spark.submit.pyFiles` in his/her default properties file. sysProps.get("spark.submit.pyFiles").foreach { pyFiles => val resolvedPyFiles = Utils.resolveURIs(pyFiles) - val formattedPyFiles = if (!isYarnCluster && !isMesosCluster) { + val formattedPyFiles = if (!isYarnCluster && !isMesosCluster && !isKubernetesCluster) { PythonRunner.formatPaths(resolvedPyFiles).mkString(",") } else { - // Ignoring formatting python path in yarn and mesos cluster mode, these two modes + // Ignoring formatting python path in yarn, mesos, and kubernetes cluster mode, these two modes // support dealing with remote python files, they could distribute and add python files // locally. resolvedPyFiles From a621c5ff9fe842416ec8837665a9a70d4dfc8a9c Mon Sep 17 00:00:00 2001 From: Paul Reimer Date: Thu, 19 Oct 2017 10:58:16 -0700 Subject: [PATCH 3/3] More general comment about cluster modes which should skip python path formatting. --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0bd8d0c98b639..b0ea0fc5a51f7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -685,7 +685,7 @@ object SparkSubmit extends CommandLineUtils { val formattedPyFiles = if (!isYarnCluster && !isMesosCluster && !isKubernetesCluster) { PythonRunner.formatPaths(resolvedPyFiles).mkString(",") } else { - // Ignoring formatting python path in yarn, mesos, and kubernetes cluster mode, these two modes + // Ignoring formatting python path in yarn, mesos, and kubernetes cluster mode, these modes // support dealing with remote python files, they could distribute and add python files // locally. resolvedPyFiles