Add initial integration test code #3

Merged: 22 commits, Dec 20, 2017

74 changes: 72 additions & 2 deletions README.md
@@ -1,2 +1,72 @@
# spark-integration
Integration tests for Spark
---
layout: global
title: Spark on Kubernetes Integration Tests
---

# Running the Kubernetes Integration Tests

Note that the integration test framework is currently being heavily revised and
is subject to change.

Currently the integration tests only run with Java 8.
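
A quick way to check which JDK is active before running the tests (output
format varies by JDK vendor; the version shown is just an example):

```sh
$ java -version
java version "1.8.0_152"
```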

Running the integration tests requires a Spark distribution tarball. It also
needs a local path to the directory that contains `Dockerimage` files.
> **Member:** This should say dockerfiles? We're shipping it under `$DISTDIR/kubernetes/dockerfiles` in upstream.


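For orientation, the directory in question holds the upstream Dockerfiles; its
layout looks roughly like this (an assumption based on the Spark source tree of
that era, so the exact contents may differ across versions):

```sh
$ ls .../spark/resource-managers/kubernetes/docker/src/main/dockerfiles
driver/  executor/  spark-base/
```
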
Once you prepare the inputs, the integration tests can be executed with Maven or
your IDE. Note that when running tests from an IDE, the `pre-integration-test`
phase must be run every time the Spark main code changes. When running tests
from the command line, the `pre-integration-test` phase is invoked automatically
whenever the `integration-test` phase is run.

> **Member:** Can we add a line explaining what each phase does?
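
If you only need the preparation steps, you can also invoke the
`pre-integration-test` phase directly; Maven runs every lifecycle phase up to
and including the one you name. A sketch, reusing the properties from the
command shown below:

```sh
# Unpacks the Spark distro and stages the Dockerfiles without running
# the integration suites (those only run in the integration-test phase).
$ mvn pre-integration-test \
-Dspark-distro-tgz=/tmp/spark-2.3.0-SNAPSHOT-bin-20171216-0c8fca4608.tgz \
-Dspark-dockerfiles-dir=.../spark/resource-managers/kubernetes/docker/src/main/dockerfiles
```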

With Maven, the integration test can be run using the following command:

```sh
$ mvn clean integration-test \
-Dspark-distro-tgz=/tmp/spark-2.3.0-SNAPSHOT-bin-20171216-0c8fca4608.tgz \
-Dspark-dockerfiles-dir=.../spark/resource-managers/kubernetes/docker/src/main/dockerfiles
```

# Running against an arbitrary cluster

In order to run against any cluster, use the following:
```sh
$ mvn clean integration-test \
-Dspark-distro-tgz=/tmp/spark-2.3.0-SNAPSHOT-bin-20171216-0c8fca4608.tgz \
-Dspark-dockerfiles-dir=.../spark/resource-managers/kubernetes/docker/src/main/dockerfiles \
-DextraScalaTestArgs="-Dspark.kubernetes.test.master=k8s://https://<master> -Dspark.docker.test.driverImage=<driver-image> -Dspark.docker.test.executorImage=<executor-image>"
```
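
To find a value for `<master>`, one option (standard `kubectl` usage, not
something this PR provides) is to ask the cluster directly:

```sh
# Prints the API server endpoint of the cluster your kubeconfig points at;
# the address shown here is just an example.
$ kubectl cluster-info
Kubernetes master is running at https://192.168.99.100:8443
```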

# Preserve the Minikube VM

The integration tests make use of
[Minikube](https://github.com/kubernetes/minikube), which fires up a virtual
machine and sets up a single-node Kubernetes cluster within it. By default the
VM is destroyed after the tests are finished. If you want to preserve the VM,
e.g. to reduce the running time of tests during development, you can pass the
property `spark.docker.test.persistMinikube` to the test process:

```sh
$ mvn clean integration-test \
-Dspark-distro-tgz=/tmp/spark-2.3.0-SNAPSHOT-bin-20171216-0c8fca4608.tgz \
-Dspark-dockerfiles-dir=.../spark/resource-managers/kubernetes/docker/src/main/dockerfiles \
-DextraScalaTestArgs=-Dspark.docker.test.persistMinikube=true
```

> **Member:** Can we change this default behavior now? I think we wanted to remove the minikube lifecycle management from these tests. cc/ @mccheah
>
> **Member Author:** We can. But existing Jenkins jobs require Minikube to be cleaned up when they are done, so they need to set this flag to true. I am not sure it's worth the effort now, given that we are going to incorporate apache-spark-on-k8s/spark#521 in the near future.
>
> **Member:** Fair enough. Thanks for clarifying.
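
If you do preserve the VM across runs, remember to clean it up yourself when
you are done. A minimal sketch, using stock Minikube commands rather than
anything added by this PR:

```sh
# Check whether the preserved VM is still running, then tear it down.
$ minikube status
$ minikube delete
```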

# Reuse the previous Docker images

The integration tests build a number of Docker images, which takes some time.
By default, the images are built every time the tests run. You may want to skip
re-building those images during development if the distribution package has not
changed since the last run. To do so, pass the property
`spark.docker.test.skipBuildImages` to the test process. This works only if you
also set `spark.docker.test.persistMinikube` in the previous run, since the
Docker daemon runs inside the Minikube environment. Here is an example:

```sh
$ mvn clean integration-test \
-Dspark-distro-tgz=/tmp/spark-2.3.0-SNAPSHOT-bin-20171216-0c8fca4608.tgz \
-Dspark-dockerfiles-dir=.../spark/resource-managers/kubernetes/docker/src/main/dockerfiles \
"-DextraScalaTestArgs=-Dspark.docker.test.persistMinikube=true -Dspark.docker.test.skipBuildImages=true"
```
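
To see which images a previous run left behind in the preserved VM, you can
point your local Docker CLI at Minikube's daemon (standard Minikube tooling,
not part of this change):

```sh
# The test images live in the Docker daemon inside the Minikube VM,
# so list them from there rather than from the host daemon.
$ eval $(minikube docker-env)
$ docker images
```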
250 changes: 250 additions & 0 deletions integration-test/pom.xml
@@ -0,0 +1,250 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<artifactId>spark-kubernetes-integration-tests_2.11</artifactId>
<groupId>spark-kubernetes-integration-tests</groupId>
<version>0.1-SNAPSHOT</version>
<properties>
<commons-lang3.version>3.5</commons-lang3.version>
<commons-logging.version>1.1.1</commons-logging.version>
<docker-client.version>5.0.2</docker-client.version>
<download-maven-plugin.version>1.3.0</download-maven-plugin.version>
<exec-maven-plugin.version>1.4.0</exec-maven-plugin.version>
<extraScalaTestArgs></extraScalaTestArgs>
<guava.version>18.0</guava.version>
<jsr305.version>1.3.9</jsr305.version>
<kubernetes-client.version>3.0.0</kubernetes-client.version>
<log4j.version>1.2.17</log4j.version>
<scala.version>2.11.8</scala.version>
<scala.binary.version>2.11</scala.binary.version>
<scala-maven-plugin.version>3.2.2</scala-maven-plugin.version>
<scalatest.version>2.2.6</scalatest.version>
<scalatest-maven-plugin.version>1.0</scalatest-maven-plugin.version>
<slf4j-log4j12.version>1.7.24</slf4j-log4j12.version>
<sbt.project.name>kubernetes-integration-tests</sbt.project.name>
<spark-distro-tgz>YOUR-SPARK-DISTRO-TARBALL-HERE</spark-distro-tgz>
<spark-dockerfiles-dir>YOUR-DOCKERFILES-DIR-HERE</spark-dockerfiles-dir>
<test.exclude.tags></test.exclude.tags>
</properties>
<packaging>jar</packaging>
<name>Spark Project Kubernetes Integration Tests</name>

<dependencies>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>${commons-logging.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
<version>${jsr305.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>test</scope>
<!-- For compatibility with Docker client. Should be fine since this is just for tests.-->
<version>${guava.version}</version>
</dependency>
<dependency>
<groupId>com.spotify</groupId>
<artifactId>docker-client</artifactId>
<version>${docker-client.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>kubernetes-client</artifactId>
<version>${kubernetes-client.version}</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${commons-lang3.version}</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>${scalatest.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j-log4j12.version}</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>${scala-maven-plugin.version}</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>${exec-maven-plugin.version}</version>
<executions>
<execution>
<id>unpack-spark-distro</id>
<phase>pre-integration-test</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<workingDirectory>${project.build.directory}</workingDirectory>
<executable>/bin/sh</executable>
<arguments>
<argument>-c</argument>
<argument>rm -rf spark-distro; mkdir spark-distro-tmp; cd spark-distro-tmp; tar xfz ${spark-distro-tgz}; mv * ../spark-distro; cd ..; rm -rf spark-distro-tmp</argument>
</arguments>
</configuration>
</execution>
<execution>
<!-- TODO: Remove this hack once the upstream is fixed -->
<id>copy-dockerfiles-if-missing</id>
<phase>pre-integration-test</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<workingDirectory>${project.build.directory}/spark-distro</workingDirectory>
<executable>/bin/sh</executable>
<arguments>
<argument>-c</argument>
<argument>test -d dockerfiles || cp -pr ${spark-dockerfiles-dir} dockerfiles</argument>
</arguments>
</configuration>
</execution>
<execution>
<!-- TODO: Remove this hack once upstream is fixed by SPARK-22777 -->
<id>set-exec-bit-on-docker-entrypoint-sh</id>
> **Contributor:** Just set it in the Dockerfile using: `RUN chmod +x /opt/entrypoint.sh`.
>
> **Member Author:** I think that's better to be done by the upstream code. It's hard for this integration code to surgically do an in-place edit of the Dockerfile.
>
> **Contributor:** +1
>
> **Member:** The PR has merged now.

<phase>pre-integration-test</phase>
<goals>
<goal>exec</goal>
</goals>
<configuration>
<workingDirectory>${project.build.directory}/spark-distro/dockerfiles</workingDirectory>
<executable>/bin/chmod</executable>
<arguments>
<argument>+x</argument>
<argument>spark-base/entrypoint.sh</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>com.googlecode.maven-download-plugin</groupId>
<artifactId>download-maven-plugin</artifactId>
<version>${download-maven-plugin.version}</version>
<executions>
<execution>
<id>download-minikube-linux</id>
> **Contributor:** We shouldn't be downloading here, we should be using the built-in Minikube binary.
>
> **Member Author:** I agree that's what we want, but I suggest we address this in a future PR. I think we'd like to start testing the upstream sooner than later.
>
> **Contributor (@mccheah, Dec 18, 2017):** I think the Riselab nodes where we are running this will have Minikube installed already, as it assumes the setup where the Minikube instance is being re-used.
>
> **Contributor:** So it should be just as easy to use the pre-installed minikube as it would be to download a new one every time. That is, the total time from now to when the tests start running against upstream should be equivalent regardless of which branch we use - so we should use the more robust mode from the start.
>
> **Member Author:** Copying my reply from below:
>
> I am not familiar with #521 yet. So I don't know how much extra time it would need to address the code and potential review comments that the new code would invite. And I personally don't have a lot of extra time to spend on this beyond what I spent already.
>
> I also don't know much about the riselab setup. I was hoping to delay that until @ssuchter comes back. We can use Pepperdata jenkins in the meantime.
>
> So I really hope that we can keep this PR simple and address gaps in future PRs.

<phase>pre-integration-test</phase>
<goals>
<goal>wget</goal>
</goals>
<configuration>
<url>https://storage.googleapis.com/minikube/releases/v0.22.0/minikube-linux-amd64</url>
<outputDirectory>${project.build.directory}/minikube-bin/linux-amd64</outputDirectory>
<outputFileName>minikube</outputFileName>
</configuration>
</execution>
<execution>
<id>download-minikube-darwin</id>
<phase>pre-integration-test</phase>
<goals>
<goal>wget</goal>
</goals>
<configuration>
<url>https://storage.googleapis.com/minikube/releases/v0.22.0/minikube-darwin-amd64</url>
<outputDirectory>${project.build.directory}/minikube-bin/darwin-amd64</outputDirectory>
<outputFileName>minikube</outputFileName>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- Triggers scalatest plugin in the integration-test phase instead of
the test phase. -->
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<version>${scalatest-maven-plugin.version}</version>
<configuration>
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>SparkTestSuite.txt</filereports>
<argLine>-ea -Xmx3g -XX:ReservedCodeCacheSize=512m ${extraScalaTestArgs}</argLine>
<stderr/>
<systemProperties>
<log4j.configuration>file:src/test/resources/log4j.properties</log4j.configuration>
<java.awt.headless>true</java.awt.headless>
</systemProperties>
<tagsToExclude>${test.exclude.tags}</tagsToExclude>
</configuration>
<executions>
<execution>
<id>test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<!-- The negative pattern below prevents integration tests such as
KubernetesSuite from running in the test phase. -->
<suffixes>(?&lt;!Suite)</suffixes>
</configuration>
</execution>
<execution>
<id>integration-test</id>
<phase>integration-test</phase>
<goals>
<goal>test</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>

</build>

</project>
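
For readers skimming the pom, the `unpack-spark-distro` execution above amounts
to the following shell steps (a sketch; `$TGZ` stands in for the
`spark-distro-tgz` property, and the working directory is Maven's `target/`):

```sh
# Unpack the distribution tarball into target/spark-distro, dropping the
# tarball's top-level directory name along the way.
TGZ=/tmp/spark-2.3.0-SNAPSHOT-bin-20171216-0c8fca4608.tgz   # example value
rm -rf spark-distro
mkdir spark-distro-tmp
cd spark-distro-tmp
tar xfz "$TGZ"
mv * ../spark-distro
cd ..
rm -rf spark-distro-tmp
```
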
31 changes: 31 additions & 0 deletions integration-test/src/test/resources/log4j.properties
@@ -0,0 +1,31 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the file target/integration-tests.log
log4j.rootCategory=INFO, file
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=true
log4j.appender.file.file=target/integration-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

# Ignore messages below warning level from a few verbose libraries.
log4j.logger.com.sun.jersey=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.mortbay=WARN
log4j.logger.org.spark_project.jetty=WARN
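
Since everything above is routed to a file rather than the console, a
convenient way to watch a run live (a usage note, not part of this change) is
to tail the log from the module directory:

```sh
# The path is relative to the integration-test module's working directory.
$ tail -f target/integration-tests.log
```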