@@ -29,6 +29,20 @@ if [ -z "${SPARK_HOME}" ]; then
29
29
fi
30
30
. " ${SPARK_HOME} /bin/load-spark-env.sh"
31
31
32
# Root of the temporary, trimmed-down docker build contexts used for dev builds.
CTX_DIR="$SPARK_HOME/target/tmp/docker"

# Returns success (0) when running from a source checkout rather than a
# distribution archive: a runnable distribution ships a RELEASE file at the
# top of $SPARK_HOME, a dev tree does not.
function is_dev_build {
  [ ! -f "$SPARK_HOME/RELEASE" ]
}

# Delete the temporary build-context directory, but only for dev builds.
# For a distribution the docker context is $SPARK_HOME itself, which must
# never be removed.
function cleanup_ctx_dir {
  if is_dev_build; then
    rm -rf "$CTX_DIR"
  fi
}

# Clean up the scratch context on any script exit path.
trap cleanup_ctx_dir EXIT
32
46
function image_ref {
33
47
local image=" $1 "
34
48
local add_repo=" ${2:- 1} "
@@ -53,80 +67,114 @@ function docker_push {
53
67
fi
54
68
}
55
69
70
# Create a smaller build context for docker in dev builds to make the build faster. Docker
# uploads all of the current directory to the daemon, and it can get pretty big with dev
# builds that contain test log files and other artifacts.
#
# Three build contexts are created, one for each image: base, pyspark, and sparkr. For them
# to have the desired effect, the docker command needs to be executed inside the appropriate
# context directory.
#
# Note: docker does not support symlinks in the build context.
#
# Runs in a subshell so that `set -e` and `local` stay scoped to this function;
# expects to be invoked from the top of the Spark source tree, with $CTX_DIR and
# $SPARK_SCALA_VERSION set.
function create_dev_build_context {(
  set -e
  local BASE_CTX="$CTX_DIR/base"
  mkdir -p "$BASE_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$BASE_CTX/kubernetes/dockerfiles"

  cp -r "assembly/target/scala-$SPARK_SCALA_VERSION/jars" "$BASE_CTX/jars"
  cp -r "resource-managers/kubernetes/integration-tests/tests" \
    "$BASE_CTX/kubernetes/tests"

  mkdir "$BASE_CTX/examples"
  cp -r "examples/src" "$BASE_CTX/examples/src"
  # Copy just needed examples jars instead of everything: skip any jar that is
  # already present in the main jars directory.
  mkdir "$BASE_CTX/examples/jars"
  for i in examples/target/scala-$SPARK_SCALA_VERSION/jars/*; do
    if [ ! -f "$BASE_CTX/jars/$(basename "$i")" ]; then
      cp "$i" "$BASE_CTX/examples/jars"
    fi
  done

  for other in bin sbin data; do
    cp -r "$other" "$BASE_CTX/$other"
  done

  local PYSPARK_CTX="$CTX_DIR/pyspark"
  mkdir -p "$PYSPARK_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$PYSPARK_CTX/kubernetes/dockerfiles"
  mkdir "$PYSPARK_CTX/python"
  cp -r "python/lib" "$PYSPARK_CTX/python/lib"

  local R_CTX="$CTX_DIR/sparkr"
  mkdir -p "$R_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$R_CTX/kubernetes/dockerfiles"
  cp -r "R" "$R_CTX/R"
)}
117
+
118
# Print the docker build context directory for the image named in $1
# (base, pyspark, or sparkr): the trimmed-down context under $CTX_DIR for
# dev builds, or $SPARK_HOME itself for a distribution.
function img_ctx_dir {
  if is_dev_build; then
    echo "$CTX_DIR/$1"
  else
    echo "$SPARK_HOME"
  fi
}
125
+
56
126
function build {
57
127
local BUILD_ARGS
58
- local IMG_PATH
59
- local JARS
60
-
61
- if [ ! -f " $SPARK_HOME /RELEASE" ]; then
62
- # Set image build arguments accordingly if this is a source repo and not a distribution archive.
63
- #
64
- # Note that this will copy all of the example jars directory into the image, and that will
65
- # contain a lot of duplicated jars with the main Spark directory. In a proper distribution,
66
- # the examples directory is cleaned up before generating the distribution tarball, so this
67
- # issue does not occur.
68
- IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles
69
- JARS=assembly/target/scala-$SPARK_SCALA_VERSION /jars
70
- BUILD_ARGS=(
71
- ${BUILD_PARAMS}
72
- --build-arg
73
- img_path=$IMG_PATH
74
- --build-arg
75
- spark_jars=$JARS
76
- --build-arg
77
- example_jars=examples/target/scala-$SPARK_SCALA_VERSION /jars
78
- --build-arg
79
- k8s_tests=resource-managers/kubernetes/integration-tests/tests
80
- )
81
- else
82
- # Not passed as arguments to docker, but used to validate the Spark directory.
83
- IMG_PATH=" kubernetes/dockerfiles"
84
- JARS=jars
85
- BUILD_ARGS=(${BUILD_PARAMS} )
128
+ local SPARK_ROOT=" $SPARK_HOME "
129
+
130
+ if is_dev_build; then
131
+ create_dev_build_context || error " Failed to create docker build context."
132
+ SPARK_ROOT=" $CTX_DIR /base"
86
133
fi
87
134
88
135
# Verify that the Docker image content directory is present
89
- if [ ! -d " $IMG_PATH " ]; then
136
+ if [ ! -d " $SPARK_ROOT /kubernetes/dockerfiles " ]; then
90
137
error " Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
91
138
fi
92
139
93
140
# Verify that Spark has actually been built/is a runnable distribution
94
141
# i.e. the Spark JARs that the Docker files will place into the image are present
95
- local TOTAL_JARS=$( ls $JARS /spark-* | wc -l)
142
+ local TOTAL_JARS=$( ls $SPARK_ROOT /jars /spark-* | wc -l)
96
143
TOTAL_JARS=$(( $TOTAL_JARS ))
97
144
if [ " ${TOTAL_JARS} " -eq 0 ]; then
98
145
error " Cannot find Spark JARs. This script assumes that Apache Spark has first been built locally or this is a runnable distribution."
99
146
fi
100
147
148
+ local BUILD_ARGS=(${BUILD_PARAMS} )
101
149
local BINDING_BUILD_ARGS=(
102
150
${BUILD_PARAMS}
103
151
--build-arg
104
152
base_img=$( image_ref spark)
105
153
)
106
- local BASEDOCKERFILE=${BASEDOCKERFILE:- " $IMG_PATH /spark/Dockerfile" }
154
+ local BASEDOCKERFILE=${BASEDOCKERFILE:- " kubernetes/dockerfiles /spark/Dockerfile" }
107
155
local PYDOCKERFILE=${PYDOCKERFILE:- false}
108
156
local RDOCKERFILE=${RDOCKERFILE:- false}
109
157
110
- docker build $NOCACHEARG " ${BUILD_ARGS[@]} " \
158
+ (cd $( img_ctx_dir base ) && docker build $NOCACHEARG " ${BUILD_ARGS[@]} " \
111
159
-t $( image_ref spark) \
112
- -f " $BASEDOCKERFILE " .
160
+ -f " $BASEDOCKERFILE " .)
113
161
if [ $? -ne 0 ]; then
114
162
error " Failed to build Spark JVM Docker image, please refer to Docker build output for details."
115
163
fi
116
164
117
165
if [ " ${PYDOCKERFILE} " != " false" ]; then
118
- docker build $NOCACHEARG " ${BINDING_BUILD_ARGS[@]} " \
166
+ (cd $( img_ctx_dir pyspark ) && docker build $NOCACHEARG " ${BINDING_BUILD_ARGS[@]} " \
119
167
-t $( image_ref spark-py) \
120
- -f " $PYDOCKERFILE " .
168
+ -f " $PYDOCKERFILE " .)
121
169
if [ $? -ne 0 ]; then
122
170
error " Failed to build PySpark Docker image, please refer to Docker build output for details."
123
171
fi
124
172
fi
125
173
126
174
if [ " ${RDOCKERFILE} " != " false" ]; then
127
- docker build $NOCACHEARG " ${BINDING_BUILD_ARGS[@]} " \
175
+ (cd $( img_ctx_dir sparkr ) && docker build $NOCACHEARG " ${BINDING_BUILD_ARGS[@]} " \
128
176
-t $( image_ref spark-r) \
129
- -f " $RDOCKERFILE " .
177
+ -f " $RDOCKERFILE " .)
130
178
if [ $? -ne 0 ]; then
131
179
error " Failed to build SparkR Docker image, please refer to Docker build output for details."
132
180
fi
0 commit comments