Skip to content

Commit 5f203cd

Browse files
robert3005 authored and bulldozer-bot[bot]
committed
Default profiles are on by default (apache-spark-on-k8s#486)
This avoids annoying issues with IDE integration and building, where you have to remember the incantation to run the correct combination of profiles. This entails the following changes: * Hadoop default changed from 2.7.4 to 2.9.1 * yarn, kubernetes, hadoop-cloud and kinesis modules are on by default. SparkR is left out since it requires a bit more invasive changes to enable by default.
1 parent 3084918 commit 5f203cd

File tree

11 files changed

+87
-106
lines changed

11 files changed

+87
-106
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ jobs:
149149
keys:
150150
- build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
151151
- build-binaries-
152-
- run: ./build/mvn -DskipTests -Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Psparkr install
152+
- run: ./build/mvn -DskipTests -Psparkr install
153153
# Get sbt to run trivially, ensures its launcher is downloaded under build/
154154
- run: ./build/sbt -h || true
155155
- save_cache:

assembly/pom.xml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,29 @@
7979
<artifactId>spark-avro_${scala.binary.version}</artifactId>
8080
<version>${project.version}</version>
8181
</dependency>
82-
82+
<dependency>
83+
<groupId>org.apache.spark</groupId>
84+
<artifactId>spark-hadoop-cloud_${scala.binary.version}</artifactId>
85+
<version>${project.version}</version>
86+
</dependency>
87+
<!--
88+
Redeclare this dependency to force it into the distribution.
89+
-->
90+
<dependency>
91+
<groupId>org.eclipse.jetty</groupId>
92+
<artifactId>jetty-util</artifactId>
93+
<scope>${hadoop.deps.scope}</scope>
94+
</dependency>
95+
<dependency>
96+
<groupId>org.apache.spark</groupId>
97+
<artifactId>spark-yarn_${scala.binary.version}</artifactId>
98+
<version>${project.version}</version>
99+
</dependency>
100+
<dependency>
101+
<groupId>org.apache.spark</groupId>
102+
<artifactId>spark-kubernetes_${scala.binary.version}</artifactId>
103+
<version>${project.version}</version>
104+
</dependency>
83105
<!--
84106
Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
85107
that the libraries Spark depend on have it available. We'll package the version that Spark

dev/deps/spark-deps-hadoop-palantir

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ avro-1.8.2.jar
1919
avro-ipc-1.8.2.jar
2020
avro-mapred-1.8.2-hadoop2.jar
2121
aws-java-sdk-bundle-1.11.201.jar
22+
azure-data-lake-store-sdk-2.2.3.jar
2223
azure-keyvault-core-0.8.0.jar
2324
azure-storage-5.4.0.jar
2425
breeze-macros_2.12-0.13.2.jar
@@ -66,6 +67,7 @@ hadoop-annotations-2.9.1-palantir.11.jar
6667
hadoop-auth-2.9.1-palantir.11.jar
6768
hadoop-aws-2.9.1-palantir.11.jar
6869
hadoop-azure-2.9.1-palantir.11.jar
70+
hadoop-azure-datalake-2.9.1-palantir.11.jar
6971
hadoop-client-2.9.1-palantir.11.jar
7072
hadoop-common-2.9.1-palantir.11.jar
7173
hadoop-hdfs-client-2.9.1-palantir.11.jar

dev/mima

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ set -e
2424
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
2525
cd "$FWDIR"
2626

27-
SPARK_PROFILES="-Phadoop-palantir -Pmesos -Phadoop-cloud -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
27+
SPARK_PROFILES="-Pmesos -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
2828
TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)"
2929
OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"
3030

dev/publish-local.sh

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,14 @@
11
#!/usr/bin/env bash
22

33
set -euo pipefail
4-
version=$(git describe --tags --first-parent)
5-
6-
PALANTIR_FLAGS=(-Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Psparkr)
74

5+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
86
MVN_LOCAL=~/.m2/repository
97

10-
publish_artifacts() {
11-
./build/mvn versions:set -DnewVersion=$version
12-
./build/mvn -DskipTests "${PALANTIR_FLAGS[@]}" install
13-
}
8+
source "$FWDIR/publish_functions.sh"
149

15-
make_dist() {
16-
build_flags="$1"
17-
shift 1
18-
hadoop_name="hadoop-palantir"
19-
artifact_name="spark-dist_2.12-${hadoop_name}"
20-
file_name="spark-dist-${version}-${hadoop_name}.tgz"
21-
./dev/make-distribution.sh --name "hadoop-palantir" --tgz "$@" $build_flags
22-
mkdir -p $MVN_LOCAL/org/apache/spark/${artifact_name}/${version} && \
23-
cp $file_name $MVN_LOCAL/org/apache/spark/${artifact_name}/${version}/${artifact_name}-${version}.tgz
24-
}
10+
set_version_and_install
11+
DONT_BUILD=true make_dist
12+
mkdir -p $MVN_LOCAL/org/apache/spark/${artifact_name}/${version}
13+
cp $file_name $MVN_LOCAL/org/apache/spark/${artifact_name}/${version}/${artifact_name}-${version}.tgz
2514

26-
publish_artifacts
27-
make_dist "${PALANTIR_FLAGS[*]}"

dev/publish.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
77
source "$FWDIR/publish_functions.sh"
88

99
publish_artifacts | tee -a "/tmp/publish_artifacts.log"
10-
DONT_BUILD=true make_dist "${PALANTIR_FLAGS[*]}" | tee -a "/tmp/make-dist.log"
10+
DONT_BUILD=true make_dist_and_deploy | tee -a "/tmp/make-dist.log"

dev/publish_functions.sh

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
set -euo pipefail
44

5-
PALANTIR_FLAGS=(-Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Psparkr)
5+
PALANTIR_FLAGS=(-Psparkr)
66

77
get_version() {
88
git describe --tags --first-parent
@@ -14,6 +14,12 @@ set_version_and_package() {
1414
./build/mvn -DskipTests "${PALANTIR_FLAGS[@]}" package
1515
}
1616

17+
set_version_and_install() {
18+
version=$(get_version)
19+
./build/mvn versions:set -DnewVersion="$version"
20+
./build/mvn -DskipTests "${PALANTIR_FLAGS[@]}" install
21+
}
22+
1723
publish_artifacts() {
1824
tmp_settings="tmp-settings.xml"
1925
echo "<settings><servers><server>" > $tmp_settings
@@ -25,13 +31,15 @@ publish_artifacts() {
2531
}
2632

2733
make_dist() {
28-
build_flags="$1"
29-
shift 1
3034
version=$(get_version)
3135
hadoop_name="hadoop-palantir"
3236
artifact_name="spark-dist_2.12-${hadoop_name}"
3337
file_name="spark-dist-${version}-${hadoop_name}.tgz"
34-
./dev/make-distribution.sh --name "hadoop-palantir" --tgz "$@" $build_flags
38+
./dev/make-distribution.sh --name "hadoop-palantir" --tgz "$@" "${PALANTIR_FLAGS[@]}"
39+
}
40+
41+
make_dist_and_deploy() {
42+
make_dist
3543
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T "$file_name" "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/${artifact_name}/${version}/${artifact_name}-${version}.tgz"
3644
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST "https://api.bintray.com/content/palantir/releases/spark/${version}/publish"
3745
}

dev/sbt-checkstyle

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
# with failure (either resolution or compilation); the "q" makes SBT quit.
2222
ERRORS=$(echo -e "q\n" \
2323
| build/sbt \
24-
-Phadoop-palantir \
2524
-Pkinesis-asl \
2625
-Pmesos \
2726
-Pkubernetes \

dev/test-dependencies.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ export LC_ALL=C
2929
# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution.
3030

3131
# NOTE: These should match those in the release publishing script
32-
HADOOP2_MODULE_PROFILES="-Phadoop-cloud -Pkubernetes -Pkinesis-asl -Pyarn"
32+
HADOOP2_MODULE_PROFILES=""
3333
MVN="build/mvn"
3434
HADOOP_PROFILES=(
3535
hadoop-palantir

hadoop-cloud/pom.xml

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@
138138
<dependency>
139139
<groupId>org.apache.hadoop</groupId>
140140
<artifactId>hadoop-azure</artifactId>
141-
<version>${hadoop.version}</version>
142141
<scope>${hadoop.deps.scope}</scope>
143142
<exclusions>
144143
<exclusion>
@@ -159,25 +158,14 @@
159158
</exclusion>
160159
</exclusions>
161160
</dependency>
161+
<dependency>
162+
<groupId>org.apache.hadoop</groupId>
163+
<artifactId>hadoop-azure-datalake</artifactId>
164+
<scope>${hadoop.deps.scope}</scope>
165+
</dependency>
162166
</dependencies>
163167

164168
<profiles>
165-
<profile>
166-
<id>hadoop-palantir</id>
167-
<dependencies>
168-
<dependency>
169-
<groupId>org.apache.hadoop</groupId>
170-
<artifactId>hadoop-azure-datalake</artifactId>
171-
<scope>${hadoop.deps.scope}</scope>
172-
</dependency>
173-
<dependency>
174-
<groupId>org.apache.hadoop</groupId>
175-
<artifactId>hadoop-azure</artifactId>
176-
<scope>${hadoop.deps.scope}</scope>
177-
</dependency>
178-
</dependencies>
179-
</profile>
180-
181169
<!--
182170
Hadoop 3 simplifies the classpath, and adds a new committer base class which
183171
enables store-specific committers.

0 commit comments

Comments
 (0)