Skip to content

Commit eaf980a

Browse files
pan3793 authored and himadripal committed
[SPARK-49969][BUILD] Simplify dependency management in YARN module
### What changes were proposed in this pull request? This PR simplifies dependency management in the YARN module by pruning unnecessary test-scope dependencies that were pulled in from the vanilla Hadoop client. ### Why are the changes needed? Since 3.2 (SPARK-33212), Spark has moved from the vanilla Hadoop3 client to the shaded Hadoop3 client, which significantly simplifies dependency management. Some hacky dependency rules that worked around odd issues can now be removed to simplify the Maven/SBT configuration files. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - pass SBT test: `build/sbt -Pyarn yarn/test` - pass Maven test: `build/mvn -Pyarn -pl :spark-yarn_2.13 clean install -DskipTests -am && build/mvn -Pyarn -pl :spark-yarn_2.13 test` - verified no effect on runtime deps: `dev/test-dependencies.sh` ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#48468 from pan3793/SPARK-49969. Authored-by: Cheng Pan <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 5de4d9e commit eaf980a

File tree

2 files changed

+30
-126
lines changed

2 files changed

+30
-126
lines changed

project/SparkBuild.scala

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,20 +1072,9 @@ object DependencyOverrides {
10721072
object ExcludedDependencies {
10731073
lazy val settings = Seq(
10741074
libraryDependencies ~= { libs => libs.filterNot(_.name == "groovy-all") },
1075-
// SPARK-33705: Due to sbt compiler issues, it brings exclusions defined in maven pom back to
1076-
// the classpath directly and assemble test scope artifacts to assembly/target/scala-xx/jars,
1077-
// which is also will be added to the classpath of some unit tests that will build a subprocess
1078-
// to run `spark-submit`, e.g. HiveThriftServer2Test.
1079-
//
1080-
// These artifacts are for the jersey-1 API but Spark use jersey-2 ones, so it cause test
1081-
// flakiness w/ jar conflicts issues.
1082-
//
1083-
// Also jersey-1 is only used by yarn module(see resource-managers/yarn/pom.xml) for testing
1084-
// purpose only. Here we exclude them from the whole project scope and add them w/ yarn only.
10851075
excludeDependencies ++= Seq(
1086-
ExclusionRule(organization = "com.sun.jersey"),
10871076
ExclusionRule(organization = "ch.qos.logback"),
1088-
ExclusionRule("javax.ws.rs", "jsr311-api"))
1077+
ExclusionRule("javax.servlet", "javax.servlet-api"))
10891078
)
10901079
}
10911080

@@ -1229,10 +1218,6 @@ object YARN {
12291218
val hadoopProvidedProp = "spark.yarn.isHadoopProvided"
12301219

12311220
lazy val settings = Seq(
1232-
excludeDependencies --= Seq(
1233-
ExclusionRule(organization = "com.sun.jersey"),
1234-
ExclusionRule("javax.servlet", "javax.servlet-api"),
1235-
ExclusionRule("javax.ws.rs", "jsr311-api")),
12361221
Compile / unmanagedResources :=
12371222
(Compile / unmanagedResources).value.filter(!_.getName.endsWith(s"$propFileName")),
12381223
genConfigProperties := {

resource-managers/yarn/pom.xml

Lines changed: 29 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -29,43 +29,8 @@
2929
<name>Spark Project YARN</name>
3030
<properties>
3131
<sbt.project.name>yarn</sbt.project.name>
32-
<jersey-1.version>1.19</jersey-1.version>
3332
</properties>
3433

35-
<profiles>
36-
<profile>
37-
<id>hadoop-3</id>
38-
<activation>
39-
<activeByDefault>true</activeByDefault>
40-
</activation>
41-
<dependencies>
42-
<dependency>
43-
<groupId>org.apache.hadoop</groupId>
44-
<artifactId>hadoop-client-runtime</artifactId>
45-
<version>${hadoop.version}</version>
46-
<scope>${hadoop.deps.scope}</scope>
47-
</dependency>
48-
<dependency>
49-
<groupId>org.apache.hadoop</groupId>
50-
<artifactId>hadoop-client-minicluster</artifactId>
51-
<version>${hadoop.version}</version>
52-
<scope>test</scope>
53-
</dependency>
54-
<!-- Used by MiniYARNCluster -->
55-
<dependency>
56-
<groupId>org.bouncycastle</groupId>
57-
<artifactId>bcprov-jdk18on</artifactId>
58-
<scope>test</scope>
59-
</dependency>
60-
<dependency>
61-
<groupId>org.bouncycastle</groupId>
62-
<artifactId>bcpkix-jdk18on</artifactId>
63-
<scope>test</scope>
64-
</dependency>
65-
</dependencies>
66-
</profile>
67-
</profiles>
68-
6934
<dependencies>
7035
<dependency>
7136
<groupId>org.apache.spark</groupId>
@@ -102,6 +67,35 @@
10267
<groupId>org.apache.hadoop</groupId>
10368
<artifactId>hadoop-client-api</artifactId>
10469
<version>${hadoop.version}</version>
70+
<scope>${hadoop.deps.scope}</scope>
71+
</dependency>
72+
<dependency>
73+
<groupId>org.apache.hadoop</groupId>
74+
<artifactId>hadoop-client-runtime</artifactId>
75+
<version>${hadoop.version}</version>
76+
<scope>${hadoop.deps.scope}</scope>
77+
</dependency>
78+
<dependency>
79+
<groupId>org.apache.hadoop</groupId>
80+
<artifactId>hadoop-client-minicluster</artifactId>
81+
<version>${hadoop.version}</version>
82+
<scope>test</scope>
83+
</dependency>
84+
<!-- Used by MiniYARNCluster -->
85+
<dependency>
86+
<groupId>javax.xml.bind</groupId>
87+
<artifactId>jaxb-api</artifactId>
88+
<scope>test</scope>
89+
</dependency>
90+
<dependency>
91+
<groupId>org.bouncycastle</groupId>
92+
<artifactId>bcprov-jdk18on</artifactId>
93+
<scope>test</scope>
94+
</dependency>
95+
<dependency>
96+
<groupId>org.bouncycastle</groupId>
97+
<artifactId>bcpkix-jdk18on</artifactId>
98+
<scope>test</scope>
10599
</dependency>
106100

107101
<!-- Explicit listing of transitive deps that are shaded. Otherwise, odd compiler crashes. -->
@@ -135,22 +129,6 @@
135129
</dependency>
136130
<!-- End of shaded deps. -->
137131

138-
<!--
139-
SPARK-10059: Explicitly add JSP dependencies for tests since the MiniYARN cluster needs them.
140-
-->
141-
<dependency>
142-
<groupId>org.eclipse.jetty.orbit</groupId>
143-
<artifactId>javax.servlet.jsp</artifactId>
144-
<version>2.2.0.v201112011158</version>
145-
<scope>test</scope>
146-
</dependency>
147-
<dependency>
148-
<groupId>org.eclipse.jetty.orbit</groupId>
149-
<artifactId>javax.servlet.jsp.jstl</artifactId>
150-
<version>1.2.0.v201105211821</version>
151-
<scope>test</scope>
152-
</dependency>
153-
154132
<dependency>
155133
<groupId>org.mockito</groupId>
156134
<artifactId>mockito-core</artifactId>
@@ -166,65 +144,6 @@
166144
<artifactId>byte-buddy-agent</artifactId>
167145
<scope>test</scope>
168146
</dependency>
169-
170-
<!--
171-
Jersey 1 dependencies only required for YARN integration testing. Creating a YARN cluster
172-
in the JVM requires starting a Jersey 1-based web application.
173-
-->
174-
<dependency>
175-
<groupId>com.sun.jersey</groupId>
176-
<artifactId>jersey-core</artifactId>
177-
<scope>test</scope>
178-
<version>${jersey-1.version}</version>
179-
</dependency>
180-
<dependency>
181-
<groupId>com.sun.jersey</groupId>
182-
<artifactId>jersey-json</artifactId>
183-
<scope>test</scope>
184-
<version>${jersey-1.version}</version>
185-
</dependency>
186-
<dependency>
187-
<groupId>com.sun.jersey</groupId>
188-
<artifactId>jersey-server</artifactId>
189-
<scope>test</scope>
190-
<version>${jersey-1.version}</version>
191-
</dependency>
192-
<dependency>
193-
<groupId>com.sun.jersey.contribs</groupId>
194-
<artifactId>jersey-guice</artifactId>
195-
<scope>test</scope>
196-
<version>${jersey-1.version}</version>
197-
</dependency>
198-
<dependency>
199-
<groupId>com.sun.jersey</groupId>
200-
<artifactId>jersey-servlet</artifactId>
201-
<scope>test</scope>
202-
<version>${jersey-1.version}</version>
203-
</dependency>
204-
205-
<!-- These dependencies are duplicated from core, because dependencies in the "provided"
206-
scope are not transitive.-->
207-
<dependency>
208-
<groupId>${hive.group}</groupId>
209-
<artifactId>hive-exec</artifactId>
210-
<classifier>${hive.classifier}</classifier>
211-
<scope>provided</scope>
212-
</dependency>
213-
<dependency>
214-
<groupId>${hive.group}</groupId>
215-
<artifactId>hive-metastore</artifactId>
216-
<scope>provided</scope>
217-
</dependency>
218-
<dependency>
219-
<groupId>org.apache.thrift</groupId>
220-
<artifactId>libthrift</artifactId>
221-
<scope>provided</scope>
222-
</dependency>
223-
<dependency>
224-
<groupId>org.apache.thrift</groupId>
225-
<artifactId>libfb303</artifactId>
226-
<scope>provided</scope>
227-
</dependency>
228147
</dependencies>
229148

230149
<build>

0 commit comments

Comments
 (0)