
Commit 710c078

[LIVY-1010] Add support for Spark 3.5.6
## What changes were proposed in this pull request?

Keep Spark 3 support up to date with the latest Spark 3 release and upgrade dependencies.

## How was this patch tested?

Unit and integration tests.
1 parent c28f2fc commit 710c078

12 files changed: +85 −25 lines


.github/workflows/integration-tests.yaml

Lines changed: 2 additions & 1 deletion
@@ -48,7 +48,8 @@ jobs:
       -
         name: Set Python 3 as default for Spark 3 builds
         if: ${{ contains(matrix.maven_profile, 'spark3') }}
-        run: pyenv global 3
+        # This can be removed once support for Python 2 and Spark 2 is removed and the default python executable is python3
+        run: pyenv global 3 && echo "PYSPARK_PYTHON=$(which python3)" >> "$GITHUB_ENV"
       -
         name: Build with Maven
         run: mvn -Pthriftserver ${{ matrix.maven_profile }} -DskipTests -Dmaven.javadoc.skip=true -B -V -e verify

integration-test/pom.xml

Lines changed: 5 additions & 0 deletions
@@ -111,6 +111,11 @@
       </exclusions>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+    </dependency>
+
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-hdfs</artifactId>

pom.xml

Lines changed: 39 additions & 10 deletions
@@ -81,19 +81,21 @@
     <asynchttpclient.version>2.10.1</asynchttpclient.version>
     <hadoop.scope>compile</hadoop.scope>
     <slf4j.version>1.7.36</slf4j.version>
-    <reload4j.version>1.2.25</reload4j.version>
+    <reload4j.version>1.2.26</reload4j.version>
     <spark.scala-2.11.version>2.4.5</spark.scala-2.11.version>
     <spark.scala-2.12.version>2.4.5</spark.scala-2.12.version>
     <spark.version>${spark.scala-2.11.version}</spark.version>
     <kubernetes.client.version>5.6.0</kubernetes.client.version>
     <hive.version>3.0.0</hive.version>
-    <commons-codec.version>1.9</commons-codec.version>
-    <httpclient.version>4.5.13</httpclient.version>
-    <httpcore.version>4.4.4</httpcore.version>
+    <commons-codec.version>1.15</commons-codec.version>
+    <commons-lang3.version>3.17.0</commons-lang3.version>
+    <httpclient.version>4.5.14</httpclient.version>
+    <httpcore.version>4.4.16</httpcore.version>
     <jackson.version>2.12.7</jackson.version>
     <jackson-databind.version>2.12.7.1</jackson-databind.version>
+    <jacoco.version>0.8.13</jacoco.version>
     <javax.servlet-api.version>3.1.0</javax.servlet-api.version>
-    <jetty.version>9.4.50.v20221201</jetty.version>
+    <jetty.version>9.4.56.v20240826</jetty.version>
     <junit.version>4.13.1</junit.version>
     <libthrift.version>0.9.3</libthrift.version>
     <kryo.version>4.0.2</kryo.version>
@@ -130,6 +132,8 @@
     <!-- Set this to "true" to skip R tests. -->
     <skipRTests>false</skipRTests>
 
+    <!-- Set this to "true" to skip PySpark2 tests. -->
+    <skipPySpark2Tests>false</skipPySpark2Tests>
     <!-- Set this to "true" to skip PySpark3 tests. -->
     <skipPySpark3Tests>false</skipPySpark3Tests>
 
@@ -307,6 +311,12 @@
         <version>${commons-codec.version}</version>
       </dependency>
 
+      <dependency>
+        <groupId>org.apache.commons</groupId>
+        <artifactId>commons-lang3</artifactId>
+        <version>${commons-lang3.version}</version>
+      </dependency>
+
       <dependency>
         <groupId>io.dropwizard.metrics</groupId>
         <artifactId>metrics-core</artifactId>
@@ -412,6 +422,18 @@
         </exclusions>
       </dependency>
 
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-hdfs</artifactId>
+        <version>${hadoop.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>log4j</groupId>
+            <artifactId>log4j</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
+
       <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-hdfs</artifactId>
@@ -789,7 +811,7 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-shade-plugin</artifactId>
-        <version>3.2.1</version>
+        <version>3.5.0</version>
       </plugin>
 
       <plugin>
@@ -810,6 +832,7 @@
             <spark.ui.enabled>false</spark.ui.enabled>
             <project.version>${project.version}</project.version>
             <skipRTests>${skipRTests}</skipRTests>
+            <skipPySpark2Tests>${skipPySpark2Tests}</skipPySpark2Tests>
             <skipPySpark3Tests>${skipPySpark3Tests}</skipPySpark3Tests>
           </systemProperties>
           <redirectTestOutputToFile>${test.redirectToFile}</redirectTestOutputToFile>
@@ -839,6 +862,7 @@
            <spark.ui.enabled>false</spark.ui.enabled>
            <project.version>${project.version}</project.version>
            <skipRTests>${skipRTests}</skipRTests>
+            <skipPySpark2Tests>${skipPySpark2Tests}</skipPySpark2Tests>
            <skipPySpark3Tests>${skipPySpark3Tests}</skipPySpark3Tests>
          </systemProperties>
          <stdout>D</stdout>
@@ -1081,7 +1105,7 @@
       <plugin>
         <groupId>org.jacoco</groupId>
         <artifactId>jacoco-maven-plugin</artifactId>
-        <version>0.8.2</version>
+        <version>${jacoco.version}</version>
         <executions>
           <execution>
             <goals>
@@ -1163,7 +1187,7 @@
       <id>scala-2.12</id>
       <properties>
         <scala.binary.version>2.12</scala.binary.version>
-        <scala.version>2.12.15</scala.version>
+        <scala.version>2.12.18</scala.version>
       </properties>
     </profile>
 
@@ -1192,15 +1216,20 @@
     <profile>
       <id>spark3</id>
      <properties>
-        <spark.version>3.2.3</spark.version>
+        <spark.version>3.5.6</spark.version>
+        <hadoop.major-minor.version>3</hadoop.major-minor.version>
+        <hadoop.version>3.3.4</hadoop.version>
         <java.version>1.8</java.version>
         <py4j.version>0.10.9.7</py4j.version>
         <json4s.version>3.7.0-M11</json4s.version>
-        <netty.version>4.1.92.Final</netty.version>
+        <netty.version>4.1.96.Final</netty.version>
+        <jackson.version>2.15.2</jackson.version>
+        <jackson-databind.version>2.15.2</jackson-databind.version>
         <spark.bin.name>spark-${spark.version}-bin-hadoop${hadoop.major-minor.version}</spark.bin.name>
         <spark.bin.download.url>
           https://archive.apache.org/dist/spark/spark-${spark.version}/${spark.bin.name}.tgz
         </spark.bin.download.url>
+        <skipPySpark2Tests>true</skipPySpark2Tests>
       </properties>
     </profile>

python-api/setup.py

Lines changed: 3 additions & 1 deletion
@@ -31,7 +31,6 @@
     'cloudpickle>=0.2.1',
     'configparser>=3.5.0',
     'future>=0.15.2',
-    'futures>=3.0.5',
     'mock~=3.0.5',
     'requests>=2.10.0',
     'responses>=0.5.1',
@@ -54,6 +53,9 @@
     keywords='livy pyspark development',
     classifiers=CLASSIFIERS,
     install_requires=requirements,
+    extras_require={
+        ':python_version == "2.7"': ['futures']
+    },
     setup_requires=['pytest-runner', 'flake8'],
     tests_require=['pytest']
 )

repl/src/test/scala/org/apache/livy/repl/PythonInterpreterSpec.scala

Lines changed: 5 additions & 0 deletions
@@ -282,6 +282,11 @@ class Python2InterpreterSpec extends PythonBaseInterpreterSpec {
 
   implicit val formats = DefaultFormats
 
+  override protected def withFixture(test: NoArgTest): Outcome = {
+    assume(!sys.props.getOrElse("skipPySpark2Tests", "false").toBoolean, "Skipping PySpark2 tests.")
+    test()
+  }
+
   override def createInterpreter(): Interpreter = {
     val sparkConf = new SparkConf()
     PythonInterpreter(sparkConf, new SparkEntries(sparkConf))

repl/src/test/scala/org/apache/livy/repl/PythonSessionSpec.scala

Lines changed: 7 additions & 1 deletion
@@ -170,7 +170,13 @@ abstract class PythonSessionSpec extends BaseSessionSpec(PySpark) {
   }
 }
 
-class Python2SessionSpec extends PythonSessionSpec
+class Python2SessionSpec extends PythonSessionSpec {
+
+  override protected def withFixture(test: NoArgTest): Outcome = {
+    assume(!sys.props.getOrElse("skipPySpark2Tests", "false").toBoolean, "Skipping PySpark2 tests.")
+    test()
+  }
+}
 
 class Python3SessionSpec extends PythonSessionSpec with BeforeAndAfterAll {
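
The two test changes above rely on a ScalaTest pattern worth spelling out: overriding withFixture and calling assume cancels every test in the suite (rather than failing it) when the condition is false. Below is a minimal, self-contained sketch of that pattern; it is illustrative only, assumes a pre-3.2 FlatSpec-style suite, and the "skip.example.tests" property name is hypothetical.

import org.scalatest.{FlatSpec, Outcome}

class SkippableSpec extends FlatSpec {

  override protected def withFixture(test: NoArgTest): Outcome = {
    // assume() throws TestCanceledException when the condition is false, so every
    // test in this suite is reported as canceled instead of failed.
    assume(!sys.props.getOrElse("skip.example.tests", "false").toBoolean,
      "Skipping example tests.")
    // Run the actual test body when the assumption holds.
    test()
  }

  "An example" should "run only when the skip property is unset or false" in {
    assert(1 + 1 == 2)
  }
}

Running with -Dskip.example.tests=true reports the whole suite as canceled, which is how the skipPySpark2Tests flag passed through the systemProperties blocks in pom.xml above takes effect for the spark3 profile.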

repl/src/test/scala/org/apache/livy/repl/SQLInterpreterSpec.scala

Lines changed: 1 addition & 1 deletion
@@ -193,7 +193,7 @@ class SQLInterpreterSpec extends BaseInterpreterSpec {
     assert(resp.isInstanceOf[Interpreter.ExecuteError])
     val error = resp.asInstanceOf[Interpreter.ExecuteError]
     error.ename should be ("Error")
-    assert(error.evalue.contains("not found"))
+    assert(error.evalue.contains("not found") || error.evalue.contains("cannot be found"))
   }
 
   it should "fail if submitting multiple queries" in withInterpreter { interpreter =>

server/pom.xml

Lines changed: 5 additions & 0 deletions
@@ -94,6 +94,11 @@
       <artifactId>javax.servlet-api</artifactId>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+    </dependency>
+
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-auth</artifactId>

server/src/main/scala/org/apache/livy/utils/LivySparkUtils.scala

Lines changed: 11 additions & 1 deletion
@@ -30,6 +30,16 @@ object LivySparkUtils extends Logging {
   // For each Spark version we supported, we need to add this mapping relation in case Scala
   // version cannot be detected from "spark-submit --version".
   private val _defaultSparkScalaVersion = SortedMap(
+    // Spark 3.5 + Scala 2.12
+    (3, 5) -> "2.12",
+    // Spark 3.4 + Scala 2.12
+    (3, 4) -> "2.12",
+    // Spark 3.3 + Scala 2.12
+    (3, 3) -> "2.12",
+    // Spark 3.2 + Scala 2.12
+    (3, 2) -> "2.12",
+    // Spark 3.1 + Scala 2.12
+    (3, 1) -> "2.12",
     // Spark 3.0 + Scala 2.12
     (3, 0) -> "2.12",
     // Spark 2.4 + Scala 2.11
@@ -42,7 +52,7 @@
 
   // Supported Spark version
   private val MIN_VERSION = (2, 2)
-  private val MAX_VERSION = (3, 1)
+  private val MAX_VERSION = (3, 6)
 
   private val sparkVersionRegex = """version (.*)""".r.unanchored
   private val scalaVersionRegex = """Scala version (.*), Java""".r.unanchored
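
The extended mapping and the raised MAX_VERSION work together: the SortedMap supplies a default Scala binary version per (major, minor) Spark version, and the bounds gate which Spark versions Livy accepts. The sketch below is not the Livy implementation; the object name, the exclusive upper bound, and the console output are assumptions made purely for illustration of how such a table and bounds can be queried.

import scala.collection.SortedMap

object SparkVersionLookupExample extends App {
  // Same shape as the table above: (major, minor) -> default Scala binary version.
  val defaultSparkScalaVersion: SortedMap[(Int, Int), String] = SortedMap(
    (2, 4) -> "2.11",
    (3, 0) -> "2.12",
    (3, 5) -> "2.12"
  )
  val MIN_VERSION = (2, 2)
  val MAX_VERSION = (3, 6)

  // Int tuples compare lexicographically, so (3, 5) < (3, 6) and (2, 2) <= (2, 4).
  // The upper bound is treated as exclusive here; that is an assumption of this sketch.
  def isSupported(v: (Int, Int)): Boolean = {
    val ord = Ordering[(Int, Int)]
    ord.gteq(v, MIN_VERSION) && ord.lt(v, MAX_VERSION)
  }

  println(isSupported((3, 5)))                   // true: Spark 3.5.x falls inside the bounds
  println(isSupported((3, 7)))                   // false: beyond MAX_VERSION
  println(defaultSparkScalaVersion.get((3, 5)))  // Some(2.12)
}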

server/src/main/scala/org/apache/livy/utils/SparkKubernetesApp.scala

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ import scala.util.control.NonFatal
 import io.fabric8.kubernetes.api.model._
 import io.fabric8.kubernetes.api.model.networking.v1.{Ingress, IngressBuilder}
 import io.fabric8.kubernetes.client.{Config, ConfigBuilder, _}
-import org.apache.commons.lang.StringUtils
+import org.apache.commons.lang3.StringUtils
 
 import org.apache.livy.{LivyConf, Logging}
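
The only change here is swapping the import from commons-lang to commons-lang3. For the null-safe string helpers usually reached through StringUtils, the lang3 class offers the same calls, so the rest of the file compiles unchanged. A tiny illustrative snippet (not part of the commit) of the kind of calls that behave identically under the new import:

import org.apache.commons.lang3.StringUtils

object StringUtilsCheck extends App {
  println(StringUtils.isBlank(null))      // true: null-safe blank check
  println(StringUtils.isNotBlank(" x "))  // true
  println(StringUtils.isEmpty(""))        // true
}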
