Skip to content

Commit 5abf702

Browse files
authored
[release] fix dataproc failures (#15199)
Fixes the following dataproc test failures in the latest release commit: - VEP serialisation failure - Warnings from mismatched spark version - Java/Scala library version conflicts - Logging configuration errors - pyarrow abi conflicts in dataproc 2.2.x - NullPointerException from premature hadoop.fs.closeall call Major changes: 1. dataproc 2.3.17-debian12 pyarrow had api conflicts with numpy2 in dataproc 2.2.x. dataproc 2.3.x fixes this and also provides many of the python libraries we install in `init_notebook.py`. 1. build.mill Spark provides many of our runtime dependencies and places its versions of libraries ahead of `hail-all-spark.jar` on the classpath. Unless we relocate these libraries, we need explicit compile-only dependencies on the libraries that we use directly but spark ships. Turns out many of spark 3.5.2's 3p libraries are the ones we need so don't have to relocate as many! 1. VEP To reduce what we need to serialise and work around VEP serialisation failures brought about by the log4j-api-scala introduction, VEP implementation is now anonymous. There are no other functional changes therein. This change has low security impact on the Broad-managed hail batch service in GCP, with changes limited to the version of spark and java logging library used for query-on-batch.
1 parent d7351b3 commit 5abf702

File tree

18 files changed

+323
-390
lines changed

18 files changed

+323
-390
lines changed

batch/Dockerfile.worker

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ RUN hail-pip-install \
4646
-r hailtop-requirements.txt \
4747
-r gear-requirements.txt \
4848
-r batch-requirements.txt \
49-
pyspark==3.5.0
49+
pyspark==3.5.3
5050

5151
ENV SPARK_HOME=$VIRTUAL_ENV/lib/python$UV_PYTHON/site-packages/pyspark
5252
ENV PATH="$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin"

batch/jvm-entryway/build.mill

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ object `package` extends ScalafixModule with ScalafmtModule {
2020

2121
override def bomMvnDeps: T[Seq[Dep]] =
2222
Seq(
23-
mvn"org.apache.logging.log4j:log4j-bom:2.25.2",
23+
mvn"org.apache.logging.log4j:log4j-bom:2.22.0",
2424
)
2525

2626
override def scalafixIvyDeps: T[Seq[Dep]] =

hail/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ HAIL_PIP_VERSION := $(HAIL_MAJOR_MINOR_VERSION).$(HAIL_PATCH_VERSION)
2020
HAIL_VERSION := $(HAIL_PIP_VERSION)-$(SHORT_REVISION)
2121

2222
SCALA_VERSION ?= 2.12
23-
SPARK_VERSION ?= 3.5.0
23+
SPARK_VERSION ?= 3.5.3
2424

2525
include env_var.mk
2626

hail/build.mill

Lines changed: 91 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
package build
55

66
import com.goyeau.mill.scalafix.ScalafixModule
7-
import org.typelevel.scalacoptions.*
87
import mill.*
98
import mill.api.{BuildCtx, Result}
109
import mill.scalalib.*
1110
import mill.scalalib.Assembly.*
1211
import mill.scalalib.TestModule.TestNg
1312
import mill.scalalib.scalafmt.ScalafmtModule
1413
import mill.util.{Jvm, VcsVersion}
14+
import org.typelevel.scalacoptions.*
1515
import upickle.default.ReadWriter
1616

1717
object Settings {
@@ -21,16 +21,12 @@ object Settings {
2121
}
2222

2323
object Deps {
24-
object HTTPComponents {
25-
val core = mvn"org.apache.httpcomponents:httpcore:4.4.14"
26-
val client = mvn"org.apache.httpcomponents:httpclient:4.5.13"
27-
}
2824

2925
object Asm {
30-
val bom = mvn"org.ow2.asm:asm-bom:9.9"
26+
val `asm-bom` = mvn"org.ow2.asm:asm-bom:9.9"
3127
val core = mvn"org.ow2.asm:asm"
32-
val analysis = mvn"org.ow2.asm:asm-analysis"
33-
val util = mvn"org.ow2.asm:asm-util"
28+
val `asm-analysis` = mvn"org.ow2.asm:asm-analysis"
29+
val `asm-util` = mvn"org.ow2.asm:asm-util"
3430
}
3531

3632
object Breeze {
@@ -39,51 +35,51 @@ object Deps {
3935
// - https://hail.zulipchat.com/#narrow/stream/123011-Hail-Query-Dev/topic/new.20spark.20ndarray.20failures/near/41645
4036
// - https://github.com/hail-is/hail/pull/11555
4137
val core = mvn"org.scalanlp::breeze:1.1"
42-
val natives = mvn"org.scalanlp::breeze-natives:1.1"
38+
val natives = mvn"org.scalanlp::breeze-natives:1.1".excludeOrg("org.apache.commons.math3")
4339
}
4440

45-
object Commons {
46-
val io = mvn"commons-io:commons-io:2.20.0"
47-
val lang3 = mvn"org.apache.commons:commons-lang3:3.18.0"
48-
val codec = mvn"commons-codec:commons-codec:1.19.0"
41+
object GoogleCloud {
42+
val `libraries-bom` = mvn"com.google.cloud:libraries-bom:26.66.0"
43+
val `google-auth-library-oauth2-http` = mvn"com.google.auth:google-auth-library-oauth2-http"
44+
val `google-cloud-storage` = mvn"com.google.cloud:google-cloud-storage".excludeOrg("com.fasterxml.jackson.core")
4945
}
5046

5147
object Spark {
5248
def core: Task[Dep] = Task.Anon(mvn"org.apache.spark::spark-core:${Env.sparkVersion()}")
5349
def mllib: Task[Dep] = Task.Anon(mvn"org.apache.spark::spark-mllib:${Env.sparkVersion()}")
54-
}
55-
56-
object Log4j {
57-
// have to forceVersion for reasons unknown?
58-
val bom = mvn"org.apache.logging.log4j:log4j-bom:2.25.2".forceVersion()
59-
val api = mvn"org.apache.logging.log4j:log4j-api"
60-
val core = mvn"org.apache.logging.log4j:log4j-core"
61-
val slf4j = mvn"org.apache.logging.log4j:log4j-slf4j2-impl"
6250

63-
val scala = mvn"org.apache.logging.log4j::log4j-api-scala:13.1.0"
51+
// direct dependencies provided by Spark at runtime
52+
val avro = mvn"org.apache.avro:avro:1.11.2"
53+
val `commons-codec` = mvn"commons-codec:commons-codec:1.16.1"
54+
val `commons-io` = mvn"commons-io:commons-io:2.16.1"
55+
val `commons-lang3` = mvn"org.apache.commons:commons-lang3:3.12.0"
56+
val `commons-math3` = mvn"org.apache.commons:commons-math3:3.6.1"
57+
val `hadoop-client-api` = mvn"org.apache.hadoop:hadoop-client-api:3.3.4"
58+
val httpclient = mvn"org.apache.httpcomponents:httpclient:4.5.14"
59+
val `jackson-core` = mvn"com.fasterxml.jackson.core:jackson-core:2.15.2"
60+
val `json4s-jackson` = mvn"org.json4s::json4s-jackson:3.7.0-M11"
61+
val `l4z-java` = mvn"org.lz4:lz4-java:1.8.0"
62+
val `log4j-bom` = mvn"org.apache.logging.log4j:log4j-bom:2.22.0"
63+
val `logj4-api` = mvn"org.apache.logging.log4j:log4j-api"
64+
val `log4j-core` = mvn"org.apache.logging.log4j:log4j-core"
65+
val `zstd-jni` = mvn"com.github.luben:zstd-jni:1.5.5-4"
6466
}
6567

66-
val samtools = mvn"com.github.samtools:htsjdk:3.0.5"
67-
val jdistlib = mvn"net.sourceforge.jdistlib:jdistlib:0.4.5"
68+
val `elasticsearch-spark` = mvn"org.elasticsearch::elasticsearch-spark-30:8.4.3".excludeOrg("org.apache.spark")
6869
val freemarker = mvn"org.freemarker:freemarker:2.3.31"
69-
val elasticsearch = mvn"org.elasticsearch::elasticsearch-spark-30:8.4.3"
70-
val gcloud = mvn"com.google.cloud:google-cloud-storage:2.55.0"
70+
val htsjdk = mvn"com.github.samtools:htsjdk:3.0.5".excludeOrg("*")
71+
val jdistlib = mvn"net.sourceforge.jdistlib:jdistlib:0.4.5".excludeOrg("*")
7172
val jna = mvn"net.java.dev.jna:jna:5.13.0"
72-
val json4s = mvn"org.json4s::json4s-jackson:3.7.0-M11"
73-
val zstd = mvn"com.github.luben:zstd-jni:1.5.5-11"
74-
val lz4 = mvn"org.lz4:lz4-java:1.8.0"
73+
val `junixsocket-core` = mvn"com.kohlschutter.junixsocket:junixsocket-core:2.6.1"
74+
val `log4j-api-scala` = mvn"org.apache.logging.log4j::log4j-api-scala:13.1.0".excludeOrg("*")
7575
val netlib = mvn"com.github.fommil.netlib:all:1.1.2"
76-
val avro = mvn"org.apache.avro:avro:1.11.2"
77-
val junixsocket = mvn"com.kohlschutter.junixsocket:junixsocket-core:2.6.1"
78-
val hadoopClient = mvn"org.apache.hadoop:hadoop-client:3.4.2"
79-
val jackson = mvn"com.fasterxml.jackson.core:jackson-core:2.15.2"
80-
val sourcecode = mvn"com.lihaoyi::sourcecode:0.4.2"
81-
val collection_compat = mvn"org.scala-lang.modules::scala-collection-compat:2.13.0"
76+
val `scala-collection-compat` = mvn"org.scala-lang.modules::scala-collection-compat:2.13.0"
8277
// provides @nowarn212 and @nowarn213
83-
val scalac_compat = mvn"org.typelevel::scalac-compat-annotation:0.1.4"
78+
val `scalac-compat-annotation` = mvn"org.typelevel::scalac-compat-annotation:0.1.4"
79+
val sourcecode = mvn"com.lihaoyi::sourcecode:0.4.2"
8480

8581
object Plugins {
86-
val betterModadicFor = mvn"com.olegpy::better-monadic-for:0.3.1"
82+
val `better-monadic-for` = mvn"com.olegpy::better-monadic-for:0.3.1"
8783
}
8884
}
8985

@@ -97,7 +93,7 @@ object Env extends Module {
9793
}
9894

9995
def sparkVersion: T[String] = Task.Input {
100-
Result.Success(Task.env.getOrElse("SPARK_VERSION", "3.5.0"))
96+
Result.Success(Task.env.getOrElse("SPARK_VERSION", "3.5.3"))
10197
}
10298

10399
val buildMode: BuildMode = BuildMode.valueOf(millbuild.BuildConfig.buildMode)
@@ -113,9 +109,10 @@ trait HailModule extends ScalaModule with ScalafmtModule with ScalafixModule { o
113109
mvn"io.github.tanin47::scalafix-forbidden-symbol:1.0.0",
114110
)
115111

116-
override def depManagement: T[Seq[Dep]] = Task {
117-
Seq(Deps.collection_compat)
118-
}
112+
override def depManagement: T[Seq[Dep]] =
113+
Seq(
114+
Deps.`scala-collection-compat`,
115+
)
119116

120117
override def javacOptions: T[Seq[String]] = Seq(
121118
"-Xlint:all",
@@ -215,24 +212,6 @@ trait HailModule extends ScalaModule with ScalafmtModule with ScalafixModule { o
215212
finalOptions :+ "-Wconf:msg=legacy-binding:s"
216213
}
217214
}
218-
219-
trait HailTests extends ScalaTests with TestNg with ScalafmtModule with ScalafixModule {
220-
override def forkArgs: T[Seq[String]] = Seq("-Xss4m", "-Xmx4096M")
221-
222-
override def scalafixIvyDeps: T[Seq[Dep]] = Seq(
223-
mvn"io.github.tanin47::scalafix-forbidden-symbol:1.0.0",
224-
)
225-
226-
override def mvnDeps: T[Seq[Dep]] =
227-
outer.compileMvnDeps() ++ Seq(
228-
mvn"org.scalacheck::scalacheck:1.18.1",
229-
mvn"org.scalatest::scalatest:3.2.19",
230-
mvn"org.scalatest::scalatest-shouldmatchers:3.2.19",
231-
mvn"org.scalatestplus::scalacheck-1-18:3.2.19.0",
232-
mvn"org.scalatestplus::testng-7-10:3.2.19.0",
233-
mvn"org.mockito::mockito-scala:1.17.31",
234-
)
235-
}
236215
}
237216

238217
object hail extends Cross[RootHailModule](millbuild.BuildConfig.enabledScalaVersions)
@@ -276,70 +255,64 @@ trait RootHailModule extends CrossScalaModule with HailModule { outer =>
276255
}
277256

278257
override def bomMvnDeps: T[Seq[Dep]] = Seq(
279-
Deps.Asm.bom,
280-
Deps.Log4j.bom,
258+
Deps.Asm.`asm-bom`,
259+
Deps.GoogleCloud.`libraries-bom`,
260+
Deps.Spark.`log4j-bom`,
281261
)
282262

283263
override def mvnDeps: T[Seq[Dep]] = Seq(
284-
Deps.HTTPComponents.core,
285-
Deps.HTTPComponents.client,
286264
Deps.Asm.core,
287-
Deps.Asm.analysis,
288-
Deps.Asm.util,
289-
Deps.Log4j.core,
290-
Deps.Log4j.scala,
291-
Deps.samtools.excludeOrg("*"),
292-
Deps.jdistlib.excludeOrg("*"),
265+
Deps.Asm.`asm-analysis`,
266+
Deps.Asm.`asm-util`,
267+
Deps.GoogleCloud.`google-auth-library-oauth2-http`,
268+
Deps.GoogleCloud.`google-cloud-storage`,
269+
Deps.`elasticsearch-spark`,
293270
Deps.freemarker,
294-
Deps.elasticsearch.excludeOrg("org.apache.spark"),
295-
Deps.gcloud.excludeOrg("com.fasterxml.jackson.core"),
271+
Deps.htsjdk,
272+
Deps.jdistlib,
273+
Deps.`log4j-api-scala`,
296274
Deps.jna,
297-
Deps.json4s.excludeOrg("com.fasterxml.jackson.core"),
298-
Deps.zstd,
275+
Deps.`scala-collection-compat`,
299276
Deps.sourcecode,
300-
Deps.collection_compat,
301277
)
302278

303279
override def runMvnDeps: T[Seq[Dep]] = Seq(
304-
Deps.Breeze.natives.excludeOrg("org.apache.commons.math3"),
305-
Deps.Commons.io,
306-
Deps.Commons.lang3,
307-
Deps.Commons.codec,
308-
Deps.lz4,
280+
Deps.Breeze.natives,
309281
Deps.netlib,
310-
Deps.avro.excludeOrg("com.fasterxml.jackson.core"),
311-
Deps.junixsocket,
312-
Deps.Log4j.slf4j,
282+
Deps.`junixsocket-core`,
313283
)
314284

315285
override def compileMvnDeps: T[Seq[Dep]] = Seq(
316-
Deps.Log4j.api,
317-
Deps.hadoopClient,
286+
Deps.Breeze.core,
318287
Deps.Spark.core().excludeOrg("org.scalanlp"), // Hail has an explicit dependency on Breeze 1.1
319288
Deps.Spark.mllib().excludeOrg("org.scalanlp"), // Hail has an explicit dependency on Breeze 1.1
320-
Deps.Breeze.core,
321-
Deps.jackson,
322-
Deps.scalac_compat,
289+
Deps.Spark.avro,
290+
Deps.Spark.`commons-codec`,
291+
Deps.Spark.`commons-io`,
292+
Deps.Spark.`commons-lang3`,
293+
Deps.Spark.`commons-math3`,
294+
Deps.Spark.httpclient,
295+
Deps.Spark.`hadoop-client-api`,
296+
Deps.Spark.`jackson-core`,
297+
Deps.Spark.`json4s-jackson`,
298+
Deps.Spark.`l4z-java`,
299+
Deps.Spark.`logj4-api`,
300+
Deps.Spark.`log4j-core`,
301+
Deps.Spark.`zstd-jni`,
302+
Deps.`scalac-compat-annotation`,
323303
)
324304

325305
override def assemblyRules: Seq[Rule] = super.assemblyRules ++ Seq(
326306
Rule.Exclude("META-INF/INDEX.LIST"),
327307
Rule.ExcludePattern("^scala/(?!collection/compat).*"),
328308
Rule.AppendPattern("META-INF/services/.*", "\n"),
329309
Rule.Relocate("breeze.**", "is.hail.relocated.@0"),
330-
Rule.Relocate("com.google.cloud.**", "is.hail.relocated.@0"),
331-
Rule.Relocate("com.google.common.**", "is.hail.relocated.@0"),
332-
Rule.Relocate("org.apache.commons.io.**", "is.hail.relocated.@0"),
333-
Rule.Relocate("org.apache.commons.lang3.**", "is.hail.relocated.@0"),
334-
Rule.Relocate("org.apache.http.**", "is.hail.relocated.@0"),
335-
Rule.Relocate("org.elasticsearch.**", "is.hail.relocated.@0"),
336-
Rule.Relocate("org.json4s.**", "is.hail.relocated.@0"),
337-
Rule.Relocate("org.objectweb.**", "is.hail.relocated.@0"),
310+
Rule.Relocate("com.google.**", "is.hail.relocated.@0"),
338311
Rule.Relocate("scala.collection.compat.**", "is.hail.relocated.@0"),
339312
)
340313

341314
override def scalacPluginMvnDeps: T[Seq[Dep]] = Seq(
342-
Deps.Plugins.betterModadicFor
315+
Deps.Plugins.`better-monadic-for`,
343316
)
344317

345318
def writeRunClasspath: T[PathRef] = Task {
@@ -383,18 +356,32 @@ trait RootHailModule extends CrossScalaModule with HailModule { outer =>
383356
}
384357
}
385358

386-
object test extends HailTests {
387-
override def assemblyRules: Seq[Rule] = outer.assemblyRules ++ Seq(
388-
Rule.Relocate("org.codehaus.jackson.**", "is.hail.relocated.@0")
389-
)
359+
object test extends ScalaTests with TestNg with ScalafmtModule with ScalafixModule {
360+
override def forkArgs: T[Seq[String]] =
361+
Seq("-Xss4m", "-Xmx4096M")
362+
363+
override def scalafixIvyDeps: T[Seq[Dep]] =
364+
outer.scalafixIvyDeps()
365+
366+
override def compileMvnDeps: T[Seq[Dep]] =
367+
outer.compileMvnDeps()
390368

391369
override def mvnDeps: T[Seq[Dep]] =
392-
super.mvnDeps() ++ outer.mvnDeps() ++ Seq(
393-
Deps.jackson,
370+
outer.mvnDeps() ++ Seq(
371+
mvn"com.google.inject:guice:5.1.0",
372+
mvn"org.scalacheck::scalacheck:1.18.1",
373+
mvn"org.scalatest::scalatest:3.2.19",
374+
mvn"org.scalatest::scalatest-shouldmatchers:3.2.19",
375+
mvn"org.scalatestplus::scalacheck-1-18:3.2.19.0",
376+
mvn"org.scalatestplus::testng-7-10:3.2.19.0",
377+
mvn"org.mockito::mockito-scala:1.17.31",
394378
)
395379

396-
override def compileModuleDeps: Seq[JavaModule] =
397-
super.compileModuleDeps ++ outer.compileModuleDeps
380+
override def runMvnDeps: T[Seq[Dep]] =
381+
outer.runMvnDeps()
382+
383+
override def assemblyRules: Seq[Rule] =
384+
outer.assemblyRules
398385
}
399386

400387
object shadedazure extends JavaModule {

hail/hail/src/is/hail/backend/driver/Py4JQueryDriver.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import java.io.Closeable
2828
import java.net.InetSocketAddress
2929
import java.util
3030

31-
import com.google.api.client.http.HttpStatusCodes
3231
import com.sun.net.httpserver.{HttpExchange, HttpServer}
3332
import org.apache.hadoop.conf.Configuration
3433
import org.apache.spark.sql.DataFrame
@@ -412,12 +411,12 @@ final class Py4JQueryDriver(backend: Backend) extends Closeable with Logging {
412411
}
413412

414413
override def result(req: HttpExchange, result: Array[Byte]): Unit =
415-
respond(req, HttpStatusCodes.STATUS_CODE_OK, result)
414+
respond(req, 200, result)
416415

417416
override def failure(req: HttpExchange, t: Throwable): Unit =
418417
respond(
419418
req,
420-
HttpStatusCodes.STATUS_CODE_SERVER_ERROR,
419+
500,
421420
jsonToBytes {
422421
val (shortMessage, expandedMessage, errorId) = handleForPython(t)
423422
JObject(

hail/hail/src/is/hail/backend/service/Worker.scala

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ import java.nio.charset._
1717
import java.nio.file.Path
1818
import java.util
1919
import java.util.concurrent.Executors
20-
21-
import com.google.common.util.concurrent.ThreadFactoryBuilder
20+
import java.util.concurrent.atomic.AtomicInteger
2221

2322
class ServiceTaskContext(val partitionId: Int) extends HailTaskContext {
2423
override def stageId(): Int = 0
@@ -193,12 +192,15 @@ object Worker extends Logging {
193192

194193
implicit val ec: ExecutionContext =
195194
ExecutionContext.fromExecutor(
196-
Executors.newCachedThreadPool(
197-
new ThreadFactoryBuilder()
198-
.setDaemon(true)
199-
.setNameFormat("hail-worker-thread-%d")
200-
.build()
201-
)
195+
Executors.newCachedThreadPool {
196+
val threadFactory = Executors.defaultThreadFactory()
197+
val counter = new AtomicInteger(0)
198+
task =>
199+
val thread = threadFactory.newThread(task)
200+
thread.setName(f"hail-worker-thread-${counter.getAndIncrement()}")
201+
thread.setDaemon(true)
202+
thread
203+
}
202204
)
203205

204206
def open(x: String): SeekableDataInputStream =

0 commit comments

Comments
 (0)