Skip to content

Commit 733ac4c

Browse files
authored
[release] fix classpath error for qob (#15201)
#15199 fixed running in Dataproc. It broke query-on-batch (QoB), however, because the hail-all-spark.jar assembly bundled different versions of libraries that Spark already provides. To fix this, I've explicitly excluded many of the libraries Spark provides from the assembly. This is by no means complete, because finding which library causes which dependency to get bundled is not trivial; Mill's dependency resolution isn't the best. To make the QoB classpath behave like Dataproc, I've positioned the Spark libraries before the Hail jar on the worker's classpath. I believe this should not cause issues for previous Hail versions, as they've worked on Dataproc, where this is already the case. This change has low impact on the Broad-managed Hail Batch deployment in GCP. ### Appsec Review - [x] Required: The impact has been assessed and approved by appsec
1 parent 5abf702 commit 733ac4c

File tree

4 files changed

+40
-20
lines changed

4 files changed

+40
-20
lines changed

batch/batch/worker/worker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2545,7 +2545,7 @@ async def create_and_start(
25452545
'java',
25462546
f'-Xmx{heap_memory_mib}M',
25472547
'-cp',
2548-
f'/jvm-entryway/jvm-entryway.jar:{JVM.SPARK_HOME}/jars/*',
2548+
f'{JVM.SPARK_HOME}/jars/*:/jvm-entryway/jvm-entryway.jar',
25492549
'is.hail.JVMEntryway',
25502550
socket_file,
25512551
]

hail/build.mill

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ object Deps {
2424

2525
object Asm {
2626
val `asm-bom` = mvn"org.ow2.asm:asm-bom:9.9"
27-
val core = mvn"org.ow2.asm:asm"
27+
val asm = mvn"org.ow2.asm:asm"
2828
val `asm-analysis` = mvn"org.ow2.asm:asm-analysis"
2929
val `asm-util` = mvn"org.ow2.asm:asm-util"
3030
}
@@ -34,14 +34,14 @@ object Deps {
3434
// Before changing the breeze version review:
3535
// - https://hail.zulipchat.com/#narrow/stream/123011-Hail-Query-Dev/topic/new.20spark.20ndarray.20failures/near/41645
3636
// - https://github.com/hail-is/hail/pull/11555
37-
val core = mvn"org.scalanlp::breeze:1.1"
38-
val natives = mvn"org.scalanlp::breeze-natives:1.1".excludeOrg("org.apache.commons.math3")
37+
val breeze = mvn"org.scalanlp::breeze:1.1"
38+
val `breeze-natives` = mvn"org.scalanlp::breeze-natives:1.1"
3939
}
4040

4141
object GoogleCloud {
4242
val `libraries-bom` = mvn"com.google.cloud:libraries-bom:26.66.0"
4343
val `google-auth-library-oauth2-http` = mvn"com.google.auth:google-auth-library-oauth2-http"
44-
val `google-cloud-storage` = mvn"com.google.cloud:google-cloud-storage".excludeOrg("com.fasterxml.jackson.core")
44+
val `google-cloud-storage` = mvn"com.google.cloud:google-cloud-storage"
4545
}
4646

4747
object Spark {
@@ -65,13 +65,13 @@ object Deps {
6565
val `zstd-jni` = mvn"com.github.luben:zstd-jni:1.5.5-4"
6666
}
6767

68-
val `elasticsearch-spark` = mvn"org.elasticsearch::elasticsearch-spark-30:8.4.3".excludeOrg("org.apache.spark")
68+
val `elasticsearch-spark` = mvn"org.elasticsearch::elasticsearch-spark-30:9.2.1"
6969
val freemarker = mvn"org.freemarker:freemarker:2.3.31"
70-
val htsjdk = mvn"com.github.samtools:htsjdk:3.0.5".excludeOrg("*")
71-
val jdistlib = mvn"net.sourceforge.jdistlib:jdistlib:0.4.5".excludeOrg("*")
70+
val htsjdk = mvn"com.github.samtools:htsjdk:3.0.5"
71+
val jdistlib = mvn"net.sourceforge.jdistlib:jdistlib:0.4.5"
7272
val jna = mvn"net.java.dev.jna:jna:5.13.0"
7373
val `junixsocket-core` = mvn"com.kohlschutter.junixsocket:junixsocket-core:2.6.1"
74-
val `log4j-api-scala` = mvn"org.apache.logging.log4j::log4j-api-scala:13.1.0".excludeOrg("*")
74+
val `log4j-api-scala` = mvn"org.apache.logging.log4j::log4j-api-scala:13.1.0"
7575
val netlib = mvn"com.github.fommil.netlib:all:1.1.2"
7676
val `scala-collection-compat` = mvn"org.scala-lang.modules::scala-collection-compat:2.13.0"
7777
// provides @nowarn212 and @nowarn213
@@ -261,29 +261,48 @@ trait RootHailModule extends CrossScalaModule with HailModule { outer =>
261261
)
262262

263263
override def mvnDeps: T[Seq[Dep]] = Seq(
264-
Deps.Asm.core,
264+
Deps.Asm.asm,
265265
Deps.Asm.`asm-analysis`,
266266
Deps.Asm.`asm-util`,
267-
Deps.GoogleCloud.`google-auth-library-oauth2-http`,
268-
Deps.GoogleCloud.`google-cloud-storage`,
269-
Deps.`elasticsearch-spark`,
267+
Deps.GoogleCloud.`google-auth-library-oauth2-http`
268+
.excludeOrg(
269+
"commons-codec",
270+
"org.apache.httpcomponents",
271+
"org.slf4j",
272+
),
273+
Deps.GoogleCloud.`google-cloud-storage`
274+
.excludeOrg(
275+
"com.fasterxml.jackson.core",
276+
"commons-codec",
277+
"org.slf4j",
278+
),
279+
Deps.`elasticsearch-spark`
280+
.excludeOrg(
281+
"commons-logging",
282+
"org.apache.spark",
283+
"org.slf4j",
284+
),
270285
Deps.freemarker,
271-
Deps.htsjdk,
272-
Deps.jdistlib,
273-
Deps.`log4j-api-scala`,
286+
Deps.htsjdk.excludeOrg("*"),
287+
Deps.jdistlib.excludeOrg("*"),
288+
Deps.`log4j-api-scala`.excludeOrg("*"),
274289
Deps.jna,
275290
Deps.`scala-collection-compat`,
276291
Deps.sourcecode,
277292
)
278293

279294
override def runMvnDeps: T[Seq[Dep]] = Seq(
280-
Deps.Breeze.natives,
295+
Deps.Breeze.`breeze-natives`
296+
.excludeOrg(
297+
"org.apache.commons",
298+
"org.slf4j",
299+
),
281300
Deps.netlib,
282301
Deps.`junixsocket-core`,
283302
)
284303

285304
override def compileMvnDeps: T[Seq[Dep]] = Seq(
286-
Deps.Breeze.core,
305+
Deps.Breeze.breeze,
287306
Deps.Spark.core().excludeOrg("org.scalanlp"), // Hail has an explicit dependency on Breeze 1.1
288307
Deps.Spark.mllib().excludeOrg("org.scalanlp"), // Hail has an explicit dependency on Breeze 1.1
289308
Deps.Spark.avro,

hail/hail/src/is/hail/expr/ir/GenericLines.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ object GenericLines extends Logging {
6060
assert(!split || filePerPartition)
6161

6262
val delegate =
63-
new BoundedInputStream.Builder()
63+
BoundedInputStream
64+
.builder()
6465
.setInputStream(codec.makeInputStream(rawIS))
6566
.get()
6667

hail/python/hail/docs/change_log.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ critically depend on experimental functionality.**
5454

5555
## Version 0.2.137
5656

57-
Released 2025-11-21
57+
Released 2025-11-24
5858

5959
### New Features
6060

0 commit comments

Comments
 (0)