Skip to content

Commit 3e03303

Browse files
wangyum, dongjoon-hyun
authored and committed
[SPARK-25258][SPARK-23131][SPARK-25176][BUILD] Upgrade Kryo to 4.0.2
## What changes were proposed in this pull request? Upgrade chill to 0.9.3, Kryo to 4.0.2, to get bug fixes and improvements. The resolved tickets include: - SPARK-25258 Upgrade kryo package to version 4.0.2 - SPARK-23131 Kryo raises StackOverflow during serializing GLR model - SPARK-25176 Kryo fails to serialize a parametrised type hierarchy More details: https://github.com/twitter/chill/releases/tag/v0.9.3 twitter/chill@cc3910d ## How was this patch tested? Existing tests. Closes apache#22179 from wangyum/SPARK-23131. Lead-authored-by: Yuming Wang <[email protected]> Co-authored-by: Dongjoon Hyun <[email protected]> Signed-off-by: Sean Owen <[email protected]>
1 parent 458468a commit 3e03303

File tree

7 files changed

+48
-15
lines changed

7 files changed

+48
-15
lines changed

core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,26 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext {
412412
assert(!ser2.getAutoReset)
413413
}
414414

415+
test("SPARK-25176 ClassCastException when writing a Map after previously " +
416+
"reading a Map with different generic type") {
417+
// This test uses the example in https://github.com/EsotericSoftware/kryo/issues/384
418+
import java.util._
419+
val ser = new KryoSerializer(new SparkConf).newInstance().asInstanceOf[KryoSerializerInstance]
420+
421+
class MapHolder {
422+
private val mapOne = new HashMap[Int, String]
423+
private val mapTwo = this.mapOne
424+
}
425+
426+
val serializedMapHolder = ser.serialize(new MapHolder)
427+
ser.deserialize[MapHolder](serializedMapHolder)
428+
429+
val stringMap = new HashMap[Int, List[String]]
430+
stringMap.put(1, new ArrayList[String])
431+
val serializedMap = ser.serialize[Map[Int, List[String]]](stringMap)
432+
ser.deserialize[HashMap[Int, List[String]]](serializedMap)
433+
}
434+
415435
private def testSerializerInstanceReuse(autoReset: Boolean, referenceTracking: Boolean): Unit = {
416436
val conf = new SparkConf(loadDefaults = false)
417437
.set("spark.kryo.referenceTracking", referenceTracking.toString)

dev/deps/spark-deps-hadoop-2.6

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar
2727
calcite-avatica-1.2.0-incubating.jar
2828
calcite-core-1.2.0-incubating.jar
2929
calcite-linq4j-1.2.0-incubating.jar
30-
chill-java-0.8.4.jar
31-
chill_2.11-0.8.4.jar
30+
chill-java-0.9.3.jar
31+
chill_2.11-0.9.3.jar
3232
commons-beanutils-1.7.0.jar
3333
commons-beanutils-core-1.8.0.jar
3434
commons-cli-1.2.jar
@@ -130,7 +130,7 @@ jsr305-1.3.9.jar
130130
jta-1.1.jar
131131
jtransforms-2.4.0.jar
132132
jul-to-slf4j-1.7.16.jar
133-
kryo-shaded-3.0.3.jar
133+
kryo-shaded-4.0.2.jar
134134
kubernetes-client-3.0.0.jar
135135
kubernetes-model-2.0.0.jar
136136
leveldbjni-all-1.8.jar
@@ -149,7 +149,7 @@ metrics-jvm-3.1.5.jar
149149
minlog-1.3.0.jar
150150
netty-3.9.9.Final.jar
151151
netty-all-4.1.17.Final.jar
152-
objenesis-2.1.jar
152+
objenesis-2.5.1.jar
153153
okhttp-3.8.1.jar
154154
okio-1.13.0.jar
155155
opencsv-2.3.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar
2727
calcite-avatica-1.2.0-incubating.jar
2828
calcite-core-1.2.0-incubating.jar
2929
calcite-linq4j-1.2.0-incubating.jar
30-
chill-java-0.8.4.jar
31-
chill_2.11-0.8.4.jar
30+
chill-java-0.9.3.jar
31+
chill_2.11-0.9.3.jar
3232
commons-beanutils-1.7.0.jar
3333
commons-beanutils-core-1.8.0.jar
3434
commons-cli-1.2.jar
@@ -132,7 +132,7 @@ jsr305-1.3.9.jar
132132
jta-1.1.jar
133133
jtransforms-2.4.0.jar
134134
jul-to-slf4j-1.7.16.jar
135-
kryo-shaded-3.0.3.jar
135+
kryo-shaded-4.0.2.jar
136136
kubernetes-client-3.0.0.jar
137137
kubernetes-model-2.0.0.jar
138138
leveldbjni-all-1.8.jar
@@ -151,7 +151,7 @@ metrics-jvm-3.1.5.jar
151151
minlog-1.3.0.jar
152152
netty-3.9.9.Final.jar
153153
netty-all-4.1.17.Final.jar
154-
objenesis-2.1.jar
154+
objenesis-2.5.1.jar
155155
okhttp-3.8.1.jar
156156
okio-1.13.0.jar
157157
opencsv-2.3.jar

dev/deps/spark-deps-hadoop-3.1

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ breeze_2.11-0.13.2.jar
2525
calcite-avatica-1.2.0-incubating.jar
2626
calcite-core-1.2.0-incubating.jar
2727
calcite-linq4j-1.2.0-incubating.jar
28-
chill-java-0.8.4.jar
29-
chill_2.11-0.8.4.jar
28+
chill-java-0.9.3.jar
29+
chill_2.11-0.9.3.jar
3030
commons-beanutils-1.9.3.jar
3131
commons-cli-1.2.jar
3232
commons-codec-1.10.jar
@@ -146,7 +146,7 @@ kerby-config-1.0.1.jar
146146
kerby-pkix-1.0.1.jar
147147
kerby-util-1.0.1.jar
148148
kerby-xdr-1.0.1.jar
149-
kryo-shaded-3.0.3.jar
149+
kryo-shaded-4.0.2.jar
150150
kubernetes-client-3.0.0.jar
151151
kubernetes-model-2.0.0.jar
152152
leveldbjni-all-1.8.jar
@@ -167,7 +167,7 @@ mssql-jdbc-6.2.1.jre7.jar
167167
netty-3.9.9.Final.jar
168168
netty-all-4.1.17.Final.jar
169169
nimbus-jose-jwt-4.41.1.jar
170-
objenesis-2.1.jar
170+
objenesis-2.5.1.jar
171171
okhttp-2.7.5.jar
172172
okhttp-3.8.1.jar
173173
okio-1.13.0.jar

docs/tuning.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ in your operations) and performance. It provides two serialization libraries:
3535
Java serialization is flexible but often quite slow, and leads to large
3636
serialized formats for many classes.
3737
* [Kryo serialization](https://github.com/EsotericSoftware/kryo): Spark can also use
38-
the Kryo library (version 2) to serialize objects more quickly. Kryo is significantly
38+
the Kryo library (version 4) to serialize objects more quickly. Kryo is significantly
3939
faster and more compact than Java serialization (often as much as 10x), but does not support all
4040
`Serializable` types and requires you to *register* the classes you'll use in the program in advance
4141
for best performance.

mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.spark.ml.regression
1919

2020
import scala.util.Random
2121

22-
import org.apache.spark.SparkFunSuite
22+
import org.apache.spark.{SparkConf, SparkFunSuite}
2323
import org.apache.spark.ml.classification.LogisticRegressionSuite._
2424
import org.apache.spark.ml.feature.{Instance, OffsetInstance}
2525
import org.apache.spark.ml.feature.{LabeledPoint, RFormula}
@@ -29,6 +29,7 @@ import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
2929
import org.apache.spark.ml.util.TestingUtils._
3030
import org.apache.spark.mllib.random._
3131
import org.apache.spark.mllib.util.MLlibTestSparkContext
32+
import org.apache.spark.serializer.KryoSerializer
3233
import org.apache.spark.sql.{DataFrame, Row}
3334
import org.apache.spark.sql.functions._
3435
import org.apache.spark.sql.types.FloatType
@@ -1687,6 +1688,14 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
16871688
assert(evalSummary.deviance === summary.deviance)
16881689
assert(evalSummary.aic === summary.aic)
16891690
}
1691+
1692+
test("SPARK-23131 Kryo raises StackOverflow during serializing GLR model") {
1693+
val conf = new SparkConf(false)
1694+
val ser = new KryoSerializer(conf).newInstance()
1695+
val trainer = new GeneralizedLinearRegression()
1696+
val model = trainer.fit(Seq(Instance(1.0, 1.0, Vectors.dense(1.0, 7.0))).toDF)
1697+
ser.serialize[GeneralizedLinearRegressionModel](model)
1698+
}
16901699
}
16911700

16921701
object GeneralizedLinearRegressionSuite {

pom.xml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@
136136
<hive.parquet.version>1.6.0</hive.parquet.version>
137137
<jetty.version>9.3.24.v20180605</jetty.version>
138138
<javaxservlet.version>3.1.0</javaxservlet.version>
139-
<chill.version>0.8.4</chill.version>
139+
<chill.version>0.9.3</chill.version>
140140
<ivy.version>2.4.0</ivy.version>
141141
<oro.version>2.0.8</oro.version>
142142
<codahale.metrics.version>3.1.5</codahale.metrics.version>
@@ -1770,6 +1770,10 @@
17701770
<groupId>org.apache.hive</groupId>
17711771
<artifactId>hive-storage-api</artifactId>
17721772
</exclusion>
1773+
<exclusion>
1774+
<groupId>com.esotericsoftware</groupId>
1775+
<artifactId>kryo-shaded</artifactId>
1776+
</exclusion>
17731777
</exclusions>
17741778
</dependency>
17751779
<dependency>

0 commit comments

Comments
 (0)