Skip to content

Commit 1248e31

Browse files
robert3005 Robert Kruszewski
authored and committed
Upgrade parquet to 1.12.0-palantir.1 (apache-spark-on-k8s#464)
1 parent 16b2b27 commit 1248e31

File tree

6 files changed

+23
-21
lines changed

6 files changed

+23
-21
lines changed

dev/deps/spark-deps-hadoop-palantir

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ arpack_combined_all-0.1.jar
1313
arrow-format-0.10.0.jar
1414
arrow-memory-0.10.0.jar
1515
arrow-vector-0.10.0.jar
16+
audience-annotations-0.7.0.jar
1617
automaton-1.11-8.jar
1718
avro-1.8.2.jar
1819
avro-ipc-1.8.2.jar
@@ -170,12 +171,12 @@ orc-shims-1.5.3.jar
170171
oro-2.0.8.jar
171172
osgi-resource-locator-1.0.1.jar
172173
paranamer-2.8.jar
173-
parquet-column-1.10.1-palantir.3.jar
174-
parquet-common-1.10.1-palantir.3.jar
175-
parquet-encoding-1.10.1-palantir.3.jar
176-
parquet-format-2.4.0.jar
177-
parquet-hadoop-1.10.1-palantir.3.jar
178-
parquet-jackson-1.10.1-palantir.3.jar
174+
parquet-column-1.12.0-palantir.1.jar
175+
parquet-common-1.12.0-palantir.1.jar
176+
parquet-encoding-1.12.0-palantir.1.jar
177+
parquet-format-structures-1.12.0-palantir.1.jar
178+
parquet-hadoop-1.12.0-palantir.1.jar
179+
parquet-jackson-1.12.0-palantir.1.jar
179180
protobuf-java-2.5.0.jar
180181
py4j-0.10.8.1.jar
181182
pyrolite-4.13.jar

pom.xml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -130,10 +130,11 @@
130130
<!-- Version used for internal directory structure -->
131131
<hive.version.short>1.2.1</hive.version.short>
132132
<derby.version>10.12.1.1</derby.version>
133-
<parquet.version>1.10.1-palantir.3</parquet.version>
134-
<okhttp3.version>3.11.0</okhttp3.version>
135-
<okhttp.version>2.7.5</okhttp.version>
136-
<okio.version>1.15.0</okio.version>
133+
<parquet.version>1.12.0-palantir.1</parquet.version>
134+
<okhttp3.version>3.11.0</okhttp3.version>
135+
<okhttp.version>2.7.5</okhttp.version>
136+
<okio.version>1.15.0</okio.version>
137+
<jetty.version>9.4.12.v20180830</jetty.version>
137138
<orc.version>1.5.3</orc.version>
138139
<orc.classifier>nohive</orc.classifier>
139140
<hive.parquet.version>1.6.0</hive.parquet.version>

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -618,8 +618,8 @@ private[parquet] class ParquetFilters(
618618
}
619619

620620
override def keep(value: Binary): Boolean = {
621-
UTF8String.fromBytes(value.getBytes).startsWith(
622-
UTF8String.fromBytes(strToBinary.getBytes))
621+
value != null && UTF8String.fromBytes(value.getBytes)
622+
.startsWith(UTF8String.fromBytes(strToBinary.getBytes))
623623
}
624624
}
625625
)

sql/core/src/test/resources/sql-tests/results/describe-part-after-analyze.sql.out

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ Partition Values [ds=2017-08-01, hr=10]
9393
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10
9494
Created Time [not included in comparison]
9595
Last Access [not included in comparison]
96-
Partition Statistics 1195 bytes, 3 rows
96+
Partition Statistics 1264 bytes, 3 rows
9797

9898
# Storage Information
9999
Location [not included in comparison]sql/core/spark-warehouse/t
@@ -128,7 +128,7 @@ Partition Values [ds=2017-08-01, hr=10]
128128
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10
129129
Created Time [not included in comparison]
130130
Last Access [not included in comparison]
131-
Partition Statistics 1195 bytes, 3 rows
131+
Partition Statistics 1264 bytes, 3 rows
132132

133133
# Storage Information
134134
Location [not included in comparison]sql/core/spark-warehouse/t
@@ -155,7 +155,7 @@ Partition Values [ds=2017-08-01, hr=11]
155155
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=11
156156
Created Time [not included in comparison]
157157
Last Access [not included in comparison]
158-
Partition Statistics 1208 bytes, 4 rows
158+
Partition Statistics 1278 bytes, 4 rows
159159

160160
# Storage Information
161161
Location [not included in comparison]sql/core/spark-warehouse/t
@@ -190,7 +190,7 @@ Partition Values [ds=2017-08-01, hr=10]
190190
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10
191191
Created Time [not included in comparison]
192192
Last Access [not included in comparison]
193-
Partition Statistics 1195 bytes, 3 rows
193+
Partition Statistics 1264 bytes, 3 rows
194194

195195
# Storage Information
196196
Location [not included in comparison]sql/core/spark-warehouse/t
@@ -217,7 +217,7 @@ Partition Values [ds=2017-08-01, hr=11]
217217
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=11
218218
Created Time [not included in comparison]
219219
Last Access [not included in comparison]
220-
Partition Statistics 1208 bytes, 4 rows
220+
Partition Statistics 1278 bytes, 4 rows
221221

222222
# Storage Information
223223
Location [not included in comparison]sql/core/spark-warehouse/t
@@ -244,7 +244,7 @@ Partition Values [ds=2017-09-01, hr=5]
244244
Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-09-01/hr=5
245245
Created Time [not included in comparison]
246246
Last Access [not included in comparison]
247-
Partition Statistics 1182 bytes, 2 rows
247+
Partition Statistics 1250 bytes, 2 rows
248248

249249
# Storage Information
250250
Location [not included in comparison]sql/core/spark-warehouse/t

sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -506,7 +506,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
506506
case plan: InMemoryRelation => plan
507507
}.head
508508
// InMemoryRelation's stats is file size before the underlying RDD is materialized
509-
assert(inMemoryRelation.computeStats().sizeInBytes === 822)
509+
assert(inMemoryRelation.computeStats().sizeInBytes === 848)
510510

511511
// InMemoryRelation's stats is updated after materializing RDD
512512
dfFromFile.collect()
@@ -519,7 +519,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
519519

520520
// Even CBO enabled, InMemoryRelation's stats keeps as the file size before table's stats
521521
// is calculated
522-
assert(inMemoryRelation2.computeStats().sizeInBytes === 822)
522+
assert(inMemoryRelation2.computeStats().sizeInBytes === 848)
523523

524524
// InMemoryRelation's stats should be updated after calculating stats of the table
525525
// clear cache to simulate a fresh environment

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ class HadoopFsRelationSuite extends QueryTest with SharedSQLContext {
4545
import testImplicits._
4646
Seq(1.0, 0.5).foreach { compressionFactor =>
4747
withSQLConf("spark.sql.sources.fileCompressionFactor" -> compressionFactor.toString,
48-
"spark.sql.autoBroadcastJoinThreshold" -> "411") {
48+
"spark.sql.autoBroadcastJoinThreshold" -> "424") {
4949
withTempPath { workDir =>
5050
// the file size is 740 bytes
5151
val workDirPath = workDir.getAbsolutePath

0 commit comments

Comments
 (0)