This repository was archived by the owner on Mar 24, 2025. It is now read-only.

Commit a17f473

Support Scala 2.13 / Spark 3.2; drop Scala 2.11 / Spark 2 support (#564)
1 parent c7ac4d5 commit a17f473

7 files changed, +24 -21 lines changed

.github/workflows/test_spark_2_java_8.yml renamed to .github/workflows/test_spark_3_2_java_11.yml

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
-name: Spark 2 / Java 8
+name: Spark 3.2 / Java 11 / Scala 2.13
 on:
   push:
     branches: [master]
@@ -12,6 +12,6 @@ jobs:
     - name: Set up Java, SBT
       uses: olafurpg/setup-scala@v11
       with:
-        java-version: 'adopt@1.8'
+        java-version: 'adopt@1.11'
     - name: Build and test
-      run: sbt -Dspark.testVersion=2.4.8 ++2.11.12 clean scalastyle test:scalastyle mimaReportBinaryIssues test
+      run: sbt -Dspark.testVersion=3.2.0 ++2.13.5 clean test

.github/workflows/test_spark_3_java_11.yml renamed to .github/workflows/test_spark_3_java_8.yml

Lines changed: 4 additions & 4 deletions
@@ -1,4 +1,4 @@
-name: Spark 3 / Java 11
+name: Spark 3 / Java 8 / Scala 2.12
 on:
   push:
     branches: [master]
@@ -12,8 +12,8 @@ jobs:
     - name: Set up Java, SBT
       uses: olafurpg/setup-scala@v11
       with:
-        java-version: 'adopt@1.11'
+        java-version: 'adopt@1.8'
     - name: Build and test
-      run: sbt -Dspark.testVersion=3.1.2 ++2.12.10 clean scalastyle test:scalastyle mimaReportBinaryIssues coverage test coverageReport
+      run: sbt -Dspark.testVersion=3.0.3 ++2.12.10 clean scalastyle test:scalastyle mimaReportBinaryIssues coverage test coverageReport
     - name: Check code coverage
-      run: bash <(curl -s https://codecov.io/bash)
+      run: bash <(curl -s https://codecov.io/bash)

README.md

Lines changed: 5 additions & 5 deletions
@@ -7,7 +7,7 @@ The structure and test tools are mostly copied from [CSV Data Source for Spark](
 
 - This package supports to process format-free XML files in a distributed way, unlike JSON datasource in Spark restricts in-line JSON format.
 
-- Compatible with Spark 2.4.x and 3.x, with Scala 2.12. Scala 2.11 support with Spark 2.4.x is deprecated.
+- Compatible with Spark 3.0 and later with Scala 2.12, and also Spark 3.2 and later with Scala 2.12 or 2.13. Scala 2.11 and Spark 2 support ended with version 0.13.0.
 
 ## Linking
 
@@ -16,15 +16,15 @@ You can link against this library in your program at the following coordinates:
 ```
 groupId: com.databricks
 artifactId: spark-xml_2.12
-version: 0.13.0
+version: 0.14.0
 ```
 
 ## Using with Spark shell
 
 This package can be added to Spark using the `--packages` command line option. For example, to include it when starting the spark shell:
 
 ```
-$SPARK_HOME/bin/spark-shell --packages com.databricks:spark-xml_2.12:0.13.0
+$SPARK_HOME/bin/spark-shell --packages com.databricks:spark-xml_2.12:0.14.0
 ```
 
 ## Features
@@ -399,7 +399,7 @@ Automatically infer schema (data types)
 ```R
 library(SparkR)
 
-sparkR.session("local[4]", sparkPackages = c("com.databricks:spark-xml_2.12:0.13.0"))
+sparkR.session("local[4]", sparkPackages = c("com.databricks:spark-xml_2.12:0.14.0"))
 
 df <- read.df("books.xml", source = "xml", rowTag = "book")
 
@@ -411,7 +411,7 @@ You can manually specify schema:
 ```R
 library(SparkR)
 
-sparkR.session("local[4]", sparkPackages = c("com.databricks:spark-xml_2.12:0.13.0"))
+sparkR.session("local[4]", sparkPackages = c("com.databricks:spark-xml_2.12:0.14.0"))
 customSchema <- structType(
   structField("_id", "string"),
   structField("author", "string"),

build.sbt

Lines changed: 3 additions & 3 deletions
@@ -1,16 +1,16 @@
 name := "spark-xml"
 
-version := "0.13.0"
+version := "0.14.0"
 
 organization := "com.databricks"
 
 scalaVersion := "2.12.10"
 
-crossScalaVersions := Seq("2.11.12", "2.12.10")
+crossScalaVersions := Seq("2.12.10", "2.13.5")
 
 scalacOptions := Seq("-unchecked", "-deprecation")
 
-val sparkVersion = sys.props.get("spark.testVersion").getOrElse("2.4.8")
+val sparkVersion = sys.props.get("spark.testVersion").getOrElse("3.2.0")
 
 // To avoid packaging it, it's Provided below
 autoScalaLibrary := false
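For context, a sketch of how these pieces typically come together in an sbt build like this one: the `spark.testVersion` system property selects the Spark version, `++` selects the Scala version on the sbt command line, and Spark itself stays `Provided` so it is not packaged. The `spark-sql` dependency line is an assumption for illustration, not copied from the project's build file:

```scala
// build.sbt sketch (assumed shape, not the project's full file)
val sparkVersion = sys.props.get("spark.testVersion").getOrElse("3.2.0")

crossScalaVersions := Seq("2.12.10", "2.13.5")

// Provided: compiled against, but not bundled into the published artifact.
libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion % Provided

// Invoked from CI roughly as:
//   sbt -Dspark.testVersion=3.2.0 ++2.13.5 clean test       (Spark 3.2 / Scala 2.13)
//   sbt -Dspark.testVersion=3.0.3 ++2.12.10 clean test      (Spark 3.0 / Scala 2.12)
```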

src/main/scala/com/databricks/spark/xml/XmlDataToCatalyst.scala

Lines changed: 3 additions & 0 deletions
@@ -57,4 +57,7 @@ case class XmlDataToCatalyst(
     case _: StructType => Seq(StringType)
     case ArrayType(_: StructType, _) => Seq(ArrayType(StringType))
   }
+
+  // Overrides, in Spark 3.2.0+
+  protected def withNewChildInternal(newChild: Expression): XmlDataToCatalyst = copy(newChild)
 }
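Background for this addition (hedged, not stated in the commit itself): Spark 3.2 moved Catalyst tree copying onto a `withNewChildInternal` hook defined by the new `UnaryLike` trait, and the method is declared here without `override`, presumably so the same source still compiles against Spark 3.0/3.1, where the hook does not exist. A minimal sketch of a custom unary expression written directly against the Spark 3.2 API; `UpperCaseExpr` is hypothetical and not part of spark-xml:

```scala
import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{DataType, StringType}
import org.apache.spark.unsafe.types.UTF8String

// Hypothetical expression illustrating the Spark 3.2 contract.
case class UpperCaseExpr(child: Expression) extends UnaryExpression {
  override def dataType: DataType = StringType

  // Interpreted path: upper-case the non-null input string.
  override protected def nullSafeEval(input: Any): Any =
    input.asInstanceOf[UTF8String].toUpperCase

  // Codegen path: emit the equivalent Java call on the child's value.
  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
    defineCodeGen(ctx, ev, c => s"($c).toUpperCase()")

  // New in Spark 3.2: Catalyst rebuilds trees through this hook rather than
  // reflective copying, so every unary expression must say how to copy itself
  // with a new child.
  override protected def withNewChildInternal(newChild: Expression): UpperCaseExpr =
    copy(child = newChild)
}
```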

src/main/scala/com/databricks/spark/xml/parsers/StaxXmlGenerator.scala

Lines changed: 2 additions & 2 deletions
@@ -66,7 +66,7 @@ private[xml] object StaxXmlGenerator {
         writer.writeAttribute(name.substring(options.attributePrefix.length), v.toString)
 
       // For ArrayType, we just need to write each as XML element.
-      case (ArrayType(ty, _), v: Seq[_]) =>
+      case (ArrayType(ty, _), v: scala.collection.Seq[_]) =>
         v.foreach { e =>
           writeChildElement(name, ty, e)
         }
@@ -101,7 +101,7 @@
       // this case only can happen when we convert a normal [[DataFrame]] to XML file.
       // When [[ArrayType]] has [[ArrayType]] as elements, it is confusing what is element name
      // for XML file. Now, it is "item" but this might have to be according the parent field name.
-      case (ArrayType(ty, _), v: Seq[_]) =>
+      case (ArrayType(ty, _), v: scala.collection.Seq[_]) =>
        v.foreach { e =>
          writeChild("item", ty, e)
        }
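The widened pattern accommodates Scala 2.13, where the default `Seq` alias is `scala.collection.immutable.Seq`, while Spark typically hands array values to the generator as a `scala.collection.Seq` (for example a mutable `ArraySeq`), so the old pattern would stop matching. A small self-contained sketch of the difference; the sample values are illustrative:

```scala
import scala.collection.mutable

object SeqPatternSketch {
  def main(args: Array[String]): Unit = {
    // Spark rows commonly carry array columns as a mutable ArraySeq.
    val fromRow: Any = mutable.ArraySeq(1, 2, 3)

    // Under Scala 2.13 the default `Seq` alias is immutable.Seq, so the old
    // pattern misses a mutable sequence; on 2.12 it still matched.
    val narrow = fromRow match {
      case _: Seq[_] => "matched"
      case _         => "missed"
    }

    // Matching on scala.collection.Seq covers both hierarchies on 2.12 and 2.13.
    val wide = fromRow match {
      case _: scala.collection.Seq[_] => "matched"
      case _                          => "missed"
    }

    println(s"Seq[_]: $narrow, scala.collection.Seq[_]: $wide") // 2.13: missed, matched
  }
}
```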

src/test/scala/com/databricks/spark/xml/XmlSuite.scala

Lines changed: 4 additions & 4 deletions
@@ -890,17 +890,17 @@ final class XmlSuite extends AnyFunSuite with BeforeAndAfterAll {
     val resultsOne = spark.read
       .option("treatEmptyValuesAsNulls", "true")
       .xml(resDir + "gps-empty-field.xml")
-    assert(resultsOne.selectExpr("extensions.TrackPointExtension").head.getStruct(0) !== null)
+    assert(resultsOne.selectExpr("extensions.TrackPointExtension").head().getStruct(0) !== null)
     assert(resultsOne.selectExpr("extensions.TrackPointExtension")
-      .head.getStruct(0)(0) === null)
+      .head().getStruct(0)(0) === null)
     // Is the behavior below consistent? see line above.
-    assert(resultsOne.selectExpr("extensions.TrackPointExtension.hr").head.getStruct(0) === null)
+    assert(resultsOne.selectExpr("extensions.TrackPointExtension.hr").head().getStruct(0) === null)
     assert(resultsOne.collect().length === 2)
 
     val resultsTwo = spark.read
       .option("nullValue", "2013-01-24T06:18:43Z")
       .xml(resDir + "gps-empty-field.xml")
-    assert(resultsTwo.selectExpr("time").head.getStruct(0) === null)
+    assert(resultsTwo.selectExpr("time").head().getStruct(0) === null)
     assert(resultsTwo.collect().length === 2)
   }
 
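A plausible motivation for spelling out `head()` (an inference, not stated in the commit): Scala 2.13 deprecates "auto-application", that is, calling a method declared with an empty parameter list without writing the parentheses, so with `-deprecation` enabled the old `.head` form now warns. A tiny sketch of the behavior; `Frame` is a stand-in, not Spark's `Dataset`:

```scala
object AutoApplicationSketch {
  // Stand-in for Dataset.head(), which is declared with an empty parameter list.
  final class Frame { def head(): Int = 42 }

  def main(args: Array[String]): Unit = {
    val df = new Frame
    // df.head         // auto-application: deprecated under Scala 2.13 (-deprecation warns)
    println(df.head()) // explicit parentheses compile cleanly on both 2.12 and 2.13
  }
}
```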
