Skip to content
This repository was archived by the owner on Mar 24, 2025. It is now read-only.

Commit 2b4aca0

Browse files
authored
Miscellaneous code updates (#506)
1 parent e583370 commit 2b4aca0

File tree

7 files changed

+35
-26
lines changed

7 files changed

+35
-26
lines changed

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -383,13 +383,13 @@ from pyspark.sql import SparkSession
383383
from pyspark.sql.types import *
384384

385385
spark = SparkSession.builder.getOrCreate()
386-
customSchema = StructType([ \
387-
StructField("_id", StringType(), True), \
388-
StructField("author", StringType(), True), \
389-
StructField("description", StringType(), True), \
390-
StructField("genre", StringType(), True), \
391-
StructField("price", DoubleType(), True), \
392-
StructField("publish_date", StringType(), True), \
386+
customSchema = StructType([
387+
StructField("_id", StringType(), True),
388+
StructField("author", StringType(), True),
389+
StructField("description", StringType(), True),
390+
StructField("genre", StringType(), True),
391+
StructField("price", DoubleType(), True),
392+
StructField("publish_date", StringType(), True),
393393
StructField("title", StringType(), True)])
394394

395395
df = spark.read \

build.sbt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ val sparkVersion = sys.props.get("spark.testVersion").getOrElse("2.4.7")
1616
autoScalaLibrary := false
1717

1818
libraryDependencies ++= Seq(
19-
"commons-io" % "commons-io" % "2.7",
19+
"commons-io" % "commons-io" % "2.8.0",
2020
"org.glassfish.jaxb" % "txw2" % "2.3.3",
2121
"org.apache.ws.xmlschema" % "xmlschema-core" % "2.2.5",
2222
"org.slf4j" % "slf4j-api" % "1.7.25" % Provided,
23-
"org.scalatest" %% "scalatest" % "3.2.2" % Test,
23+
"org.scalatest" %% "scalatest" % "3.2.3" % Test,
2424
"com.novocode" % "junit-interface" % "0.11" % Test,
2525
"org.apache.spark" %% "spark-core" % sparkVersion % Provided,
2626
"org.apache.spark" %% "spark-sql" % sparkVersion % Provided,

project/plugins.sbt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ libraryDependencies += "org.scalariform" %% "scalariform" % "0.2.10"
66

77
addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3")
88

9-
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.1")
9+
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.1.1")
1010

1111
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.1")
1212

13-
addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.8.0")
13+
addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.8.1")

src/main/scala/com/databricks/spark/xml/parsers/StaxXmlParserUtils.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ private[xml] object StaxXmlParserUtils {
6161
*/
6262
def skipUntil(parser: XMLEventReader, eventType: Int): XMLEvent = {
6363
var event = parser.peek
64-
while(parser.hasNext && event.getEventType != eventType) {
64+
while (parser.hasNext && event.getEventType != eventType) {
6565
event = parser.nextEvent
6666
}
6767
event

src/main/scala/com/databricks/spark/xml/util/InferSchema.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,10 @@ private[xml] object InferSchema {
6868
* 3. Replace any remaining null fields with string, the top type
6969
*/
7070
def infer(xml: RDD[String], options: XmlOptions): StructType = {
71-
val schemaData = if (options.samplingRatio > 0.99) {
72-
xml
73-
} else {
71+
val schemaData = if (options.samplingRatio < 1.0) {
7472
xml.sample(withReplacement = false, options.samplingRatio, 1)
73+
} else {
74+
xml
7575
}
7676
// perform schema inference on each row and merge afterwards
7777
val rootType = schemaData.mapPartitions { iter =>

src/test/scala/com/databricks/spark/xml/XmlSuite.scala

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
package com.databricks.spark.xml
1717

1818
import java.nio.charset.{StandardCharsets, UnsupportedCharsetException}
19-
import java.nio.file.{Files, Path}
19+
import java.nio.file.{Files, Path, Paths}
2020
import java.sql.{Date, Timestamp}
2121
import java.util.TimeZone
2222

@@ -836,7 +836,7 @@ final class XmlSuite extends AnyFunSuite with BeforeAndAfterAll {
836836
}
837837

838838
private[this] def testNextedElementFromFile(xmlFile: String) = {
839-
val lines = Source.fromFile(xmlFile).getLines.toList
839+
val lines = getLines(Paths.get(xmlFile)).toList
840840
val firstExpected = lines(2).trim
841841
val lastExpected = lines(3).trim
842842
val config = new Configuration(spark.sparkContext.hadoopConfiguration)
@@ -1282,7 +1282,7 @@ final class XmlSuite extends AnyFunSuite with BeforeAndAfterAll {
12821282

12831283
val xmlFile =
12841284
Files.list(xmlPath).iterator.asScala.filter(_.getFileName.toString.startsWith("part-")).next
1285-
val firstLine = Source.fromFile(xmlFile.toFile).getLines.next
1285+
val firstLine = getLines(xmlFile).head
12861286
assert(firstLine === "<root foo=\"bar\" bing=\"baz\">")
12871287
}
12881288

@@ -1310,4 +1310,13 @@ final class XmlSuite extends AnyFunSuite with BeforeAndAfterAll {
13101310
assert(map.contains("M2"))
13111311
}
13121312

1313+
private def getLines(path: Path): Seq[String] = {
1314+
val source = Source.fromFile(path.toFile)
1315+
try {
1316+
source.getLines.toList
1317+
} finally {
1318+
source.close()
1319+
}
1320+
}
1321+
13131322
}

src/test/scala/com/databricks/spark/xml/util/XSDToSchemaSuite.scala

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class XSDToSchemaSuite extends AnyFunSuite {
2828
private val resDir = "src/test/resources"
2929

3030
test("Basic parsing") {
31-
val parsedSchema = XSDToSchema.read(Paths.get(s"${resDir}/basket.xsd"))
31+
val parsedSchema = XSDToSchema.read(Paths.get(s"$resDir/basket.xsd"))
3232
val expectedSchema = buildSchema(
3333
field("basket",
3434
struct(
@@ -39,7 +39,7 @@ class XSDToSchemaSuite extends AnyFunSuite {
3939
}
4040

4141
test("Relative path parsing") {
42-
val parsedSchema = XSDToSchema.read(Paths.get(s"${resDir}/include-example/first.xsd"))
42+
val parsedSchema = XSDToSchema.read(Paths.get(s"$resDir/include-example/first.xsd"))
4343
val expectedSchema = buildSchema(
4444
field("basket",
4545
struct(
@@ -50,7 +50,7 @@ class XSDToSchemaSuite extends AnyFunSuite {
5050
}
5151

5252
test("Test schema types and attributes") {
53-
val parsedSchema = XSDToSchema.read(Paths.get(s"${resDir}/catalog.xsd"))
53+
val parsedSchema = XSDToSchema.read(Paths.get(s"$resDir/catalog.xsd"))
5454
val expectedSchema = buildSchema(
5555
field("catalog",
5656
struct(
@@ -73,26 +73,26 @@ class XSDToSchemaSuite extends AnyFunSuite {
7373
}
7474

7575
test("Test xs:choice nullability") {
76-
val parsedSchema = XSDToSchema.read(Paths.get(s"${resDir}/choice.xsd"))
76+
val parsedSchema = XSDToSchema.read(Paths.get(s"$resDir/choice.xsd"))
7777
val expectedSchema = buildSchema(
7878
field("el", struct(field("foo"), field("bar"), field("baz")), nullable = false))
7979
assert(expectedSchema === parsedSchema)
8080
}
8181

8282
test("Two root elements") {
83-
val parsedSchema = XSDToSchema.read(Paths.get(s"${resDir}/twoelements.xsd"))
83+
val parsedSchema = XSDToSchema.read(Paths.get(s"$resDir/twoelements.xsd"))
8484
val expectedSchema = buildSchema(field("bar", nullable = false), field("foo", nullable = false))
8585
assert(expectedSchema === parsedSchema)
8686
}
8787

8888
test("xs:any schema") {
89-
val parsedSchema = XSDToSchema.read(Paths.get(s"${resDir}/xsany.xsd"))
89+
val parsedSchema = XSDToSchema.read(Paths.get(s"$resDir/xsany.xsd"))
9090
val expectedSchema = buildSchema(
9191
field("root",
9292
struct(
9393
field("foo",
9494
struct(
95-
field("xs_any", nullable = true)),
95+
field("xs_any")),
9696
nullable = false),
9797
field("bar",
9898
struct(
@@ -104,7 +104,7 @@ class XSDToSchemaSuite extends AnyFunSuite {
104104
nullable = false),
105105
field("bing",
106106
struct(
107-
field("xs_any", nullable = true)),
107+
field("xs_any")),
108108
nullable = false)),
109109
nullable = false))
110110
assert(expectedSchema === parsedSchema)

0 commit comments

Comments (0)