Skip to content

Commit 88aee39

Browse files
justinuang authored and robert3005 committed
[SPARK-25493][SQL] Use auto-detection for CRLF in CSV datasource multiline mode (apache-spark-on-k8s#419)
1 parent 0069e76 commit 88aee39

File tree

3 files changed

+21
-0
lines changed

3 files changed

+21
-0
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -212,6 +212,8 @@ class CSVOptions(
212212
settings.setEmptyValue(emptyValueInRead)
213213
settings.setMaxCharsPerColumn(maxCharsPerColumn)
214214
settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.STOP_AT_DELIMITER)
215+
settings.setLineSeparatorDetectionEnabled(multiLine == true)
216+
215217
settings
216218
}
217219
}
sql/core/src/test/resources/test-data/cars-crlf.csv

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
2+
year,make,model,comment,blank
3+
"2012","Tesla","S","No comment",
4+
5+
1997,Ford,E350,"Go get one now they are going fast",
6+
2015,Chevy,Volt
7+

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te
5252
private val carsNullFile = "test-data/cars-null.csv"
5353
private val carsEmptyValueFile = "test-data/cars-empty-value.csv"
5454
private val carsBlankColName = "test-data/cars-blank-column-name.csv"
55+
private val carsCrlf = "test-data/cars-crlf.csv"
5556
private val emptyFile = "test-data/empty.csv"
5657
private val commentsFile = "test-data/comments.csv"
5758
private val disableCommentsFile = "test-data/disable_comments.csv"
@@ -220,6 +221,17 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te
220221
}
221222
}
222223

224+
test("crlf line separators in multiline mode") {
225+
val cars = spark
226+
.read
227+
.format("csv")
228+
.option("multiLine", "true")
229+
.option("header", "true")
230+
.load(testFile(carsCrlf))
231+
232+
verifyCars(cars, withHeader = true)
233+
}
234+
223235
test("test aliases sep and encoding for delimiter and charset") {
224236
// scalastyle:off
225237
val cars = spark

0 commit comments

Comments
 (0)