Skip to content

Commit 33445a6

Browse files
committed
improve parsing
1 parent 18171b3 commit 33445a6

File tree

2 files changed

+77
-4
lines changed

2 files changed

+77
-4
lines changed

scautable/src/csvParser.scala

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,34 @@ private[scautable] object CSVParser:
3232
// End of quoted section
3333
inQuotes = false
3434

35-
case '\\' if inQuotes && i + 1 < line.length && line.charAt(i + 1) == quote =>
36-
// Handle backslash-escaped quotes
37-
cellBuffer.append(quote)
38-
i += 1 // Skip the escaped quote
35+
case '\\' if inQuotes && i + 1 < line.length =>
36+
// Handle backslash-escaped characters
37+
val nextChar = line.charAt(i + 1)
38+
nextChar match
39+
case 'n' =>
40+
// Escaped linefeed
41+
cellBuffer.append('\n')
42+
i += 1
43+
case 'r' =>
44+
// Escaped carriage return
45+
cellBuffer.append('\r')
46+
i += 1
47+
case '\\' =>
48+
// Escaped backslash
49+
cellBuffer.append('\\')
50+
i += 1
51+
case `delimiter` =>
52+
// Escaped delimiter
53+
cellBuffer.append(delimiter)
54+
i += 1
55+
case `quote` =>
56+
// Escaped quote character
57+
cellBuffer.append(quote)
58+
i += 1
59+
case _ =>
60+
// Unknown escape sequence - treat backslash literally
61+
cellBuffer.append('\\')
62+
// Don't increment i, let the next character be processed normally
3963

4064
case `delimiter` if !inQuotes =>
4165
// Delimiter outside quotes ends the current cell

scautable/test/src/io/github/quafadas/scautable/CSVParserSuite.scala

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,52 @@ class CSVParserSuite extends FunSuite:
116116
val result = CSVParser.parseLine(line)
117117
assertEquals(result, List("", "field2", "field3"))
118118
}
119+
120+
// Tests for full escape character support (ESCAPE specification)
121+
test("parseLine should handle backslash-escaped linefeeds") {
122+
// The Scala literal \\n puts two characters (backslash, then n) inside the quoted
// second field; it is not a real linefeed in the input.
val line = "field1,\"field2 with \\n newline\",field3"
123+
val result = CSVParser.parseLine(line)
124+
// Expected: the backslash + n pair is decoded into one actual linefeed character.
assertEquals(result, List("field1", "field2 with \n newline", "field3"))
125+
}
126+
127+
test("parseLine should handle backslash-escaped carriage returns") {
128+
// The Scala literal \\r puts two characters (backslash, then r) inside the quoted
// second field; it is not a real carriage return in the input.
val line = "field1,\"field2 with \\r return\",field3"
129+
val result = CSVParser.parseLine(line)
130+
// Expected: the backslash + r pair is decoded into one actual carriage return.
assertEquals(result, List("field1", "field2 with \r return", "field3"))
131+
}
132+
133+
test("parseLine should handle backslash-escaped delimiters") {
134+
// The quoted second field contains a backslash followed by a comma. No delimiter
// argument is passed here, so parseLine runs with its default delimiter.
val line = "field1,\"field2 with \\, comma\",field3"
135+
val result = CSVParser.parseLine(line)
136+
// Expected: the backslash is dropped, the comma stays inside the cell, and the
// line still yields exactly three fields.
assertEquals(result, List("field1", "field2 with , comma", "field3"))
137+
}
138+
139+
test("parseLine should handle backslash-escaped backslashes") {
140+
// Four backslashes in the Scala literal are two backslash characters in the input,
// i.e. one escaped backslash inside the quoted field.
val line = "field1,\"field2 with \\\\ backslash\",field3"
141+
val result = CSVParser.parseLine(line)
142+
// Expected: the pair collapses to one backslash (written as two in the literal).
assertEquals(result, List("field1", "field2 with \\ backslash", "field3"))
143+
}
144+
145+
test("parseLine should handle custom delimiter with backslash-escaped delimiter") {
146+
// Same scenario as the escaped-comma test, but with semicolons separating fields
// and a backslash + semicolon pair inside the quoted field.
val line = "field1;\"field2 with \\; semicolon\";field3"
147+
// The delimiter is overridden by name, so the escape must follow the configured
// delimiter rather than a hard-coded comma.
val result = CSVParser.parseLine(line, delimiter = ';')
148+
// Expected: the backslash is dropped and the semicolon stays inside the cell.
assertEquals(result, List("field1", "field2 with ; semicolon", "field3"))
149+
}
150+
151+
test("parseLine should handle multiple escape sequences in one field") {
152+
// One quoted field holding four escapes: backslash + n, backslash + r,
// backslash + comma (back to back), and later backslash + quote character.
val line = "field1,\"field2 with \\n\\r\\, and \\\" escapes\",field3"
153+
val result = CSVParser.parseLine(line)
154+
// Expected: each pair is decoded on its own (linefeed, carriage return, comma,
// quote), and the escaped quote does not end the quoted section.
assertEquals(result, List("field1", "field2 with \n\r, and \" escapes", "field3"))
155+
}
156+
157+
// NOTE(review): the test name says the backslash is not escaping anything, but the
// input below ends the quoted field with TWO backslash characters, i.e. an escaped
// backslash. What is actually checked is that an escaped backslash immediately
// before the closing quote does not swallow that quote. A lone trailing backslash
// before the closing quote is not exercised here - consider renaming or adding a case.
test("parseLine should handle backslash at end of field (not escaping anything)") {
158+
// Four backslashes in the Scala literal = two backslash characters in the input,
// directly followed by the closing quote of the field.
val line = "field1,\"field2 ends with \\\\\",field3"
159+
val result = CSVParser.parseLine(line)
160+
// Expected: the cell ends with one backslash and the quoted section closes normally.
assertEquals(result, List("field1", "field2 ends with \\", "field3"))
161+
}
162+
163+
test("parseLine should handle invalid escape sequences by treating backslash literally") {
164+
// Backslash followed by z is not one of the escapes exercised by the other tests
// in this suite (n, r, backslash, delimiter, quote).
val line = "field1,\"field2 with \\z invalid escape\",field3"
165+
val result = CSVParser.parseLine(line)
166+
// Expected: both characters are kept, so the cell contains backslash + z unchanged.
assertEquals(result, List("field1", "field2 with \\z invalid escape", "field3"))
167+
}

0 commit comments

Comments
 (0)