Skip to content

Commit c179b1a

Browse files
authored
Fix escaping backslashes (#357)
1 parent 5e3be5b commit c179b1a

File tree

5 files changed

+100
-50
lines changed

5 files changed

+100
-50
lines changed

ktoml-core/src/commonMain/kotlin/com/akuleshov7/ktoml/parsers/StringUtils.kt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,23 @@ internal fun String.convertLineEndingBackslash(): String {
8989
}
9090
}
9191

92+
/**
 * Checks whether the backslash at [backslashIndex] is a line-ending backslash.
 *
 * A line-ending backslash is a backslash that is followed only by whitespace
 * characters and then either a newline character or the end of the string.
 *
 * @param backslashIndex The index of the backslash to check; scanning starts at the
 * position right after it, the character at this index itself is not inspected
 * @return `true` if the backslash is a line-ending backslash, `false` otherwise
 */
internal fun String.isLineEndingBackslash(backslashIndex: Int): Boolean {
    // Look the newline character up once instead of on every loop iteration
    val newLine = newLineChar()

    // Skip the whitespace that follows the backslash, stopping at the first newline
    var j = backslashIndex + 1
    while (j < length && this[j] != newLine && this[j].isWhitespace()) {
        j++
    }

    // Either the string ended, or the scan stopped on a character:
    // only a newline at that position makes the backslash line-ending
    return j == length || this[j] == newLine
}
108+
92109
/**
93110
* If this string starts and end with quotes("") - will return the string with quotes removed
94111
* Otherwise, returns this string.

ktoml-core/src/commonMain/kotlin/com/akuleshov7/ktoml/utils/SpecialCharacters.kt

Lines changed: 47 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
package com.akuleshov7.ktoml.utils
1111

1212
import com.akuleshov7.ktoml.exceptions.UnknownEscapeSymbolsException
13+
import com.akuleshov7.ktoml.parsers.isLineEndingBackslash
1314

1415
internal const val COMPLEX_UNICODE_LENGTH = 8
1516
internal const val COMPLEX_UNICODE_PREFIX = 'U'
@@ -103,13 +104,11 @@ public fun StringBuilder.appendEscapedUnicode(
103104
*/
104105
public fun String.escapeSpecialCharacters(multiline: Boolean = false): String =
105106
if (multiline) {
106-
escapeControlChars(Char::isMultilineControlChar)
107+
escapeControlChars(isMultiline = true)
107108
.replace("\"\"\"", "\"\"\\\"")
108-
.escapeBackslashes("btnfruU\"\r\n")
109109
} else {
110-
escapeControlChars(Char::isControlChar)
110+
escapeControlChars(isMultiline = false)
111111
.replace("\"", "\\\"")
112-
.escapeBackslashes("btnfruU\"")
113112
}
114113

115114
/**
@@ -133,24 +132,60 @@ internal fun Char.isControlChar() = this in CharCategory.CONTROL && this != '\t'
133132
*/
134133
internal fun Char.isMultilineControlChar() = isControlChar() && this !in "\n\r"
135134

136-
private inline fun String.escapeControlChars(predicate: (Char) -> Boolean): String {
135+
private fun String.escapeControlChars(isMultiline: Boolean): String {
136+
val isControlChar = if (isMultiline) {
137+
Char::isMultilineControlChar
138+
} else {
139+
Char::isControlChar
140+
}
141+
137142
val sb = StringBuilder(length)
138-
var last = 0
143+
var slashCount = 0
139144
for ((i, char) in withIndex()) {
140-
if (predicate(char)) {
141-
sb.append(this, last, i)
142-
.append(char.escapeControlChar())
143-
last = i + 1
145+
if (char == '\\') {
146+
slashCount++
147+
continue
148+
}
149+
150+
if (slashCount > 0) {
151+
sb.append(
152+
escapeBackslashes(lastBackslashIndex = i - 1, slashCount = slashCount, isMultiline = isMultiline)
153+
)
154+
slashCount = 0
155+
}
156+
157+
if (isControlChar(char)) {
158+
sb.append(char.escapeControlChar())
159+
} else {
160+
sb.append(char)
144161
}
145162
}
146163

147-
if (last < length) {
148-
sb.append(this, last, length)
164+
if (isNotEmpty() && slashCount > 0) {
165+
sb.append(
166+
escapeBackslashes(lastBackslashIndex = length - 1, slashCount = slashCount, isMultiline = isMultiline)
167+
)
149168
}
150169

151170
return sb.toString()
152171
}
153172

173+
/**
 * Builds the escaped replacement for a run of [slashCount] consecutive backslashes.
 *
 * Every backslash in the run is doubled. The one exception is an odd-length run in a
 * multiline string whose last backslash is a line-ending backslash: that final
 * backslash is emitted once, so the result is one character shorter.
 *
 * @param lastBackslashIndex index of the last backslash of the run in this string
 * @param slashCount number of consecutive backslashes in the run
 * @param isMultiline whether this string is being escaped as a multiline string
 * @return a string consisting only of backslashes
 */
private fun String.escapeBackslashes(
    lastBackslashIndex: Int,
    slashCount: Int,
    isMultiline: Boolean,
): String {
    // Same short-circuit order as before: parity first, then mode, then the scan
    val endsWithLineEndingBackslash =
        slashCount % 2 != 0 && isMultiline && isLineEndingBackslash(lastBackslashIndex)

    val doubled = slashCount * 2
    val outputLength = if (endsWithLineEndingBackslash) doubled - 1 else doubled

    return "\\".repeat(outputLength)
}
188+
154189
private fun Char.escapeControlChar() = when (this) {
155190
'\t' -> "\\t"
156191
'\b' -> "\\b"
@@ -166,31 +201,6 @@ private fun Char.escapeControlChar() = when (this) {
166201
}
167202
}
168203

169-
private fun String.escapeBackslashes(escapes: String): String {
170-
val sb = StringBuilder(length)
171-
var slashCount = 0
172-
var last = 0
173-
for ((i, char) in withIndex()) {
174-
if (char == '\\') {
175-
slashCount++
176-
} else {
177-
if (slashCount > 0 && char !in escapes && slashCount % 2 != 0) {
178-
sb.append(this, last, i - 1)
179-
.append("\\\\$char")
180-
last = i + 1
181-
}
182-
183-
slashCount = 0
184-
}
185-
}
186-
187-
if (last < length) {
188-
sb.append(this, last, length)
189-
}
190-
191-
return sb.toString()
192-
}
193-
194204
/**
195205
* just a newline character on different platforms/targets
196206
*

ktoml-core/src/commonTest/kotlin/com/akuleshov7/ktoml/encoders/EncodingAnnotationTest.kt

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@ package com.akuleshov7.ktoml.encoders
33
import com.akuleshov7.ktoml.Toml
44
import com.akuleshov7.ktoml.TomlOutputConfig
55
import com.akuleshov7.ktoml.annotations.*
6+
import com.akuleshov7.ktoml.utils.isControlChar
67
import com.akuleshov7.ktoml.writers.IntegerRepresentation.*
78
import kotlinx.serialization.EncodeDefault
89
import kotlinx.serialization.ExperimentalSerializationApi
910
import kotlinx.serialization.Serializable
11+
import kotlinx.serialization.decodeFromString
12+
import kotlinx.serialization.encodeToString
1013
import kotlin.test.Test
1114

1215
class EncodingAnnotationTest {
@@ -252,25 +255,19 @@ class EncodingAnnotationTest {
252255
@TomlMultiline
253256
val mlTextA: String = "\n\\tMultiline\ntext!\n",
254257
@TomlMultiline
255-
val mlTextB: String = """
256-
257-
Text with escaped quotes ""\"\
258-
and line break
259-
260-
""".trimIndent(),
258+
val mlTextB: String = "\nText with escaped quotes \"\"\"\\\nand line break\n",
261259
@TomlLiteral
262260
@TomlMultiline
263261
val mlTextC: String = "\n\"Multiline\ntext!\"\n"
264262
)
265-
266263
val tripleQuotes = "\"\"\""
267264

268265
assertEncodedEquals(
269266
value = File(),
270267
expectedToml = """
271268
mlTextA = $tripleQuotes
272269
273-
\tMultiline
270+
\\tMultiline
274271
text!
275272
276273
$tripleQuotes
@@ -290,6 +287,25 @@ class EncodingAnnotationTest {
290287
)
291288
}
292289

290+
@Test
291+
fun encodeBackslashesInMultiline() {
292+
@Serializable
293+
data class Reproducer(
294+
@TomlMultiline
295+
val foo: String
296+
)
297+
val tripleQuotes = "\"\"\""
298+
299+
assertEncodedEquals(
300+
Reproducer("\\\\, \\\""),
301+
"""
302+
foo = $tripleQuotes
303+
\\\\, \\"
304+
$tripleQuotes
305+
""".trimIndent()
306+
)
307+
}
308+
293309
@OptIn(ExperimentalSerializationApi::class)
294310
@Test
295311
fun encodeDefaultAnnotation() {

ktoml-core/src/commonTest/kotlin/com/akuleshov7/ktoml/encoders/PrimitiveEncoderTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class PrimitiveEncoderTest {
7373

7474
assertEncodedEquals(
7575
value = File("""hello \Uffffffff world"""),
76-
expectedToml = """escapeString = "hello \Uffffffff world""""
76+
expectedToml = """escapeString = "hello \\Uffffffff world""""
7777
)
7878

7979
assertEncodedEquals(

ktoml-core/src/commonTest/kotlin/com/akuleshov7/ktoml/writers/ValueWriteTest.kt

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package com.akuleshov7.ktoml.writers
33
import com.akuleshov7.ktoml.TomlOutputConfig
44
import com.akuleshov7.ktoml.exceptions.TomlWritingException
55
import com.akuleshov7.ktoml.tree.nodes.pairs.values.*
6-
import kotlinx.datetime.Instant
76
import kotlinx.datetime.LocalDate
87
import kotlinx.datetime.LocalDateTime
98
import kotlinx.datetime.LocalTime
@@ -46,9 +45,17 @@ class PrimitiveValueWriteTest {
4645

4746
// Backslash escaping
4847
testTomlValue(TomlBasicString("""hello\world""" as Any), """"hello\\world"""")
49-
testTomlValue(TomlBasicString("""hello\\\ world""" as Any), """"hello\\\\ world"""")
50-
testTomlValue(TomlBasicString("""hello\b\t\n\\\f\r world""" as Any), """"hello\b\t\n\\\f\r world"""")
51-
testTomlValue(TomlBasicString("""hello\u0000\\\Uffffffff world""" as Any), """"hello\u0000\\\Uffffffff world"""")
48+
testTomlValue(TomlBasicString("""hello\\\ world""" as Any), """"hello\\\\\\ world"""")
49+
testTomlValue(TomlBasicString("hello\b\\t\n\\\\f\r world" as Any), """"hello\b\\t\n\\\\f\r world"""")
50+
testTomlValue(TomlBasicString("hello\u0000\\\u0011 world" as Any), """"hello\u0000\\\u0011 world"""")
51+
}
52+
53+
@Test
54+
fun backslashEscapingWriteTest() {
55+
testTomlValue(TomlBasicString("""\""" as Any), """"\\"""", TomlOutputConfig())
56+
testTomlValue(TomlBasicString("""\a""" as Any), """"\\a"""", TomlOutputConfig())
57+
testTomlValue(TomlBasicString("""\\\""" as Any), """"\\\\\\"""", TomlOutputConfig())
58+
testTomlValue(TomlBasicString("""a\\b""" as Any), """"a\\\\b"""", TomlOutputConfig())
5259
}
5360

5461
@Test

0 commit comments

Comments
 (0)