diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt
index 94f7052cd..69e4c392e 100644
--- a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt
+++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt
@@ -113,4 +113,12 @@ class JsonParserTest : JsonTestBase() {
         assertTrue { value.jsonPrimitive.isString }
         assertEquals("null", obj["k"]!!.jsonPrimitive.content)
     }
+
+    @Test
+    fun testUnicodeEscapeWithFollowingHex() {
+        // Exactly four hex digits belong to the escape; the trailing 'a' is a literal character
+        val input = "\"\\u00f3a\""
+        val decoded = Json.decodeFromString<String>(input)
+        assertEquals("óa", decoded, "Should parse 'ó' then 'a', not try to consume 'a'")
+    }
 }
diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt
index 5f570a95e..85a270447 100644
--- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt
+++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt
@@ -148,6 +148,17 @@ internal abstract class AbstractJsonLexer {
 
     protected abstract val source: CharSequence
 
+    // Maps an ASCII code (0..127) to its hex digit value, or -1 when the char is not a hex digit
+    companion object {
+        private val HEX_TABLE = IntArray(128) { -1 }
+
+        init {
+            for (i in '0'..'9') HEX_TABLE[i.code] = i - '0'
+            for (i in 'a'..'f') HEX_TABLE[i.code] = i - 'a' + 10
+            for (i in 'A'..'F') HEX_TABLE[i.code] = i - 'A' + 10
+        }
+    }
+
     @JvmField
     internal var currentPosition: Int = 0 // position in source
 
@@ -498,6 +509,7 @@ internal abstract class AbstractJsonLexer {
     }
 
     private fun appendHex(source: CharSequence, startPos: Int): Int {
+        // Refill unless the 4 hex digits plus at least one following character are already buffered
         if (startPos + 4 >= source.length) {
             currentPosition = startPos
             ensureHaveChars()
@@ -505,12 +517,25 @@ internal abstract class AbstractJsonLexer {
                 fail("Unexpected EOF during unicode escape")
             return appendHex(source, currentPosition)
         }
-        escapedString.append(
-            ((fromHexChar(source, startPos) shl 12) +
-                (fromHexChar(source, startPos + 1) shl 8) +
-                (fromHexChar(source, startPos + 2) shl 4) +
-                fromHexChar(source, startPos + 3)).toChar()
-        )
+
+        var value = 0
+        // Read exactly 4 hex digits, as RFC 8259 requires for a \u escape
+        for (i in 0..3) {
+            val char = source[startPos + i]
+            val code = char.code
+
+            // Table lookup; the range check keeps non-ASCII chars from indexing out of bounds
+            val digit = if (code < 128) HEX_TABLE[code] else -1
+
+            if (digit == -1) {
+                fail("Invalid Unicode escape sequence: expected hex digit, found '$char'")
+            }
+
+            // Shift the previous digits up one nibble and append the new one
+            value = (value shl 4) or digit
+        }
+
+        escapedString.append(value.toChar())
         return startPos + 4
     }
 
@@ -518,15 +543,6 @@ internal abstract class AbstractJsonLexer {
         if (!condition) fail(message(), position)
     }
 
-    private fun fromHexChar(source: CharSequence, currentPosition: Int): Int {
-        return when (val character = source[currentPosition]) {
-            in '0'..'9' -> character.code - '0'.code
-            in 'a'..'f' -> character.code - 'a'.code + 10
-            in 'A'..'F' -> character.code - 'A'.code + 10
-            else -> fail("Invalid toHexChar char '$character' in unicode escape")
-        }
-    }
-
     fun skipElement(allowLenientStrings: Boolean) {
         val tokenStack = mutableListOf<Byte>()
         var lastToken = peekNextToken()