Skip to content

Commit 9067d75

Browse files
committed
Attempted fix failed
1 parent fb2c86d commit 9067d75

File tree

2 files changed

+146
-63
lines changed

2 files changed

+146
-63
lines changed

lib/src/main/kotlin/io/github/json5/kotlin/JSON5Lexer.kt

Lines changed: 144 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package io.github.json5.kotlin
22

3+
import kotlin.math.pow
4+
35
/**
46
* Lexer for JSON5 syntax
57
* Breaks JSON5 text into tokens for the parser
@@ -53,59 +55,75 @@ class JSON5Lexer(private val source: String) {
5355
}
5456
// Not Infinity/NaN, revert and continue with normal number parsing
5557
pos -= 1
58+
column -= 1
5659
currentChar = sign
5760
}
5861
readNumber()
5962
}
6063
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.' -> readNumber()
61-
'\\' -> readEscapedIdentifier()
62-
else -> {
63-
if (isIdentifierStart(currentChar)) {
64-
readIdentifier()
65-
} else {
66-
throw JSON5Exception("Unexpected character: $currentChar", line, column)
67-
}
68-
}
69-
}
70-
}
71-
72-
private fun readEscapedIdentifier(): Token {
73-
val startColumn = column
74-
advance() // Skip the backslash
64+
'\\' -> {
65+
// Handle Unicode escape sequences in identifiers
66+
val startColumn = column
67+
advance() // Skip the backslash
7568

76-
// Handle Unicode escapes in identifiers
77-
if (currentChar != 'u') {
78-
throw JSON5Exception("Expected 'u' after backslash in identifier", line, column)
79-
}
69+
if (currentChar != 'u') {
70+
throw JSON5Exception("Expected 'u' after backslash in identifier", line, column)
71+
}
8072

81-
advance() // Skip the 'u'
82-
val c = readHexEscape(4)
73+
advance() // Skip 'u'
74+
val hexDigits = StringBuilder()
75+
repeat(4) {
76+
if (currentChar == null || !currentChar!!.isHexDigit()) {
77+
throw JSON5Exception("Invalid hex escape sequence in identifier", line, column)
78+
}
79+
hexDigits.append(currentChar)
80+
advance()
81+
}
8382

84-
// Check if valid identifier start
85-
if (!isIdentifierStart(c)) {
86-
throw JSON5Exception("Invalid identifier character", line, column)
87-
}
83+
val char = hexDigits.toString().toInt(16).toChar()
84+
if (!isIdentifierStart(char)) {
85+
throw JSON5Exception("Invalid identifier character", line, startColumn)
86+
}
8887

89-
val buffer = StringBuilder().append(c)
88+
val buffer = StringBuilder().append(char)
89+
90+
// Continue reading the rest of the identifier
91+
while (true) {
92+
if (currentChar == '\\') {
93+
advance() // Skip backslash
94+
if (currentChar != 'u') {
95+
throw JSON5Exception("Expected 'u' after backslash in identifier", line, column)
96+
}
97+
advance() // Skip 'u'
98+
99+
val identHexDigits = StringBuilder()
100+
repeat(4) {
101+
if (currentChar == null || !currentChar!!.isHexDigit()) {
102+
throw JSON5Exception("Invalid hex escape sequence in identifier", line, column)
103+
}
104+
identHexDigits.append(currentChar)
105+
advance()
106+
}
107+
108+
buffer.append(identHexDigits.toString().toInt(16).toChar())
109+
} else if (currentChar != null && isIdentifierPart(currentChar)) {
110+
buffer.append(currentChar)
111+
advance()
112+
} else {
113+
break
114+
}
115+
}
90116

91-
// Continue reading the rest of the identifier
92-
while (true) {
93-
if (currentChar == '\\') {
94-
advance() // Skip backslash
95-
if (currentChar != 'u') {
96-
throw JSON5Exception("Expected 'u' after backslash in identifier", line, column)
117+
return Token.IdentifierToken(buffer.toString(), line, startColumn)
118+
}
119+
else -> {
120+
if (isIdentifierStart(currentChar)) {
121+
readIdentifier()
122+
} else {
123+
throw JSON5Exception("Unexpected character: $currentChar", line, column)
97124
}
98-
advance() // Skip 'u'
99-
buffer.append(readHexEscape(4))
100-
} else if (currentChar != null && isIdentifierPart(currentChar)) {
101-
buffer.append(currentChar)
102-
advance()
103-
} else {
104-
break
105125
}
106126
}
107-
108-
return Token.IdentifierToken(buffer.toString(), line, startColumn)
109127
}
110128

111129
private fun isIdentifierStart(c: Char?): Boolean {
@@ -214,43 +232,77 @@ class JSON5Lexer(private val source: String) {
214232
}
215233

216234
private fun readEscapeSequence(): Char {
217-
return when (currentChar) {
218-
'b' -> { advance(); '\b' }
219-
'f' -> { advance(); '\u000C' }
220-
'n' -> { advance(); '\n' }
221-
'r' -> { advance(); '\r' }
222-
't' -> { advance(); '\t' }
223-
'v' -> { advance(); '\u000B' }
224-
'0' -> { advance(); '\u0000' }
225-
'\\' -> { advance(); '\\' }
226-
'\'' -> { advance(); '\'' }
227-
'"' -> { advance(); '"' }
228-
'a' -> { advance(); '\u0007' } // Bell character
235+
when (currentChar) {
236+
'b' -> {
237+
advance()
238+
return '\b'
239+
}
240+
'f' -> {
241+
advance()
242+
return '\u000C'
243+
}
244+
'n' -> {
245+
advance()
246+
return '\n'
247+
}
248+
'r' -> {
249+
advance()
250+
return '\r'
251+
}
252+
't' -> {
253+
advance()
254+
return '\t'
255+
}
256+
'v' -> {
257+
advance()
258+
return '\u000B'
259+
}
260+
'0' -> {
261+
advance()
262+
return '\u0000'
263+
}
264+
'\\' -> {
265+
advance()
266+
return '\\'
267+
}
268+
'\'' -> {
269+
advance()
270+
return '\''
271+
}
272+
'"' -> {
273+
advance()
274+
return '"'
275+
}
276+
'a' -> {
277+
advance()
278+
return '\u0007' // Bell character
279+
}
229280
'\n' -> {
230281
advance()
231-
return ' ' // Line continuation returns nothing visible
282+
return '\u0000' // Line continuation returns nothing visible
232283
}
233284
'\r' -> {
234285
advance()
235286
if (currentChar == '\n') advance()
236-
return ' ' // Line continuation returns nothing visible
287+
return '\u0000' // Line continuation returns nothing visible
237288
}
238289
'\u2028', '\u2029' -> {
239290
advance()
240-
return ' ' // Line continuation with line/paragraph separator
291+
return '\u0000' // Line continuation with line/paragraph separator
241292
}
242293
'x' -> {
243294
advance()
244-
readHexEscape(2)
295+
return readHexEscape(2)
245296
}
246297
'u' -> {
247298
advance()
248-
readHexEscape(4)
299+
return readHexEscape(4)
249300
}
250301
else -> {
302+
// Just return the character after the backslash (e.g. for \', \", etc.)
251303
val c = currentChar
252304
advance()
253-
c ?: throw JSON5Exception("Invalid escape sequence", line, column)
305+
return c ?: throw JSON5Exception("Invalid escape sequence", line, column)
254306
}
255307
}
256308
}
@@ -428,11 +480,25 @@ class JSON5Lexer(private val source: String) {
428480
}
429481

430482
private fun parseHexToDouble(hexStr: String): Double {
431-
var result = 0.0
432-
for (c in hexStr) {
433-
result = result * 16 + c.digitToInt(16)
483+
// For very large hex numbers, we need to use a manual approach that mimics JavaScript
484+
// behavior to ensure consistent results across platforms
485+
486+
// If the hex string is too long, we need to process it carefully to avoid precision issues
487+
if (hexStr.length > 12) {
488+
// Parse the hex string in chunks to avoid overflow
489+
val upperHex = hexStr.substring(0, hexStr.length - 8)
490+
val lowerHex = hexStr.substring(hexStr.length - 8)
491+
492+
// Convert each chunk to a double and combine them
493+
val upperValue = upperHex.toULong(16).toDouble()
494+
val lowerValue = lowerHex.toULong(16).toDouble()
495+
496+
// Combine the chunks (upper * 2^32 + lower)
497+
return upperValue * 2.0.pow(32) + lowerValue
434498
}
435-
return result
499+
500+
// For shorter hex strings, direct conversion works fine
501+
return hexStr.toULong(16).toDouble()
436502
}
437503

438504
private fun readIdentifier(): Token {
@@ -446,4 +512,20 @@ class JSON5Lexer(private val source: String) {
446512

447513
return Token.IdentifierToken(buffer.toString(), line, startColumn)
448514
}
515+
516+
private fun readHexEscape(digits: Int): Char {
517+
val hexString = StringBuilder()
518+
repeat(digits) {
519+
if (currentChar == null || !currentChar!!.isHexDigit()) {
520+
throw JSON5Exception("Invalid hex escape sequence", line, column)
521+
}
522+
hexString.append(currentChar)
523+
advance()
524+
}
525+
return hexString.toString().toInt(16).toChar()
526+
}
527+
528+
private fun Char.isHexDigit(): Boolean {
529+
return this in '0'..'9' || this in 'a'..'f' || this in 'A'..'F'
530+
}
449531
}

lib/src/test/kotlin/io/github/json5/kotlin/JSON5ParseTest.kt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class JSON5ParseTest {
6565

6666
@Test
6767
fun `should parse special character property names`() {
68-
JSON5.parse("""{\$\_:1,_$:2,a\u200C:3}""") shouldBe mapOf("\$_" to 1.0, "_$" to 2.0, "a\u200C" to 3.0)
68+
JSON5.parse("""{\${"$"}_:1,_\$:2,a\u200C:3}""") shouldBe mapOf("\$_" to 1.0, "_$" to 2.0, "a\u200C" to 3.0)
6969
}
7070

7171
@Test
@@ -75,6 +75,7 @@ class JSON5ParseTest {
7575

7676
@Test
7777
fun `should parse escaped property names`() {
78+
// Note: We need a single backslash in the input JSON5 text
7879
JSON5.parse("""{\\u0061\\u0062:1,\\u0024\\u005F:2,\\u005F\\u0024:3}""") shouldBe mapOf("ab" to 1.0, "\$_" to 2.0, "_$" to 3.0)
7980
}
8081

0 commit comments

Comments
 (0)