11package io.github.json5.kotlin
22
3+ import kotlin.math.pow
4+
35/* *
46 * Lexer for JSON5 syntax
57 * Breaks JSON5 text into tokens for the parser
@@ -53,59 +55,75 @@ class JSON5Lexer(private val source: String) {
5355 }
5456 // Not Infinity/NaN, revert and continue with normal number parsing
5557 pos - = 1
58+ column - = 1
5659 currentChar = sign
5760 }
5861 readNumber()
5962 }
6063 ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' 7' , ' 8' , ' 9' , ' .' -> readNumber()
61- ' \\ ' -> readEscapedIdentifier()
62- else -> {
63- if (isIdentifierStart(currentChar)) {
64- readIdentifier()
65- } else {
66- throw JSON5Exception (" Unexpected character: $currentChar " , line, column)
67- }
68- }
69- }
70- }
71-
72- private fun readEscapedIdentifier (): Token {
73- val startColumn = column
74- advance() // Skip the backslash
64+ ' \\ ' -> {
65+ // Handle Unicode escape sequences in identifiers
66+ val startColumn = column
67+ advance() // Skip the backslash
7568
76- // Handle Unicode escapes in identifiers
77- if (currentChar != ' u' ) {
78- throw JSON5Exception (" Expected 'u' after backslash in identifier" , line, column)
79- }
69+ if (currentChar != ' u' ) {
70+ throw JSON5Exception (" Expected 'u' after backslash in identifier" , line, column)
71+ }
8072
81- advance() // Skip the 'u'
82- val c = readHexEscape(4 )
73+ advance() // Skip 'u'
74+ val hexDigits = StringBuilder ()
75+ repeat(4 ) {
76+ if (currentChar == null || ! currentChar!! .isHexDigit()) {
77+ throw JSON5Exception (" Invalid hex escape sequence in identifier" , line, column)
78+ }
79+ hexDigits.append(currentChar)
80+ advance()
81+ }
8382
84- // Check if valid identifier start
85- if (! isIdentifierStart(c )) {
86- throw JSON5Exception (" Invalid identifier character" , line, column )
87- }
83+ val char = hexDigits.toString().toInt( 16 ).toChar()
84+ if (! isIdentifierStart(char )) {
85+ throw JSON5Exception (" Invalid identifier character" , line, startColumn )
86+ }
8887
89- val buffer = StringBuilder ().append(c)
88+ val buffer = StringBuilder ().append(char)
89+
90+ // Continue reading the rest of the identifier
91+ while (true ) {
92+ if (currentChar == ' \\ ' ) {
93+ advance() // Skip backslash
94+ if (currentChar != ' u' ) {
95+ throw JSON5Exception (" Expected 'u' after backslash in identifier" , line, column)
96+ }
97+ advance() // Skip 'u'
98+
99+ val identHexDigits = StringBuilder ()
100+ repeat(4 ) {
101+ if (currentChar == null || ! currentChar!! .isHexDigit()) {
102+ throw JSON5Exception (" Invalid hex escape sequence in identifier" , line, column)
103+ }
104+ identHexDigits.append(currentChar)
105+ advance()
106+ }
107+
108+ buffer.append(identHexDigits.toString().toInt(16 ).toChar())
109+ } else if (currentChar != null && isIdentifierPart(currentChar)) {
110+ buffer.append(currentChar)
111+ advance()
112+ } else {
113+ break
114+ }
115+ }
90116
91- // Continue reading the rest of the identifier
92- while (true ) {
93- if (currentChar == ' \\ ' ) {
94- advance() // Skip backslash
95- if (currentChar != ' u' ) {
96- throw JSON5Exception (" Expected 'u' after backslash in identifier" , line, column)
117+ return Token .IdentifierToken (buffer.toString(), line, startColumn)
118+ }
119+ else -> {
120+ if (isIdentifierStart(currentChar)) {
121+ readIdentifier()
122+ } else {
123+ throw JSON5Exception (" Unexpected character: $currentChar " , line, column)
97124 }
98- advance() // Skip 'u'
99- buffer.append(readHexEscape(4 ))
100- } else if (currentChar != null && isIdentifierPart(currentChar)) {
101- buffer.append(currentChar)
102- advance()
103- } else {
104- break
105125 }
106126 }
107-
108- return Token .IdentifierToken (buffer.toString(), line, startColumn)
109127 }
110128
111129 private fun isIdentifierStart (c : Char? ): Boolean {
@@ -214,43 +232,77 @@ class JSON5Lexer(private val source: String) {
214232 }
215233
/**
 * Decodes the escape sequence whose first character (the one right after the
 * backslash) is the current character, consuming every character that belongs
 * to the escape.
 *
 * - Single-character escapes (`b f n r t v 0 \ ' " a`) map to their control or
 *   literal character.
 * - A line terminator (`\n`, `\r`, `\r\n`, U+2028, U+2029) is a line
 *   continuation and yields `'\u0000'`.
 * - `x` and `u` delegate to [readHexEscape] with 2 and 4 digits respectively.
 * - Any other character is returned unchanged.
 *
 * NOTE(review): a line continuation and the `\0` escape both yield `'\u0000'`;
 * how the caller tells them apart is not visible from this function — confirm.
 *
 * @return the decoded character
 * @throws JSON5Exception if the input ends right after the backslash
 */
private fun readEscapeSequence(): Char {
    // Steps past the current character and hands back the decoded value.
    fun consume(decoded: Char): Char {
        advance()
        return decoded
    }

    // Value produced for a backslash followed by a line terminator.
    val lineContinuation = '\u0000'

    return when (val escape = currentChar) {
        'b' -> consume('\b')
        'f' -> consume('\u000C')
        'n' -> consume('\n')
        'r' -> consume('\r')
        't' -> consume('\t')
        'v' -> consume('\u000B')
        '0' -> consume('\u0000')
        '\\' -> consume('\\')
        '\'' -> consume('\'')
        '"' -> consume('"')
        'a' -> consume('\u0007') // Bell character
        '\n', '\u2028', '\u2029' -> consume(lineContinuation)
        '\r' -> {
            advance()
            // Treat CR LF as a single line terminator.
            if (currentChar == '\n') advance()
            lineContinuation
        }
        'x' -> {
            advance()
            readHexEscape(2)
        }
        'u' -> {
            advance()
            readHexEscape(4)
        }
        else -> {
            // Unrecognised escape: the character stands for itself. The advance
            // happens before the null check, so the error position is the one
            // reached after stepping past the end of input.
            advance()
            escape ?: throw JSON5Exception("Invalid escape sequence", line, column)
        }
    }
}
@@ -428,11 +480,25 @@ class JSON5Lexer(private val source: String) {
428480 }
429481
/**
 * Converts a string of hexadecimal digits (without the `0x` prefix) to the
 * nearest [Double].
 *
 * The conversion works for any number of digits and rounds exactly once
 * (round-to-nearest, ties-to-even). Values too large for a finite double
 * become [Double.POSITIVE_INFINITY].
 *
 * Only the Kotlin standard library is used, so the result does not depend on
 * platform-specific big-number support.
 *
 * @param hexStr hexadecimal digits, upper or lower case; leading zeros allowed
 * @return the numeric value of [hexStr] as a double
 * @throws NumberFormatException if [hexStr] is empty or contains a non-hex character
 */
private fun parseHexToDouble(hexStr: String): Double {
    if (hexStr.isEmpty()) {
        throw NumberFormatException("Invalid hexadecimal number: \"$hexStr\"")
    }

    fun digitValue(c: Char): Int =
        c.digitToIntOrNull(16) ?: throw NumberFormatException("Invalid hexadecimal number: \"$hexStr\"")

    // Leading zeros carry no value; dropping them keeps the head window on
    // the significant digits.
    val significant = hexStr.trimStart('0')

    // 15 hex digits are 60 bits: they fit in a ULong without overflow and,
    // when more digits follow, give at least 57 significant bits, which is
    // enough guard bits below the 53-bit mantissa for correct rounding.
    val headLength = minOf(significant.length, 15)

    var head = 0UL
    for (i in 0 until headLength) {
        head = (head shl 4) or digitValue(significant[i]).toULong()
    }

    // Any non-zero digit beyond the head only matters for breaking a rounding
    // tie; fold that information into the lowest (guard) bit as a sticky bit.
    var sticky = false
    for (i in headLength until significant.length) {
        if (digitValue(significant[i]) != 0) sticky = true
    }
    if (sticky) head = head or 1UL

    // Single rounding step: 60-bit integer to double.
    var result = head.toDouble()

    // Scale by 16 for each digit that was not part of the head. Multiplying by
    // a power of two is exact until the value overflows to infinity.
    var remaining = significant.length - headLength
    while (remaining > 0 && result.isFinite()) {
        result *= 16.0
        remaining--
    }
    return result
}
437503
438504 private fun readIdentifier (): Token {
@@ -446,4 +512,20 @@ class JSON5Lexer(private val source: String) {
446512
447513 return Token .IdentifierToken (buffer.toString(), line, startColumn)
448514 }
515+
/**
 * Reads exactly [digits] hexadecimal digits starting at the current character
 * and returns the character whose UTF-16 code unit they encode.
 *
 * Each digit is consumed as it is validated, so after a successful call the
 * current character is the one following the escape.
 *
 * @param digits number of hex digits to read (the visible callers pass 2 or 4)
 * @return the character with the parsed code unit
 * @throws JSON5Exception if a non-hex character or end of input is found
 *   before [digits] digits have been read
 */
private fun readHexEscape(digits: Int): Char {
    val hexText = buildString {
        for (index in 0 until digits) {
            // Snapshot the current character so it can be null-checked once.
            val candidate = currentChar
            if (candidate == null || !candidate.isHexDigit()) {
                throw JSON5Exception("Invalid hex escape sequence", line, column)
            }
            append(candidate)
            advance()
        }
    }
    val codeUnit = hexText.toInt(16)
    return codeUnit.toChar()
}
527+
/**
 * Returns `true` when this character is an ASCII hexadecimal digit:
 * `0`-`9`, `a`-`f` or `A`-`F`.
 */
private fun Char.isHexDigit(): Boolean = when (this) {
    in '0'..'9', in 'a'..'f', in 'A'..'F' -> true
    else -> false
}
449531}
0 commit comments