@@ -38,7 +38,27 @@ class JSON5Lexer(private val source: String) {
3838 ' f' -> readFalse()
3939 ' I' -> readInfinity()
4040 ' N' -> readNaN()
41- ' +' , ' -' , ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' 7' , ' 8' , ' 9' , ' .' -> readNumber()
41+ ' +' , ' -' -> {
42+ if (peek() == ' I' ) {
43+ // Handle -Infinity
44+ val sign = currentChar
45+ advance()
46+ if (source.substring(pos, minOf(pos + 8 , source.length)) == " Infinity" ) {
47+ repeat(8 ) { advance() }
48+ return Token .NumericToken (if (sign == ' -' ) Double .NEGATIVE_INFINITY else Double .POSITIVE_INFINITY , line, column)
49+ } else if (source.substring(pos, minOf(pos + 3 , source.length)) == " NaN" ) {
50+ // Handle -NaN (technically the same as NaN)
51+ repeat(3 ) { advance() }
52+ return Token .NumericToken (Double .NaN , line, column)
53+ }
54+ // Not Infinity/NaN, revert and continue with normal number parsing
55+ pos - = 1
56+ currentChar = sign
57+ }
58+ readNumber()
59+ }
60+ ' 0' , ' 1' , ' 2' , ' 3' , ' 4' , ' 5' , ' 6' , ' 7' , ' 8' , ' 9' , ' .' -> readNumber()
61+ ' \\ ' -> readEscapedIdentifier()
4262 else -> {
4363 if (isIdentifierStart(currentChar)) {
4464 readIdentifier()
@@ -49,6 +69,45 @@ class JSON5Lexer(private val source: String) {
4969 }
5070 }
5171
/**
 * Reads an identifier whose first character is written as a `\uXXXX` escape.
 *
 * Expects [currentChar] to be the leading backslash. Each escape is decoded
 * with `readHexEscape(4)`; unescaped characters are consumed for as long as
 * `isIdentifierPart` accepts them.
 *
 * Both the first character and every later *escaped* character are validated
 * after decoding, so an escape cannot be used to smuggle a character into the
 * identifier that would be rejected if written literally (for example
 * `\u0020`, a space).
 *
 * @return an identifier token holding the decoded text, positioned at the
 *   column where the leading backslash was found.
 * @throws JSON5Exception if a backslash is not followed by `u`, or if a decoded
 *   character is not valid at its position in the identifier. `readHexEscape`
 *   is called for the hex digits and may raise its own error.
 */
private fun readEscapedIdentifier(): Token {
    val startColumn = column

    // Consumes `\uXXXX` starting at the backslash and returns the decoded character.
    fun readUnicodeEscape(): Char {
        advance() // Skip the backslash
        if (currentChar != 'u') {
            throw JSON5Exception("Expected 'u' after backslash in identifier", line, column)
        }
        advance() // Skip the 'u'
        return readHexEscape(4)
    }

    val first = readUnicodeEscape()
    if (!isIdentifierStart(first)) {
        throw JSON5Exception("Invalid identifier character", line, column)
    }

    val buffer = StringBuilder().append(first)

    // Continue with the rest of the identifier: further escapes or literal parts.
    while (true) {
        val c = currentChar
        if (c == '\\') {
            val escaped = readUnicodeEscape()
            // An escaped character must satisfy the same rule as a literal one.
            if (!isIdentifierPart(escaped)) {
                throw JSON5Exception("Invalid identifier character", line, column)
            }
            buffer.append(escaped)
        } else if (c != null && isIdentifierPart(c)) {
            buffer.append(c)
            advance()
        } else {
            break
        }
    }

    return Token.IdentifierToken(buffer.toString(), line, startColumn)
}
110+
52111 private fun isIdentifierStart (c : Char? ): Boolean {
53112 if (c == null ) return false
54113 return c == ' $' || c == ' _' || c.isLetter()
@@ -80,11 +139,19 @@ class JSON5Lexer(private val source: String) {
80139 }
81140
/**
 * Advances past a run of whitespace characters.
 *
 * Stops as soon as [currentChar] is `null` or is not accepted by
 * [isWhitespace]; does nothing if the current character is already
 * non-whitespace.
 */
private fun skipWhitespace() {
    while (true) {
        val c = currentChar ?: return
        if (!isWhitespace(c)) return
        advance()
    }
}
87146
/**
 * Tells whether [c] is treated as whitespace by this lexer.
 *
 * Accepts an explicit list of characters (space, tab, line feed, carriage
 * return, vertical tab, form feed, no-break space, line separator, paragraph
 * separator, byte-order mark) plus any character whose Unicode category is
 * [CharCategory.SPACE_SEPARATOR].
 *
 * @return `true` if [c] is in the set described above, `false` otherwise.
 */
private fun isWhitespace(c: Char): Boolean = when (c) {
    ' ', '\t', '\n', '\r',
    '\u000B', '\u000C', '\u00A0',
    '\u2028', '\u2029', '\uFEFF' -> true
    // Any other Unicode space separator (category Zs)
    else -> c.category == CharCategory.SPACE_SEPARATOR
}
154+
88155 private fun skipComments () {
89156 if (currentChar == ' /' && peek() == ' /' ) {
90157 // Skip single-line comment
@@ -158,11 +225,19 @@ class JSON5Lexer(private val source: String) {
158225 ' \\ ' -> { advance(); ' \\ ' }
159226 ' \' ' -> { advance(); ' \' ' }
160227 ' "' -> { advance(); ' "' }
161- ' \n ' -> { advance(); ' \n ' }
228+ ' a' -> { advance(); ' \u0007 ' } // Bell character
229+ ' \n ' -> {
230+ advance()
231+ return ' ' // Line continuation returns nothing visible
232+ }
162233 ' \r ' -> {
163234 advance()
164235 if (currentChar == ' \n ' ) advance()
165- ' ' // Line continuation returns nothing
236+ return ' ' // Line continuation returns nothing visible
237+ }
238+ ' \u2028 ' , ' \u2029 ' -> {
239+ advance()
240+ return ' ' // Line continuation with line/paragraph separator
166241 }
167242 ' x' -> {
168243 advance()
@@ -180,22 +255,6 @@ class JSON5Lexer(private val source: String) {
180255 }
181256 }
182257
183- private fun readHexEscape (digits : Int ): Char {
184- val hexString = StringBuilder ()
185- repeat(digits) {
186- if (currentChar == null || ! currentChar!! .isHexDigit()) {
187- throw JSON5Exception (" Invalid hex escape sequence" , line, column)
188- }
189- hexString.append(currentChar)
190- advance()
191- }
192- return hexString.toString().toInt(16 ).toChar()
193- }
194-
195- private fun Char.isHexDigit (): Boolean {
196- return this in ' 0' .. ' 9' || this in ' a' .. ' f' || this in ' A' .. ' F'
197- }
198-
199258 private fun readNull (): Token {
200259 val startColumn = column
201260 val startLine = line
@@ -295,8 +354,18 @@ class JSON5Lexer(private val source: String) {
295354 throw JSON5Exception (" Invalid hexadecimal number" , line, column)
296355 }
297356
298- val value = buffer.toString().toDouble()
299- return Token .NumericToken (value, startLine, startColumn)
357+ try {
358+ // Parse the hex number manually instead of using toDouble()
359+ val hexStr = buffer.toString()
360+ val value = if (isNegative) {
361+ - parseHexToDouble(hexStr.substring(3 )) // skip "-0x"
362+ } else {
363+ parseHexToDouble(hexStr.substring(2 )) // skip "0x"
364+ }
365+ return Token .NumericToken (value, startLine, startColumn)
366+ } catch (e: NumberFormatException ) {
367+ throw JSON5Exception (" Invalid hexadecimal number: ${buffer} " , line, column)
368+ }
300369 }
301370
302371 // Handle decimal notation
@@ -358,6 +427,14 @@ class JSON5Lexer(private val source: String) {
358427 return Token .NumericToken (value, startLine, startColumn)
359428 }
360429
/**
 * Converts a string of hexadecimal digits (no sign, no `0x` prefix) to a
 * [Double] by accumulating `result * 16 + digit` from left to right.
 *
 * Accumulating in a `Double` means the digit count is not limited by the range
 * of `Int` or `Long`; values beyond 2^53 are subject to normal floating-point
 * rounding.
 *
 * @param hexStr the hexadecimal digits to convert. An empty string yields `0.0`.
 * @return the numeric value of [hexStr].
 * @throws NumberFormatException if [hexStr] contains a character that is not a
 *   hexadecimal digit. Thrown explicitly because `Char.digitToInt` would raise a
 *   plain `IllegalArgumentException`, which a `catch (e: NumberFormatException)`
 *   does not intercept.
 */
private fun parseHexToDouble(hexStr: String): Double =
    hexStr.fold(0.0) { acc, c ->
        val digit = c.digitToIntOrNull(16)
            ?: throw NumberFormatException("Invalid hexadecimal digit '$c' in \"$hexStr\"")
        acc * 16 + digit
    }
437+
361438 private fun readIdentifier (): Token {
362439 val startColumn = column
363440 val buffer = StringBuilder ()
0 commit comments