From 3c981f57e2bd10b862f4549bc4b7e81681db3ee4 Mon Sep 17 00:00:00 2001 From: Antonis Geralis <43617260+planetis-m@users.noreply.github.com> Date: Mon, 9 Feb 2026 17:28:35 +0200 Subject: [PATCH 1/3] Add addSpan procedure for string parsing --- lib/pure/parsejson.nim | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/pure/parsejson.nim b/lib/pure/parsejson.nim index 9292a859641b7..69b3775fe12e3 100644 --- a/lib/pure/parsejson.nim +++ b/lib/pure/parsejson.nim @@ -175,23 +175,34 @@ proc parseEscapedUTF16*(buf: cstring, pos: var int): int = else: return -1 +proc addSpan(dst: var string; src: string; startPos, endPos: int) {.inline.} = + let n = endPos - startPos + if n <= 0: + return + let oldLen = dst.len + setLen(dst, oldLen + n) + copyMem(addr dst[oldLen], addr src[startPos], n) + proc parseString(my: var JsonParser): TokKind = result = tkString var pos = my.bufpos + 1 + var spanStart = pos if my.rawStringLiterals: add(my.a, '"') while true: case my.buf[pos] of '\0': - my.err = errQuoteExpected + addSpan(my.a, my.buf, spanStart, pos) result = tkError break of '"': + addSpan(my.a, my.buf, spanStart, pos) if my.rawStringLiterals: add(my.a, '"') inc(pos) break of '\\': + addSpan(my.a, my.buf, spanStart, pos) if my.rawStringLiterals: add(my.a, '\\') case my.buf[pos+1] @@ -223,19 +234,16 @@ proc parseString(my: var JsonParser): TokKind = var pos2 = pos var r = parseEscapedUTF16(cstring(my.buf), pos) if r < 0: - my.err = errInvalidToken break # Deal with surrogates if (r and 0xfc00) == 0xd800: if my.buf[pos] != '\\' or my.buf[pos+1] != 'u': - my.err = errInvalidToken break inc(pos, 2) var s = parseEscapedUTF16(cstring(my.buf), pos) if (s and 0xfc00) == 0xdc00 and s > 0: r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00)) else: - my.err = errInvalidToken break if my.rawStringLiterals: let length = pos - pos2 @@ -251,14 +259,18 @@ proc parseString(my: var JsonParser): TokKind = # don't bother with the error add(my.a, my.buf[pos]) 
inc(pos) + spanStart = pos of '\c': + addSpan(my.a, my.buf, spanStart, pos) pos = lexbase.handleCR(my, pos) add(my.a, '\c') + spanStart = pos of '\L': + addSpan(my.a, my.buf, spanStart, pos) pos = lexbase.handleLF(my, pos) add(my.a, '\L') + spanStart = pos else: - add(my.a, my.buf[pos]) inc(pos) my.bufpos = pos # store back From aed70cc752f468dbdc62902e050e090c12779bd7 Mon Sep 17 00:00:00 2001 From: Antonis Geralis <43617260+planetis-m@users.noreply.github.com> Date: Mon, 9 Feb 2026 22:02:45 +0200 Subject: [PATCH 2/3] Fix CI for addSpan with nimvm, nimscript and js special handling --- lib/pure/parsejson.nim | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/pure/parsejson.nim b/lib/pure/parsejson.nim index 69b3775fe12e3..a8dcc04c62191 100644 --- a/lib/pure/parsejson.nim +++ b/lib/pure/parsejson.nim @@ -179,9 +179,22 @@ proc addSpan(dst: var string; src: string; startPos, endPos: int) {.inline.} = let n = endPos - startPos if n <= 0: return - let oldLen = dst.len - setLen(dst, oldLen + n) - copyMem(addr dst[oldLen], addr src[startPos], n) + + let old = dst.len + dst.setLen old + n + + template impl = + for i in 0.. Date: Fri, 27 Feb 2026 19:31:21 +0200 Subject: [PATCH 3/3] Improve error handling in parsejson Handle invalid token errors for JSON parsing. 
---
NOTE(review): the '\0' (unterminated string literal) branch sets
errQuoteExpected, which is the error this branch assigned before PATCH 1/3
removed the line; restoring it as errInvalidToken would change the reported
error for unterminated strings. The three \u-escape failure paths keep
errInvalidToken, matching the lines PATCH 1/3 removed there. The blob hash on
the "index" line below was not regenerated after this amendment.

 lib/pure/parsejson.nim | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/pure/parsejson.nim b/lib/pure/parsejson.nim
index a8dcc04c62191..657f8cc697f50 100644
--- a/lib/pure/parsejson.nim
+++ b/lib/pure/parsejson.nim
@@ -205,6 +205,7 @@ proc parseString(my: var JsonParser): TokKind =
   while true:
     case my.buf[pos]
     of '\0':
+      my.err = errQuoteExpected
       addSpan(my.a, my.buf, spanStart, pos)
       result = tkError
       break
@@ -247,16 +248,19 @@ proc parseString(my: var JsonParser): TokKind =
         var pos2 = pos
         var r = parseEscapedUTF16(cstring(my.buf), pos)
         if r < 0:
+          my.err = errInvalidToken
           break
         # Deal with surrogates
         if (r and 0xfc00) == 0xd800:
           if my.buf[pos] != '\\' or my.buf[pos+1] != 'u':
+            my.err = errInvalidToken
             break
           inc(pos, 2)
           var s = parseEscapedUTF16(cstring(my.buf), pos)
           if (s and 0xfc00) == 0xdc00 and s > 0:
             r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
           else:
+            my.err = errInvalidToken
             break
         if my.rawStringLiterals:
           let length = pos - pos2