Skip to content

Commit 6760930

Browse files
committed
cover more cases
1 parent 47f2bbf commit 6760930

File tree

1 file changed

+46
-25
lines changed

1 file changed

+46
-25
lines changed

lib/json_mend/parser.rb

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -715,37 +715,52 @@ def check_unmatched_in_object_value(index:, lstring_delimiter:, rstring_delimite
715715
end
716716

717717
def check_unmatched_in_array(rstring_delimiter:)
718-
# Heuristic: Check if this quote is a closer or internal.
719-
# 1. Find the NEXT delimiter (quote) index `j`.
720-
j = 1
718+
saved_pos = @scanner.pos
719+
@scanner.getch # Skip the current char (the potential closer)
720+
721721
found_next = false
722-
while (c = peek_char(j))
723-
if c == rstring_delimiter
724-
# Check if escaped (count preceding backslashes)
725-
bk = 1
726-
slashes = 0
727-
while j - bk >= 0 && peek_char(j - bk) == '\\'
728-
slashes += 1
729-
bk += 1
730-
end
731-
if slashes.even?
732-
found_next = true
733-
break
734-
end
735-
end
722+
j = 1
723+
724+
# Scan forward linearly
725+
while (c = @scanner.getch)
736726
j += 1
727+
next if c != rstring_delimiter
728+
729+
# Check if escaped (count preceding backslashes)
730+
# We need to look behind from the current scanner position
731+
bk = 1
732+
slashes = 0
733+
# Look back in the string buffer directly for speed
734+
while (char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) && char_code == 92 # 92 is backslash
735+
slashes += 1
736+
bk += 1
737+
end
738+
739+
if slashes.even?
740+
found_next = true
741+
break
742+
end
737743
end
738744

739-
# 2. Check conditions to STOP (treat as closing quote):
740-
# a) Strictly whitespace between quotes: ["a" "b"]
741-
is_whitespace = (1...j).all? { |k| peek_char(k).match?(/\s/) }
745+
# Reset position immediately after scanning
746+
@scanner.pos = saved_pos
747+
748+
# Check conditions to STOP (treat as closing quote):
749+
# a) Strictly whitespace between quotes
750+
# We can check this by examining the substring we just scanned
751+
substring_between = @scanner.string.byteslice(saved_pos + 1, j - 2)
752+
is_whitespace = substring_between&.match?(/\A\s*\z/)
742753

743-
# b) Next quote is followed by a separator: ["val1" val2",]
754+
# b) Next quote is followed by a separator
744755
is_next_closer = false
745756
if found_next
746-
k = j + 1
747-
k += 1 while peek_char(k)&.match?(/\s/) # skip whitespaces
748-
is_next_closer = TERMINATORS_VALUE.include?(peek_char(k))
757+
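# Look at what follows the next quote. The scanner was reset above, so
758+
# temporarily move it to just past that quote (`saved_pos + j`) rather than
759+
# calling peek_char with the offset `j`; the position is restored afterwards.
# NOTE(review): `j` counts characters while `pos` is a byte offset, so this assumes single-byte characters between the quotes — confirm.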
760+
@scanner.pos = saved_pos + j
761+
@scanner.skip(/\s+/)
762+
is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
763+
@scanner.pos = saved_pos
749764
end
750765

751766
return [true, true] unless is_whitespace || is_next_closer
@@ -1053,7 +1068,13 @@ def parse_comment
10531068
# It quickly iterates to find a character, handling escaped characters, and
10541069
# returns the index (offset) from the scanner
10551070
def skip_to_character(characters, start_idx: 0)
1056-
pattern = characters.is_a?(Array) ? Regexp.union(characters) : characters
1071+
pattern = if characters.is_a?(Regexp)
1072+
characters
1073+
else
1074+
# Wrap a lone value in an array, regex-escape each element, and join the alternatives with '|'
1075+
chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
1076+
Regexp.new(chars.join('|'))
1077+
end
10571078

10581079
saved_pos = @scanner.pos
10591080
# Skip start_idx

0 commit comments

Comments
 (0)