@@ -8,7 +8,7 @@ module JsonMend
88 # The core parser that does the heavy lifting of fixing the JSON.
99 class Parser
1010 COMMENT_DELIMETERS = [ '#' , '/' ] . freeze
11- NUMBER_CHARS = Set . new ( " 0123456789-.eE/," . chars ) . freeze
11+ NUMBER_CHARS = Set . new ( ' 0123456789-.eE/,' . chars ) . freeze
1212 STRING_DELIMITERS = [ '"' , "'" , '“' , '”' ] . freeze
1313
1414 def initialize ( json_string )
@@ -22,17 +22,17 @@ def parse
2222
2323 unless @scanner . eos?
2424 json = [ json ]
25- while ! @scanner . eos?
25+ until @scanner . eos?
2626 new_json = parse_json
27- if new_json != ""
27+ if new_json . empty?
28+ @scanner . getch # continue
29+ else
2830 json . pop if same_object_type? ( json . last , new_json )
2931 json << new_json
30- else
31- @scanner . getch
3232 end
3333 end
3434
35- json = json . length > 1 ? json : json . first
35+ json = json . first if json . length > 1
3636 end
3737
3838 json
@@ -49,7 +49,7 @@ def parse_json
4949 when '['
5050 @scanner . getch # consume '['
5151 return parse_array
52- when -> ( c ) { STRING_DELIMITERS . include? ( c ) || c &.match? ( /[a-zA-Z] / ) }
52+ when -> ( c ) { STRING_DELIMITERS . include? ( c ) || c &.match? ( /\p {L} / ) }
5353 return parse_string
5454 when -> ( c ) { c &.match? ( /\d / ) || c == '-' || c == '.' }
5555 return parse_number
@@ -230,7 +230,7 @@ def parse_string
230230 return parse_comment if [ '#' , '/' ] . include? ( char )
231231
232232 # A valid string can only start with a valid quote or, in our case, with a literal
233- while !@scanner . eos? && !STRING_DELIMITERS . include? ( char ) && !char . match? ( /[a-zA-Z0 -9]/ )
233+ while !@scanner . eos? && !STRING_DELIMITERS . include? ( char ) && !char . match? ( /[\p {L}0 -9]/ )
234234 @scanner . getch
235235 char = peek_char
236236 end
@@ -244,7 +244,7 @@ def parse_string
244244 when '“'
245245 lstring_delimiter = '“'
246246 rstring_delimiter = '”'
247- when /[a-zA-Z0 -9]/
247+ when /[\p {L}0 -9]/
248248 # Could be a boolean/null, but not if it's an object key.
249249 if [ "t" , "f" , "n" ] . include? ( char . downcase ) && current_context != :object_key
250250 # parse_literal is non-destructive if it fails to match.
@@ -482,7 +482,7 @@ def parse_string
482482 while next_c && ![ rstring_delimiter , lstring_delimiter ] . include? ( next_c )
483483 # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
484484 # This is because the routine after will make sure to correct any bad guess and this solves a corner case
485- check_comma_in_object_value = false if check_comma_in_object_value && next_c . match? ( /[a-zA-Z] / )
485+ check_comma_in_object_value = false if check_comma_in_object_value && next_c . match? ( /\p {L} / )
486486 # If we are in an object context, let's check for the right delimiters
487487 if ( @context . include? ( :object_key ) && [ ':' , '}' ] . include? ( next_c ) ) ||
488488 ( @context . include? ( :object_value ) && next_c == '}' ) ||
@@ -631,7 +631,7 @@ def parse_number
631631 scanned_str . chop!
632632 # Handle cases where what looked like a number is actually a string.
633633 # e.g., "123-abc"
634- elsif peek_char &.match? ( /[a-zA-Z] / )
634+ elsif peek_char &.match? ( /\p {L} / )
635635 # Roll back the entire scan and re-parse as a string.
636636 @scanner . pos -= scanned_str . bytesize
637637 return parse_string
0 commit comments