Skip to content

Commit c67825e

Browse files
committed
optimization
1 parent 2ccbeed commit c67825e

File tree

2 files changed

+123
-6
lines changed

2 files changed

+123
-6
lines changed

lib/json_mend/parser.rb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ def parse_string
357357
char = prepare_string_parsing
358358

359359
# A valid string can only start with a valid quote or, in our case, with a literal
360-
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9]/)
360+
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9$_-]/)
361361
return '' if TERMINATORS_STRING_GUESSED.include?(char)
362362

363363
@scanner.getch
@@ -436,7 +436,7 @@ def determine_delimiters(char:)
436436
when '“'
437437
lstring_delimiter = '“'
438438
rstring_delimiter = '”'
439-
when /[\p{L}0-9]/
439+
when /[\p{L}0-9$_-]/
440440
# Could be a boolean/null, but not if it's an object key.
441441
if BOOLEAN_OR_NULL_CHARS.include?(char.downcase) && !current_context?(:object_key)
442442
# parse_literal is non-destructive if it fails to match.
@@ -512,12 +512,13 @@ def check_unmatched_delimiters(
512512
# --- Main Parsing Loop ---
513513
while !@scanner.eos? && char != rstring_delimiter
514514
# Fast-path for unquoted keys (e.g. { key: val })
515-
# consumes a chunk of valid identifier characters at once.
515+
# consumes a chunk of valid identifier characters at once
516516
if missing_quotes && current_context?(:object_key)
517517
chunk = @scanner.scan(/[a-zA-Z0-9_$-]+/)
518518
if chunk
519519
string_parts << chunk
520520
char = peek_char
521+
next
521522
end
522523
end
523524

@@ -991,8 +992,7 @@ def parse_number
991992

992993
# Handle cases where the number ends with an invalid character.
993994
if !scanned_str.empty? && INVALID_NUMBER_TRAILERS.include?(scanned_str[-1])
994-
# Rewind scanner for the invalid char so it can be handled by the main loop (e.g. as a separator)
995-
@scanner.pos -= 1
995+
# Do not rewind the scanner; simply discard the invalid trailing character as garbage.
996996
scanned_str = scanned_str[0...-1]
997997
# Handle cases where what looked like a number is actually a string.
998998
# e.g. "123-abc"
@@ -1218,7 +1218,7 @@ def context_contain?(value)
12181218

12191219
# Checks if the character signifies the start of a string or literal
12201220
def string_start?(char)
1221-
STRING_DELIMITERS.include?(char) || char&.match?(/\p{L}/)
1221+
STRING_DELIMITERS.include?(char) || char&.match?(/[\p{L}$_]/)
12221222
end
12231223

12241224
# Checks if the character signifies the start of a number

spec/json_mend_spec.rb

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,123 @@
12041204
end
12051205
end
12061206

1207+
context 'when fast-path for unquoted keys' do
1208+
[
1209+
{
1210+
input: '{ simple: "val" }',
1211+
expected_output: JSON.dump({ 'simple' => 'val' }),
1212+
desc: 'simple alphabetic unquoted key'
1213+
},
1214+
{
1215+
input: '{ my_var_name: "val" }',
1216+
expected_output: JSON.dump({ 'my_var_name' => 'val' }),
1217+
desc: 'underscored identifier'
1218+
},
1219+
{
1220+
input: '{ $special-var_1: "val" }',
1221+
expected_output: JSON.dump({ '$special-var_1' => 'val' }),
1222+
desc: 'special characters ($, -) allowed in fast-path regex'
1223+
},
1224+
{
1225+
input: '{ key1: "v1", key2: "v2", key3: "v3" }',
1226+
expected_output: JSON.dump({ 'key1' => 'v1', 'key2' => 'v2', 'key3' => 'v3' }),
1227+
desc: 'sequence of fast-path keys'
1228+
},
1229+
{
1230+
input: '{veryLongVariableNameThatShouldBeScannedInOneGo: true}',
1231+
expected_output: JSON.dump({ 'veryLongVariableNameThatShouldBeScannedInOneGo' => true }),
1232+
desc: 'long key triggering chunk scan'
1233+
},
1234+
{
1235+
input: '{ key:val }',
1236+
expected_output: JSON.dump({ 'key' => 'val' }),
1237+
desc: 'unquoted key and unquoted value'
1238+
}
1239+
].each do |tc|
1240+
it "correctly parses #{tc[:desc]}" do
1241+
expect(described_class.repair(tc[:input])).to eq(tc[:expected_output])
1242+
end
1243+
end
1244+
end
1245+
1246+
context 'when parse_number optimization and rewind logic' do
1247+
[
1248+
{
1249+
input: '{"a":1,"b":2}',
1250+
expected_output: JSON.dump({ 'a' => 1, 'b' => 2 }),
1251+
desc: 'compact JSON with comma immediately following number'
1252+
},
1253+
{
1254+
input: '{"a":123,"b":456}',
1255+
expected_output: JSON.dump({ 'a' => 123, 'b' => 456 }),
1256+
desc: 'compact JSON with multi-digit numbers'
1257+
},
1258+
{
1259+
input: '{"float":1.5,"int":1}',
1260+
expected_output: JSON.dump({ 'float' => 1.5, 'int' => 1 }),
1261+
desc: 'compact JSON with mixed number types'
1262+
},
1263+
{
1264+
input: '[1,2,3,4]',
1265+
expected_output: JSON.dump([1, 2, 3, 4]),
1266+
desc: 'compact array with numbers'
1267+
},
1268+
{
1269+
input: '{"a": 1, "b": 2}',
1270+
expected_output: JSON.dump({ 'a' => 1, 'b' => 2 }),
1271+
desc: 'standard spacing (boundary check)'
1272+
},
1273+
{
1274+
input: '{"key": 1e5,}',
1275+
expected_output: JSON.dump({ 'key' => 100_000.0 }),
1276+
desc: 'scientific notation followed by comma'
1277+
},
1278+
{
1279+
input: '{"key": 123-}',
1280+
expected_output: JSON.dump({ 'key' => 123 }),
1281+
desc: 'number with invalid trailer needing strip and rewind'
1282+
}
1283+
].each do |tc|
1284+
it "correctly handles #{tc[:desc]}" do
1285+
expect(described_class.repair(tc[:input])).to eq(tc[:expected_output])
1286+
end
1287+
end
1288+
end
1289+
1290+
context 'when peek_char unicode stability' do
1291+
[
1292+
{
1293+
input: '{"ascii": 1, "uni\u00f6": 2}',
1294+
expected_output: JSON.dump({ 'ascii' => 1, 'uniö' => 2 }),
1295+
desc: 'mixed ASCII and Unicode escape'
1296+
},
1297+
{
1298+
input: '{"👍": "thumbs_up"}',
1299+
expected_output: JSON.dump({ '👍' => 'thumbs_up' }),
1300+
desc: 'multibyte emoji as key'
1301+
},
1302+
{
1303+
input: '{"“smart”": "quotes"}',
1304+
expected_output: JSON.dump({ '“smart”' => 'quotes' }),
1305+
desc: 'multibyte smart quotes as key'
1306+
},
1307+
{
1308+
input: '{"key": "value with — dash"}',
1309+
expected_output: JSON.dump({ 'key' => 'value with — dash' }),
1310+
desc: 'multibyte char in value'
1311+
},
1312+
{
1313+
input: '{"Український": "text"}',
1314+
expected_output: JSON.dump({ 'Український' => 'text' }),
1315+
desc: 'Cyrillic characters'
1316+
}
1317+
].each do |tc|
1318+
it "correctly parses #{tc[:desc]}" do
1319+
expect(described_class.repair(tc[:input])).to eq(tc[:expected_output])
1320+
end
1321+
end
1322+
end
1323+
12071324
context 'with valid JSON (direct parser usage)' do
12081325
[
12091326
{

0 commit comments

Comments
 (0)