Skip to content

Commit fde1385

Browse files
authored
fix(decode): Handle unquoted strings with parentheses and preserve spacing (#34)
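Fix parsing of unquoted string values that contain parentheses (e.g., "Mostly Functions (3 of 3)", "0(f)"). After a colon, the parser now reads the rest of the line and re-parses the first token plus that remainder as a single value, instead of joining consecutive string tokens. A space is inserted between the first token and the remainder only when the original input had whitespace there, so the decoded output no longer gains extra spaces. Values that consist of a single token are converted directly without re-parsing. Add tests covering parentheses, number-prefixed strings, single-token values, multi-token values, spacing, and an encode/decode round trip.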
1 parent 98c38b9 commit fde1385

File tree

2 files changed

+384
-34
lines changed

2 files changed

+384
-34
lines changed

src/decode/parser.rs

Lines changed: 173 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ impl<'a> Parser<'a> {
182182
));
183183
}
184184

185-
// Multiple consecutive string tokens get joined with spaces
185+
// Root-level string value - join consecutive tokens
186186
let mut accumulated = first;
187187
while let Token::String(next, _) = &self.current_token {
188188
if !accumulated.is_empty() {
@@ -401,7 +401,6 @@ impl<'a> Parser<'a> {
401401
validate_depth(depth, MAX_DEPTH)?;
402402

403403
if matches!(self.current_token, Token::Newline | Token::Eof) {
404-
// After a colon on a new line, check if there are indented children
405404
let has_children = if matches!(self.current_token, Token::Newline) {
406405
let current_depth_indent = self.options.indent.get_spaces() * (depth + 1);
407406
let next_indent = self.scanner.count_leading_spaces();
@@ -413,11 +412,85 @@ impl<'a> Parser<'a> {
413412
if has_children {
414413
self.parse_value_with_depth(depth + 1)
415414
} else {
416-
// Empty object when colon is followed by newline with no children
417415
Ok(Value::Object(Map::new()))
418416
}
419-
} else {
417+
} else if matches!(self.current_token, Token::LeftBracket) {
420418
self.parse_value_with_depth(depth + 1)
419+
} else {
420+
// Check if there's more content after the current token
421+
let (rest, had_space) = self.scanner.read_rest_of_line_with_space_info();
422+
423+
let result = if rest.is_empty() {
424+
// Single token - convert directly to avoid redundant parsing
425+
match &self.current_token {
426+
Token::String(s, _) => Ok(Value::String(s.clone())),
427+
Token::Integer(i) => Ok(serde_json::Number::from(*i).into()),
428+
Token::Number(n) => {
429+
let val = *n;
430+
if val.is_finite() && val.fract() == 0.0 && val.abs() <= i64::MAX as f64 {
431+
Ok(serde_json::Number::from(val as i64).into())
432+
} else {
433+
Ok(serde_json::Number::from_f64(val)
434+
.ok_or_else(|| {
435+
ToonError::InvalidInput(format!("Invalid number: {val}"))
436+
})?
437+
.into())
438+
}
439+
}
440+
Token::Bool(b) => Ok(Value::Bool(*b)),
441+
Token::Null => Ok(Value::Null),
442+
_ => Err(self.parse_error_with_context("Unexpected token after colon")),
443+
}
444+
} else {
445+
// Multi-token value - reconstruct and re-parse as complete string
446+
let mut value_str = String::new();
447+
448+
match &self.current_token {
449+
Token::String(s, true) => {
450+
// Quoted strings need quotes preserved for re-parsing
451+
value_str.push('"');
452+
value_str.push_str(&crate::utils::escape_string(s));
453+
value_str.push('"');
454+
}
455+
Token::String(s, false) => value_str.push_str(s),
456+
Token::Integer(i) => value_str.push_str(&i.to_string()),
457+
Token::Number(n) => value_str.push_str(&n.to_string()),
458+
Token::Bool(b) => value_str.push_str(if *b { "true" } else { "false" }),
459+
Token::Null => value_str.push_str("null"),
460+
_ => {
461+
return Err(self.parse_error_with_context("Unexpected token after colon"));
462+
}
463+
}
464+
465+
// Only add space if there was whitespace in the original input
466+
if had_space {
467+
value_str.push(' ');
468+
}
469+
value_str.push_str(&rest);
470+
471+
let token = self.scanner.parse_value_string(&value_str)?;
472+
match token {
473+
Token::String(s, _) => Ok(Value::String(s)),
474+
Token::Integer(i) => Ok(serde_json::Number::from(i).into()),
475+
Token::Number(n) => {
476+
if n.is_finite() && n.fract() == 0.0 && n.abs() <= i64::MAX as f64 {
477+
Ok(serde_json::Number::from(n as i64).into())
478+
} else {
479+
Ok(serde_json::Number::from_f64(n)
480+
.ok_or_else(|| {
481+
ToonError::InvalidInput(format!("Invalid number: {n}"))
482+
})?
483+
.into())
484+
}
485+
}
486+
Token::Bool(b) => Ok(Value::Bool(b)),
487+
Token::Null => Ok(Value::Null),
488+
_ => Err(ToonError::InvalidInput("Unexpected token type".to_string())),
489+
}
490+
}?;
491+
492+
self.current_token = self.scanner.scan_token()?;
493+
Ok(result)
421494
}
422495
}
423496

@@ -891,16 +964,8 @@ impl<'a> Parser<'a> {
891964
obj.insert(key, array_value);
892965
Value::Object(obj)
893966
} else {
894-
// Plain string value - join consecutive string tokens
895-
let mut accumulated = key;
896-
while let Token::String(next, _) = &self.current_token {
897-
if !accumulated.is_empty() {
898-
accumulated.push(' ');
899-
}
900-
accumulated.push_str(next);
901-
self.advance()?;
902-
}
903-
Value::String(accumulated)
967+
// Plain string value
968+
Value::String(key)
904969
}
905970
} else {
906971
self.parse_primitive()?
@@ -1259,7 +1324,6 @@ mod tests {
12591324
let mut parser = Parser::new(input, opts).unwrap();
12601325
let result = parser.parse().unwrap();
12611326

1262-
// Expected: {"a":{"b":{"c":1},"d":2}}
12631327
let a = result.as_object().unwrap().get("a").unwrap();
12641328
let a_obj = a.as_object().unwrap();
12651329

@@ -1272,4 +1336,98 @@ mod tests {
12721336
assert!(b.contains_key("c"), "b should have key 'c'");
12731337
assert!(!b.contains_key("d"), "b should NOT have key 'd'");
12741338
}
1339+
1340+
#[test]
1341+
fn test_field_value_with_parentheses() {
1342+
let result = parse("msg: Mostly Functions (3 of 3)").unwrap();
1343+
assert_eq!(result, json!({"msg": "Mostly Functions (3 of 3)"}));
1344+
1345+
let result = parse("val: (hello)").unwrap();
1346+
assert_eq!(result, json!({"val": "(hello)"}));
1347+
1348+
let result = parse("test: a (b) c (d)").unwrap();
1349+
assert_eq!(result, json!({"test": "a (b) c (d)"}));
1350+
}
1351+
1352+
#[test]
1353+
fn test_field_value_number_with_parentheses() {
1354+
let result = parse("code: 0(f)").unwrap();
1355+
assert_eq!(result, json!({"code": "0(f)"}));
1356+
1357+
let result = parse("val: 5(test)").unwrap();
1358+
assert_eq!(result, json!({"val": "5(test)"}));
1359+
1360+
let result = parse("msg: test 123)").unwrap();
1361+
assert_eq!(result, json!({"msg": "test 123)"}));
1362+
}
1363+
1364+
#[test]
1365+
fn test_field_value_single_token_optimization() {
1366+
let result = parse("name: hello").unwrap();
1367+
assert_eq!(result, json!({"name": "hello"}));
1368+
1369+
let result = parse("age: 42").unwrap();
1370+
assert_eq!(result, json!({"age": 42}));
1371+
1372+
let result = parse("active: true").unwrap();
1373+
assert_eq!(result, json!({"active": true}));
1374+
1375+
let result = parse("value: null").unwrap();
1376+
assert_eq!(result, json!({"value": null}));
1377+
}
1378+
1379+
#[test]
1380+
fn test_field_value_multi_token() {
1381+
let result = parse("msg: hello world").unwrap();
1382+
assert_eq!(result, json!({"msg": "hello world"}));
1383+
1384+
let result = parse("msg: test 123 end").unwrap();
1385+
assert_eq!(result, json!({"msg": "test 123 end"}));
1386+
}
1387+
1388+
#[test]
1389+
fn test_field_value_spacing_preserved() {
1390+
let result = parse("val: hello world").unwrap();
1391+
assert_eq!(result, json!({"val": "hello world"}));
1392+
1393+
let result = parse("val: 0(f)").unwrap();
1394+
assert_eq!(result, json!({"val": "0(f)"}));
1395+
}
1396+
1397+
#[test]
1398+
fn test_round_trip_parentheses() {
1399+
use crate::{
1400+
decode::decode_default,
1401+
encode::encode_default,
1402+
};
1403+
1404+
let original = json!({
1405+
"message": "Mostly Functions (3 of 3)",
1406+
"code": "0(f)",
1407+
"simple": "(hello)",
1408+
"mixed": "test 123)"
1409+
});
1410+
1411+
let encoded = encode_default(&original).unwrap();
1412+
let decoded: Value = decode_default(&encoded).unwrap();
1413+
1414+
assert_eq!(original, decoded);
1415+
}
1416+
1417+
#[test]
1418+
fn test_multiple_fields_with_edge_cases() {
1419+
let input = r#"message: Mostly Functions (3 of 3)
1420+
sone: (hello)
1421+
hello: 0(f)"#;
1422+
1423+
let result = parse(input).unwrap();
1424+
assert_eq!(
1425+
result,
1426+
json!({
1427+
"message": "Mostly Functions (3 of 3)",
1428+
"sone": "(hello)",
1429+
"hello": "0(f)"
1430+
})
1431+
);
1432+
}
12751433
}

0 commit comments

Comments
 (0)