@@ -149,9 +149,34 @@ impl<'a> Parser<'a> {
149149 self . advance ( ) ?;
150150 self . parse_object_with_initial_key ( key, depth)
151151 } else {
152+ let first_text = self . scanner . last_token_text ( ) . to_string ( ) ;
152153 let val = * i;
153154 self . advance ( ) ?;
154- Ok ( serde_json:: Number :: from ( val) . into ( ) )
155+ // Check if followed by more value tokens on the same line
156+ match & self . current_token {
157+ Token :: String ( ..)
158+ | Token :: Integer ( ..)
159+ | Token :: Number ( ..)
160+ | Token :: Bool ( ..)
161+ | Token :: Null => {
162+ let mut accumulated = first_text;
163+ while let Token :: String ( ..)
164+ | Token :: Integer ( ..)
165+ | Token :: Number ( ..)
166+ | Token :: Bool ( ..)
167+ | Token :: Null = & self . current_token
168+ {
169+ let ws = self . scanner . last_whitespace_count ( ) . max ( 1 ) ;
170+ for _ in 0 ..ws {
171+ accumulated. push ( ' ' ) ;
172+ }
173+ accumulated. push_str ( self . scanner . last_token_text ( ) ) ;
174+ self . advance ( ) ?;
175+ }
176+ Ok ( Value :: String ( accumulated) )
177+ }
178+ _ => Ok ( serde_json:: Number :: from ( val) . into ( ) ) ,
179+ }
155180 }
156181 }
157182 Token :: Number ( n) => {
@@ -161,17 +186,45 @@ impl<'a> Parser<'a> {
161186 self . advance ( ) ?;
162187 self . parse_object_with_initial_key ( key, depth)
163188 } else {
189+ let first_text = self . scanner . last_token_text ( ) . to_string ( ) ;
164190 let val = * n;
165191 self . advance ( ) ?;
166- // Normalize floats that are actually integers
167- if val. is_finite ( ) && val. fract ( ) == 0.0 && val. abs ( ) <= i64:: MAX as f64 {
168- Ok ( serde_json:: Number :: from ( val as i64 ) . into ( ) )
169- } else {
170- Ok ( serde_json:: Number :: from_f64 ( val)
171- . ok_or_else ( || {
172- ToonError :: InvalidInput ( format ! ( "Invalid number: {val}" ) )
173- } ) ?
174- . into ( ) )
192+ // Check if followed by more value tokens on the same line
193+ match & self . current_token {
194+ Token :: String ( ..)
195+ | Token :: Integer ( ..)
196+ | Token :: Number ( ..)
197+ | Token :: Bool ( ..)
198+ | Token :: Null => {
199+ let mut accumulated = first_text;
200+ while let Token :: String ( ..)
201+ | Token :: Integer ( ..)
202+ | Token :: Number ( ..)
203+ | Token :: Bool ( ..)
204+ | Token :: Null = & self . current_token
205+ {
206+ let ws = self . scanner . last_whitespace_count ( ) . max ( 1 ) ;
207+ for _ in 0 ..ws {
208+ accumulated. push ( ' ' ) ;
209+ }
210+ accumulated. push_str ( self . scanner . last_token_text ( ) ) ;
211+ self . advance ( ) ?;
212+ }
213+ Ok ( Value :: String ( accumulated) )
214+ }
215+ _ => {
216+ // Normalize floats that are actually integers
217+ if val. is_finite ( ) && val. fract ( ) == 0.0 && val. abs ( ) <= i64:: MAX as f64
218+ {
219+ Ok ( serde_json:: Number :: from ( val as i64 ) . into ( ) )
220+ } else {
221+ Ok ( serde_json:: Number :: from_f64 ( val)
222+ . ok_or_else ( || {
223+ ToonError :: InvalidInput ( format ! ( "Invalid number: {val}" ) )
224+ } ) ?
225+ . into ( ) )
226+ }
227+ }
175228 }
176229 }
177230 }
@@ -197,13 +250,22 @@ impl<'a> Parser<'a> {
197250 ) ) ;
198251 }
199252
200- // Root-level string value - join consecutive tokens
253+ if matches ! ( self . current_token, Token :: Newline | Token :: Eof ) {
254+ return Ok ( Value :: String ( first) ) ;
255+ }
256+ // Root-level string value - join consecutive tokens with exact spacing
201257 let mut accumulated = first;
202- while let Token :: String ( next, _) = & self . current_token {
203- if !accumulated. is_empty ( ) {
258+ while let Token :: String ( ..)
259+ | Token :: Integer ( ..)
260+ | Token :: Number ( ..)
261+ | Token :: Bool ( ..)
262+ | Token :: Null = & self . current_token
263+ {
264+ let ws = self . scanner . last_whitespace_count ( ) . max ( 1 ) ;
265+ for _ in 0 ..ws {
204266 accumulated. push ( ' ' ) ;
205267 }
206- accumulated. push_str ( next ) ;
268+ accumulated. push_str ( self . scanner . last_token_text ( ) ) ;
207269 self . advance ( ) ?;
208270 }
209271 Ok ( Value :: String ( accumulated) )
@@ -433,9 +495,10 @@ impl<'a> Parser<'a> {
433495 self . parse_value_with_depth ( depth + 1 )
434496 } else {
435497 // Check if there's more content after the current token
436- let ( rest, had_space) = self . scanner . read_rest_of_line_with_space_info ( ) ;
498+ let token_text = self . scanner . last_token_text ( ) . to_string ( ) ;
499+ let ( rest, space_count) = self . scanner . read_rest_of_line_with_space_info ( ) ;
437500
438- let result = if rest. is_empty ( ) {
501+ let result = if rest. is_empty ( ) && space_count == 0 {
439502 // Single token - convert directly to avoid redundant parsing
440503 match & self . current_token {
441504 Token :: String ( s, _) => Ok ( Value :: String ( s. clone ( ) ) ) ,
@@ -457,28 +520,24 @@ impl<'a> Parser<'a> {
457520 _ => Err ( self . parse_error_with_context ( "Unexpected token after colon" ) ) ,
458521 }
459522 } else {
460- // Multi-token value - reconstruct and re-parse as complete string
461- let mut value_str = String :: new ( ) ;
462-
463- match & self . current_token {
464- Token :: String ( s, true ) => {
465- // Quoted strings need quotes preserved for re-parsing
466- value_str. push ( '"' ) ;
467- value_str. push_str ( & crate :: utils:: escape_string ( s) ) ;
468- value_str. push ( '"' ) ;
523+ // Multi-token value - reconstruct using original token text and re-parse
524+ let mut value_str = match & self . current_token {
525+ Token :: String ( _, true ) => {
526+ // Quoted strings: use last_token_text which includes quotes
527+ token_text. clone ( )
469528 }
470- Token :: String ( s , false ) => value_str . push_str ( s ) ,
471- Token :: Integer ( i ) => value_str . push_str ( & i . to_string ( ) ) ,
472- Token :: Number ( n ) => value_str . push_str ( & n . to_string ( ) ) ,
473- Token :: Bool ( b ) => value_str . push_str ( if * b { "true" } else { "false" } ) ,
474- Token :: Null => value_str . push_str ( "null" ) ,
529+ Token :: String ( _ , false )
530+ | Token :: Integer ( _ )
531+ | Token :: Number ( _ )
532+ | Token :: Bool ( _ )
533+ | Token :: Null => token_text . clone ( ) ,
475534 _ => {
476535 return Err ( self . parse_error_with_context ( "Unexpected token after colon" ) ) ;
477536 }
478- }
537+ } ;
479538
480- // Only add space if there was whitespace in the original input
481- if had_space {
539+ // Preserve exact spacing from the original input
540+ for _ in 0 ..space_count {
482541 value_str. push ( ' ' ) ;
483542 }
484543 value_str. push_str ( & rest) ;
@@ -1112,71 +1171,65 @@ impl<'a> Parser<'a> {
11121171 }
11131172
11141173 fn parse_tabular_field_value ( & mut self ) -> ToonResult < Value > {
1115- match & self . current_token {
1116- Token :: Null => {
1117- self . advance ( ) ?;
1118- Ok ( Value :: Null )
1119- }
1120- Token :: Bool ( b) => {
1121- let val = * b;
1122- self . advance ( ) ?;
1123- Ok ( Value :: Bool ( val) )
1124- }
1125- Token :: Integer ( i) => {
1126- let val = * i;
1127- self . advance ( ) ?;
1128- // If followed by string tokens, treat the whole value as a string
1129- if let Token :: String ( ..) = & self . current_token {
1130- let mut accumulated = val. to_string ( ) ;
1131- while let Token :: String ( next, _) = & self . current_token {
1132- accumulated. push ( ' ' ) ;
1133- accumulated. push_str ( next) ;
1134- self . advance ( ) ?;
1135- }
1136- Ok ( Value :: String ( accumulated) )
1137- } else {
1138- Ok ( Number :: from ( val) . into ( ) )
1139- }
1140- }
1141- Token :: Number ( n) => {
1142- let val = * n;
1143- self . advance ( ) ?;
1144- // If followed by string tokens, treat the whole value as a string
1145- if let Token :: String ( ..) = & self . current_token {
1146- let mut accumulated = val. to_string ( ) ;
1147- while let Token :: String ( next, _) = & self . current_token {
1148- accumulated. push ( ' ' ) ;
1149- accumulated. push_str ( next) ;
1150- self . advance ( ) ?;
1174+ // Get the original text of the current token
1175+ let token_text = self . scanner . last_token_text ( ) . to_string ( ) ;
1176+
1177+ // Read remaining text until delimiter/newline/EOF
1178+ let ( rest, space_count) = self . scanner . read_until_delimiter_with_space_info ( ) ;
1179+
1180+ if rest. is_empty ( ) && space_count == 0 {
1181+ // Single token — handle as primitive directly
1182+ let result = match & self . current_token {
1183+ Token :: Null => Ok ( Value :: Null ) ,
1184+ Token :: Bool ( b) => Ok ( Value :: Bool ( * b) ) ,
1185+ Token :: Integer ( i) => Ok ( Number :: from ( * i) . into ( ) ) ,
1186+ Token :: Number ( n) => {
1187+ let val = * n;
1188+ if val. is_finite ( ) && val. fract ( ) == 0.0 && val. abs ( ) <= i64:: MAX as f64 {
1189+ Ok ( Number :: from ( val as i64 ) . into ( ) )
1190+ } else {
1191+ Ok ( Number :: from_f64 ( val)
1192+ . ok_or_else ( || {
1193+ ToonError :: InvalidInput ( format ! ( "Invalid number: {val}" ) )
1194+ } ) ?
1195+ . into ( ) )
11511196 }
1152- Ok ( Value :: String ( accumulated) )
1153- } else if val. is_finite ( ) && val. fract ( ) == 0.0 && val. abs ( ) <= i64:: MAX as f64 {
1154- Ok ( Number :: from ( val as i64 ) . into ( ) )
1155- } else {
1156- Ok ( Number :: from_f64 ( val)
1157- . ok_or_else ( || ToonError :: InvalidInput ( format ! ( "Invalid number: {val}" ) ) ) ?
1158- . into ( ) )
11591197 }
1198+ Token :: String ( s, _) => Ok ( Value :: String ( s. clone ( ) ) ) ,
1199+ _ => Err ( self . parse_error_with_context ( format ! (
1200+ "Expected primitive value, found {:?}" ,
1201+ self . current_token
1202+ ) ) ) ,
1203+ } ;
1204+ self . advance ( ) ?;
1205+ result
1206+ } else {
1207+ // Multiple tokens — combine original text + spaces + rest, then type-infer
1208+ let mut value_str = token_text;
1209+ for _ in 0 ..space_count {
1210+ value_str. push ( ' ' ) ;
11601211 }
1161- Token :: String ( s, _) => {
1162- // Tabular fields can have multiple string tokens joined with spaces
1163- let mut accumulated = s. clone ( ) ;
1164- self . advance ( ) ?;
1212+ value_str. push_str ( & rest) ;
11651213
1166- while let Token :: String ( next, _) = & self . current_token {
1167- if !accumulated. is_empty ( ) {
1168- accumulated. push ( ' ' ) ;
1214+ let token = self . scanner . parse_value_string ( & value_str) ?;
1215+ // Rescan so current_token is positioned at the next delimiter/newline
1216+ self . current_token = self . scanner . scan_token ( ) ?;
1217+ match token {
1218+ Token :: String ( s, _) => Ok ( Value :: String ( s) ) ,
1219+ Token :: Integer ( i) => Ok ( Number :: from ( i) . into ( ) ) ,
1220+ Token :: Number ( n) => {
1221+ if n. is_finite ( ) && n. fract ( ) == 0.0 && n. abs ( ) <= i64:: MAX as f64 {
1222+ Ok ( Number :: from ( n as i64 ) . into ( ) )
1223+ } else {
1224+ Ok ( Number :: from_f64 ( n)
1225+ . ok_or_else ( || ToonError :: InvalidInput ( format ! ( "Invalid number: {n}" ) ) ) ?
1226+ . into ( ) )
11691227 }
1170- accumulated. push_str ( next) ;
1171- self . advance ( ) ?;
11721228 }
1173-
1174- Ok ( Value :: String ( accumulated) )
1229+ Token :: Bool ( b) => Ok ( Value :: Bool ( b) ) ,
1230+ Token :: Null => Ok ( Value :: Null ) ,
1231+ _ => Err ( ToonError :: InvalidInput ( "Unexpected token type" . to_string ( ) ) ) ,
11751232 }
1176- _ => Err ( self . parse_error_with_context ( format ! (
1177- "Expected primitive value, found {:?}" ,
1178- self . current_token
1179- ) ) ) ,
11801233 }
11811234 }
11821235
@@ -1695,7 +1748,7 @@ hello: 0(f)"#;
16951748 // Issue #56: Array elements starting with a number should be parsed as string
16961749 // when followed by non-numeric text
16971750 let result = parse ( "version1[1]: 1.0 something" ) . unwrap ( ) ;
1698- assert_eq ! ( result[ "version1" ] , json!( [ "1 something" ] ) ) ;
1751+ assert_eq ! ( result[ "version1" ] , json!( [ "1.0 something" ] ) ) ;
16991752
17001753 let result = parse ( "data[1]: 42 units" ) . unwrap ( ) ;
17011754 assert_eq ! ( result[ "data" ] , json!( [ "42 units" ] ) ) ;
@@ -1707,4 +1760,64 @@ hello: 0(f)"#;
17071760 let result = parse ( "nums[1]: 2.75" ) . unwrap ( ) ;
17081761 assert_eq ! ( result[ "nums" ] , json!( [ 2.75 ] ) ) ;
17091762 }
1763+
#[test]
fn test_issue_59_multiple_spaces_preserved() {
    // Issue #59: runs of several spaces between words must be kept verbatim.
    // Every input below contains TWO spaces between the words; with a single
    // space the test could not tell preserved spacing from collapsed spacing.

    // Field value context
    let result = parse("key: a  b").unwrap();
    assert_eq!(result["key"], json!("a  b"));

    // Tabular cell context
    let result = parse("data[2]: a  b, c  d").unwrap();
    assert_eq!(result["data"], json!(["a  b", "c  d"]));

    // Root-level value
    let result = parse("a  b").unwrap();
    assert_eq!(result, json!("a  b"));
}
1779+
#[test]
fn test_issue_60_mixed_type_tokens_as_string() {
    // Issue #60: a cell or value mixing token kinds ("1 null", "a 1") is a
    // single string, not a number/null followed by leftovers.

    // Tabular cell context
    let table = parse("data[2]: 1 null, a 1").unwrap();
    assert_eq!(table["data"], json!(["1 null", "a 1"]));

    // Root-level value: the whole document is the string itself.
    for text in ["1 null", "a 1"] {
        let root = parse(text).unwrap();
        assert_eq!(root, json!(text));
    }

    // Field value context: (document, expected value of `key`).
    let field_cases = [("key: 1 null", "1 null"), ("key: a 1", "a 1")];
    for (input, expected) in field_cases {
        let doc = parse(input).unwrap();
        assert_eq!(doc["key"], json!(expected));
    }
}
1801+
#[test]
fn test_issue_61_number_format_preserved() {
    // Issue #61: when a number is followed by text, its original spelling
    // ("1.0", "1e1") must survive; "1.0 b" must not turn into "1 b".

    // Tabular cell context
    let table = parse("data[2]: 1.0 b, 1e1 b").unwrap();
    assert_eq!(table["data"], json!(["1.0 b", "1e1 b"]));

    // Field value context: (document, expected value of `key`).
    let field_cases = [("key: 1.0 b", "1.0 b"), ("key: 1e1 b", "1e1 b")];
    for (input, expected) in field_cases {
        let doc = parse(input).unwrap();
        assert_eq!(doc["key"], json!(expected));
    }

    // Root-level value: the whole document is the string itself.
    for text in ["1.0 b", "1e1 b"] {
        let root = parse(text).unwrap();
        assert_eq!(root, json!(text));
    }
}
17101823}
0 commit comments