@@ -516,7 +516,8 @@ def fast_jsonl_decoder(
516516 pattern = rb'"' + escaped_key .encode () + rb'":\s*(true|false)'
517517 elif col_type in ('int' , 'float' ):
518518 # Match numbers (including negative, decimals, scientific notation, null)
519- pattern = rb'"' + escaped_key .encode () + rb'":\s*(-?\d+\.?\d*(?:[eE][+-]?\d+)?|null)'
519+ # A decimal point is only consumed when at least one digit follows it (a bare "1." is no longer matched whole)
520+ pattern = rb'"' + escaped_key .encode () + rb'":\s*(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|null)'
520521 elif col_type == 'str' :
521522 # Match quoted strings (non-greedy, handle escaped quotes) or null
522523 pattern = rb'"' + escaped_key .encode () + rb'":\s*(?:"((?:[^"\\]|\\.)*)"|null)'
@@ -525,10 +526,13 @@ def fast_jsonl_decoder(
525526 pattern = rb'"' + escaped_key .encode () + rb'":\s*null'
526527 elif col_type == 'list' :
527528 # Match arrays (including empty arrays) or null
529+ # Note: This pattern handles simple arrays and single-level nested arrays
530+ # For deeply nested arrays, the fast decoder may fall back to JSON parsing
528531 pattern = rb'"' + escaped_key .encode () + rb'":\s*(\[(?:[^\[\]]|\[.*?\])*?\]|null)'
529532 elif col_type == 'dict' :
530- # Match objects - use balanced brace matching or null
531- # This is a simplified pattern that works for non-nested dicts
533+ # Match flat (single-level) objects or null; [^{}]* cannot cross a nested brace
534+ # Note: a value containing a nested object does not match this pattern and is expected to fall back to JSON parsing during value extraction
535+ # This limitation is accepted because nested objects are less common in the data this fast path targets
532536 pattern = rb'"' + escaped_key .encode () + rb'":\s*(\{[^{}]*\}|null)'
533537 else :
534538 pattern = None
@@ -661,8 +665,8 @@ def jsonl_decoder(
661665 # This ensures robustness even with unexpected data
662666 import warnings
663667 warnings .warn (f"Fast JSONL decoder failed, falling back to standard decoder: { e } " )
664- pass
665- pass
668+
669+ parser = simdjson . Parser ()
666670
667671 parser = simdjson .Parser ()
668672
0 commit comments