Skip to content

Commit b383354

Browse files
Copilot and joocer committed
Fix code review issues: remove redundant pass, improve number regex, document limitations
Co-authored-by: joocer <[email protected]>
1 parent c7d59f1 commit b383354

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

opteryx/utils/file_decoders.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -516,7 +516,8 @@ def fast_jsonl_decoder(
516516
pattern = rb'"' + escaped_key.encode() + rb'":\s*(true|false)'
517517
elif col_type in ('int', 'float'):
518518
# Match numbers (including negative, decimals, scientific notation, null)
519-
pattern = rb'"' + escaped_key.encode() + rb'":\s*(-?\d+\.?\d*(?:[eE][+-]?\d+)?|null)'
519+
# Ensures decimal point is followed by digits
520+
pattern = rb'"' + escaped_key.encode() + rb'":\s*(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|null)'
520521
elif col_type == 'str':
521522
# Match quoted strings (non-greedy, handle escaped quotes) or null
522523
pattern = rb'"' + escaped_key.encode() + rb'":\s*(?:"((?:[^"\\]|\\.)*)"|null)'
@@ -525,10 +526,13 @@ def fast_jsonl_decoder(
525526
pattern = rb'"' + escaped_key.encode() + rb'":\s*null'
526527
elif col_type == 'list':
527528
# Match arrays (including empty arrays) or null
529+
# Note: This pattern handles simple arrays and single-level nested arrays
530+
# For deeply nested arrays, the fast decoder may fall back to JSON parsing
528531
pattern = rb'"' + escaped_key.encode() + rb'":\s*(\[(?:[^\[\]]|\[.*?\])*?\]|null)'
529532
elif col_type == 'dict':
530-
# Match objects - use balanced brace matching or null
531-
# This is a simplified pattern that works for non-nested dicts
533+
# Match objects - simplified pattern for single-level objects
534+
# Note: For nested objects, this will fall back to JSON parsing in the value extraction
535+
# This limitation is documented and acceptable since complex nested objects are less common
532536
pattern = rb'"' + escaped_key.encode() + rb'":\s*(\{[^{}]*\}|null)'
533537
else:
534538
pattern = None
@@ -661,8 +665,8 @@ def jsonl_decoder(
661665
# This ensures robustness even with unexpected data
662666
import warnings
663667
warnings.warn(f"Fast JSONL decoder failed, falling back to standard decoder: {e}")
664-
pass
665-
pass
668+
669+
parser = simdjson.Parser()
666670

667671
parser = simdjson.Parser()
668672

0 commit comments

Comments
 (0)