Skip to content

Commit 51411af

Browse files
authored
feat: add streaming mode for large files (#58)
Adds a `--stream` flag for element-by-element processing of JSON arrays, JSONL, and CSV input. In streaming mode, each element is parsed, optionally transformed via mappings, and written to the output format immediately rather than buffering the entire dataset in memory.

Supported streaming pipelines:
- JSONL → JSONL/JSON/CSV (true line-by-line streaming)
- CSV → JSONL/JSON/CSV (true row-by-row streaming)
- JSON array → JSONL/JSON/CSV (parse + stream output)

Features:
- StreamWriter abstraction for incremental output
- Mapping support (per-element transforms via -e/-m)
- CSV delimiter support in streaming mode
- Graceful fallback: --stream is silently ignored for unsupported format combinations (e.g. YAML→JSON)
- Periodic flushing via BufWriter

Includes 24 unit tests and 7 integration tests covering all streaming combinations, edge cases, and mapping integration.

Fixes #29
1 parent ca37b8a commit 51411af

File tree

4 files changed

+853
-0
lines changed

4 files changed

+853
-0
lines changed

src/cli.rs

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,10 @@ pub struct Cli {
150150
/// Treat YAML input as multi-document (return array of documents)
151151
#[arg(long = "yaml-multi")]
152152
pub yaml_multi: bool,
153+
154+
/// Enable streaming mode for large files (processes elements one at a time)
155+
#[arg(long = "stream")]
156+
pub stream: bool,
153157
}
154158

155159
impl Cli {
@@ -419,6 +423,11 @@ pub fn run(cli: &Cli) -> crate::error::Result<()> {
419423
let in_fmt = cli.resolve_input_format()?;
420424
let out_fmt = cli.resolve_output_format()?;
421425

426+
// Streaming mode: process elements one at a time for large files
427+
if cli.stream && crate::streaming::can_stream(in_fmt, out_fmt) {
428+
return crate::streaming::run_streaming(cli, in_fmt, out_fmt, mapping_program.as_ref());
429+
}
430+
422431
let input_data = read_input(cli)?;
423432
let value = parse_input_with_cli(&input_data, in_fmt, Some(cli))?;
424433

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,5 @@ pub mod cli;
22
pub mod error;
33
pub mod formats;
44
pub mod mapping;
5+
pub mod streaming;
56
pub mod value;

0 commit comments

Comments
 (0)