Skip to content

Commit 38288f5

Browse files
jqnatividad and claude committed
fix(pragmastat): propagate no_headers to parallel reader & add tests
Propagate the --no-headers flag to per-chunk Config in the parallel CSV reading path, fixing incorrect seek positions when headers are disabled. Add two integration tests exercising the parallel path (>=10k rows): pragmastat_parallel_reading and pragmastat_parallel_reading_no_headers. Addresses Copilot review feedback on PR #3595. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f5a6fd2 commit 38288f5

File tree

2 files changed

+79
-1
lines changed

2 files changed

+79
-1
lines changed

src/cmd/pragmastat.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -630,6 +630,7 @@ fn collect_numeric_values_parallel(
630630
let selected_vec = selected.to_vec();
631631
let col_types_vec = col_types.clone();
632632
let delimiter = Delimiter(rconfig.get_delimiter());
633+
let no_headers = rconfig.no_headers;
633634

634635
for chunk_idx in 0..nchunks {
635636
let send = send.clone();
@@ -644,7 +645,9 @@ fn collect_numeric_values_parallel(
644645
};
645646

646647
pool.execute(move || {
647-
let rconfig_chunk = Config::new(Some(&input_path_string)).delimiter(Some(delimiter));
648+
let rconfig_chunk = Config::new(Some(&input_path_string))
649+
.delimiter(Some(delimiter))
650+
.no_headers_flag(no_headers);
648651
let Ok(Some(mut idx)) = rconfig_chunk.indexed() else {
649652
let _ = send.send(Err(CliError::Other(
650653
"Failed to open index for parallel reading".to_string(),

tests/test_pragmastat.rs

Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -824,3 +824,78 @@ fn pragmastat_twosample_date_shift_as_days() {
824824
.expect("disparity should be a numeric value");
825825
}
826826
}
827+
828+
#[test]
fn pragmastat_parallel_reading() {
    // The indexed parallel reading path is only taken for CSVs with >10k rows,
    // so this fixture has a header line followed by 15,000 data rows.
    let wrk = Workdir::new("pragmastat_parallel_reading");
    let body = (0..15_000).map(|i| format!("{},{}\n", i as f64 * 0.1, (i as f64 * 0.3) + 1.0));
    let data: String = std::iter::once(String::from("a,b\n")).chain(body).collect();
    wrk.create_from_string("data.csv", &data);

    // An index must exist for the parallel path to be triggered.
    let mut index_cmd = wrk.command("index");
    index_cmd.arg(wrk.path("data.csv"));
    wrk.run(&mut index_cmd);

    // `--jobs 1` forces the single-threaded parallel path (deterministic order).
    let mut cmd = wrk.command("pragmastat");
    cmd.arg("--select");
    cmd.arg("a,b");
    cmd.arg("--jobs");
    cmd.arg("1");
    cmd.arg(wrk.path("data.csv"));
    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // One header row plus one output row per selected column.
    assert_eq!(got.len(), 3);
    for (row, column) in got.iter().skip(1).zip(["a", "b"]) {
        assert_eq!(row[0], column);
        assert_eq!(row[1], "15000"); // n
    }

    // The third field of each output row is the center; it must be numeric.
    let center_a: f64 = got[1][2].parse().expect("center for a should be numeric");
    let center_b: f64 = got[2][2].parse().expect("center for b should be numeric");

    // a = i * 0.1 spans 0.0..=1499.9, so its center is expected close to 750.
    assert!(
        (center_a - 750.0).abs() < 1.0,
        "center_a ({center_a}) should be near 750"
    );
    // b = i * 0.3 + 1.0 spans 1.0..=4500.7, so its center is expected close to 2251.
    assert!(
        (center_b - 2251.0).abs() < 1.0,
        "center_b ({center_b}) should be near 2251"
    );
}
872+
873+
#[test]
fn pragmastat_parallel_reading_no_headers() {
    // Headerless variant of the parallel-reading test: 12,000 data rows and no
    // header line, to verify header handling in the parallel path.
    let wrk = Workdir::new("pragmastat_parallel_no_headers");
    let data: String = (0..12_000)
        .map(|i| format!("{},{}\n", i as f64 * 0.1, (i as f64 * 0.3) + 1.0))
        .collect();
    wrk.create_from_string("data.csv", &data);

    // Index the file before running pragmastat on it.
    let mut index_cmd = wrk.command("index");
    index_cmd.arg(wrk.path("data.csv"));
    wrk.run(&mut index_cmd);

    // Columns are selected by position because there are no header names.
    let mut cmd = wrk.command("pragmastat");
    cmd.arg("--no-headers");
    cmd.arg("--select");
    cmd.arg("1,2");
    cmd.arg("--jobs");
    cmd.arg("1");
    cmd.arg(wrk.path("data.csv"));
    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // One header row plus one output row per selected column.
    assert_eq!(got.len(), 3);
    // In no-headers mode the columns are reported under the names "1" and "2".
    for (row, column) in got.iter().skip(1).zip(["1", "2"]) {
        assert_eq!(row[0], column);
        assert_eq!(row[1], "12000"); // n
    }
}

0 commit comments

Comments
 (0)