Skip to content

Commit dfaaded

Browse files
committed
Refactoring xan reverse for better performance
1 parent 7b7d4f0 commit dfaaded

File tree

4 files changed

+67
-50
lines changed

4 files changed

+67
-50
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ The **parallel** update.
5353
* Adding support for `xan search -l -p -t`.
5454
* Adding `rms` moonblade aggregation function.
5555
* Adding `xan scrape -E/--encoding`.
56+
* Adding support for CDX files.
5657

5758
*Fixes*
5859

@@ -93,6 +94,7 @@ The **parallel** update.
9394

9495
* Switching hashmaps to `ahash`.
9596
* Optimizing moonblade pipelines with more than a single underscore substitution.
97+
* Improving `xan reverse` performance.
9698

9799
*Quality of Life*
98100

src/cmd/reverse.rs

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
4747

4848
let rconfig = &mut Config::new(&args.arg_input)
4949
.delimiter(args.flag_delimiter)
50-
.no_headers(true);
50+
.no_headers(args.flag_no_headers);
5151

5252
if args.flag_in_memory {
5353
run_without_memory_efficiency(rconfig, args)
@@ -57,49 +57,37 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
5757
}
5858

5959
fn run_with_memory_efficiency(rconfig: &mut Config, args: Args) -> CliResult<()> {
60-
rconfig.no_headers = true;
60+
let (headers, mut reverse_reader) = rconfig.reverse_reader().map_err(|_| {
61+
"can't use provided input: needs to be loaded in the RAM using -m, --in-memory flag"
62+
})?;
6163

62-
let mut config_csv_reader = rconfig.reader()?;
63-
let headers_offset = if args.flag_no_headers {
64-
0
65-
} else {
66-
config_csv_reader.byte_headers()?;
67-
config_csv_reader.position().byte()
68-
};
69-
70-
let reverse_reader = rconfig.io_reader_for_reverse_reading(headers_offset);
71-
72-
match reverse_reader {
73-
Err(_) => Err(
74-
"can't use provided input: needs to be loaded in the RAM using -m, --in-memory flag",
75-
)?,
76-
Ok(rr) => {
77-
let mut wtr = Config::new(&args.flag_output).writer()?;
78-
let mut reverse_csv_reader = rconfig.csv_reader_from_reader(rr);
79-
80-
if !args.flag_no_headers && headers_offset > 0 {
81-
let headers = config_csv_reader.byte_headers()?;
82-
wtr.write_byte_record(headers)?;
83-
}
84-
85-
for record in reverse_csv_reader.byte_records().flatten() {
86-
let new_record: Vec<Vec<u8>> = record
87-
.iter()
88-
.rev()
89-
.map(|b| b.iter().rev().copied().collect())
90-
.collect();
91-
92-
wtr.write_record(new_record)?;
93-
}
94-
95-
Ok(wtr.flush()?)
64+
let mut wtr = Config::new(&args.flag_output).writer()?;
65+
66+
if !args.flag_no_headers && !headers.is_empty() {
67+
wtr.write_byte_record(&headers)?;
68+
}
69+
70+
let mut record = csv::ByteRecord::new();
71+
let mut reversed_record = csv::ByteRecord::new();
72+
let mut reversed_bytes: Vec<u8> = Vec::new();
73+
74+
while reverse_reader.read_byte_record(&mut record)? {
75+
reversed_record.clear();
76+
77+
for cell in record.iter().rev() {
78+
reversed_bytes.clear();
79+
reversed_bytes.extend(cell.iter().rev());
80+
81+
reversed_record.push_field(&reversed_bytes);
9682
}
83+
84+
wtr.write_byte_record(&reversed_record)?;
9785
}
86+
87+
Ok(wtr.flush()?)
9888
}
9989

10090
fn run_without_memory_efficiency(rconfig: &mut Config, args: Args) -> CliResult<()> {
101-
rconfig.no_headers = args.flag_no_headers;
102-
10391
let mut reader = rconfig.reader()?;
10492
let all = reader.byte_records().collect::<Result<Vec<_>, _>>()?;
10593

src/config.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -488,24 +488,13 @@ impl Config {
488488
let reverse_reader = ReverseRead::new(forward_reader.into_inner(), filesize, offset);
489489
let mut reader_builder = self.csv_reader_builder();
490490
reader_builder.has_headers(false);
491-
491+
dbg!(offset, filesize);
492492
Ok((
493493
headers,
494494
reader_builder.from_reader(Box::new(reverse_reader)),
495495
))
496496
}
497497

498-
pub fn io_reader_for_reverse_reading(
499-
&self,
500-
offset: u64,
501-
) -> CliResult<Box<dyn io::Read + 'static>> {
502-
let mut reader = self.io_reader_for_random_access()?;
503-
504-
let filesize = reader.seek(SeekFrom::End(0))?;
505-
506-
Ok(Box::new(ReverseRead::new(reader, filesize, offset)))
507-
}
508-
509498
pub fn csv_reader_builder(&self) -> csv::ReaderBuilder {
510499
let mut builder = csv::ReaderBuilder::new();
511500

tests/test_slice.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,41 @@ fn slice_conditions() {
207207
let expected = vec![svec!["n"], svec!["2"]];
208208
assert_eq!(got, expected);
209209
}
210+
211+
#[test]
212+
fn slice_last() {
213+
let wrk = Workdir::new("slice_last");
214+
wrk.create(
215+
"data.csv",
216+
vec![
217+
svec!["n"],
218+
svec!["zero"],
219+
svec!["one"],
220+
svec!["two"],
221+
svec!["three"],
222+
svec!["four"],
223+
svec!["five"],
224+
],
225+
);
226+
let mut cmd = wrk.command("slice");
227+
cmd.args(["-L", "3"]).arg("data.csv");
228+
229+
let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
230+
let expected = vec![svec!["n"], svec!["three"], svec!["four"], svec!["five"]];
231+
assert_eq!(got, expected);
232+
233+
let mut cmd = wrk.command("slice");
234+
cmd.args(["-L", "300"]).arg("data.csv");
235+
236+
let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
237+
let expected = vec![
238+
svec!["n"],
239+
svec!["zero"],
240+
svec!["one"],
241+
svec!["two"],
242+
svec!["three"],
243+
svec!["four"],
244+
svec!["five"],
245+
];
246+
assert_eq!(got, expected);
247+
}

0 commit comments

Comments
 (0)