Skip to content

Commit 40c060c

Browse files
committed
Smarter handling of input lines
Instead of using a Vec<String>, which allocates for each line, we introduce a BufLines type which keeps the whole file in one allocation. input/c_major.tab, repeated into a 25MB file, processed with muxml: [OLD:] measurement mean ± σ min … max outliers delta wall_time 345ms ± 3.78ms 339ms … 354ms 0 ( 0%) 0% peak_rss 462MB ± 126KB 462MB … 463MB 0 ( 0%) 0% cpu_cycles 1.07G ± 20.7M 1.03G … 1.12G 0 ( 0%) 0% instructions 3.56G ± 294 3.56G … 3.56G 0 ( 0%) 0% cache_references 29.7M ± 1.10M 29.2M … 33.7M 1 ( 7%) 0% cache_misses 3.19M ± 21.6K 3.17M … 3.24M 0 ( 0%) 0% branch_misses 2.47M ± 160K 2.16M … 2.69M 0 ( 0%) 0% [NEW:] measurement mean ± σ min … max outliers delta wall_time 329ms ± 2.72ms 322ms … 334ms 3 (19%) ⚡- 4.9% ± 0.7% peak_rss 408MB ± 74.3KB 407MB … 408MB 0 ( 0%) ⚡- 11.9% ± 0.0% cpu_cycles 1.03G ± 7.11M 1.02G … 1.04G 0 ( 0%) ⚡- 3.7% ± 1.1% instructions 3.30G ± 365 3.30G … 3.30G 1 ( 6%) ⚡- 7.2% ± 0.0% cache_references 23.7M ± 227K 23.4M … 24.3M 1 ( 6%) ⚡- 20.4% ± 1.9% cache_misses 2.99M ± 20.7K 2.96M … 3.05M 1 ( 6%) ⚡- 6.5% ± 0.5% branch_misses 1.79M ± 162K 1.55M … 2.08M 0 ( 0%) ⚡- 27.4% ± 4.8% the same file, using the fixup backend: [OLD:] measurement mean ± σ min … max outliers delta wall_time 164ms ± 2.00ms 160ms … 168ms 0 ( 0%) 0% peak_rss 182MB ± 102KB 182MB … 183MB 0 ( 0%) 0% cpu_cycles 528M ± 6.68M 519M … 543M 0 ( 0%) 0% instructions 2.26G ± 254 2.26G … 2.26G 1 ( 3%) 0% cache_references 18.2M ± 190K 17.9M … 19.1M 2 ( 6%) 0% cache_misses 320K ± 11.6K 292K … 342K 0 ( 0%) 0% branch_misses 1.27M ± 132K 1.15M … 1.66M 2 ( 6%) 0% [NEW:] measurement mean ± σ min … max outliers delta wall_time 122ms ± 2.23ms 119ms … 128ms 0 ( 0%) ⚡- 25.4% ± 0.6% peak_rss 107MB ± 123KB 107MB … 107MB 1 ( 2%) ⚡- 41.2% ± 0.0% cpu_cycles 422M ± 7.88M 414M … 453M 1 ( 2%) ⚡- 20.1% ± 0.7% instructions 1.87G ± 237 1.87G … 1.87G 0 ( 0%) ⚡- 17.1% ± 0.0% cache_references 9.03M ± 65.9K 8.95M … 9.25M 1 ( 2%) ⚡- 50.4% ± 0.4% cache_misses 165K ± 14.3K 101K … 179K 6 (15%) ⚡- 48.4% ± 2.0% branch_misses 1.11M ± 122K 1.01M … 1.43M 6 (15%) ⚡- 13.2% ± 4.7%
1 parent cf21572 commit 40c060c

File tree

13 files changed

+205
-135
lines changed

13 files changed

+205
-135
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ clap = { version = "4.5.4", features = ["derive"] }
1414
entrace_core = { version = "0.1.0", features = [] }
1515
itertools = "0.14.0"
1616
itoa = "1.0.11"
17+
memchr = "2.7.5"
1718
midly = "0.5.3"
1819
rustc-hash = "2.1.1"
1920
tracing = {version = "0.1.41", features = ["max_level_off","release_max_level_off"]}

flake.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
packages.${system}.default = pkgs.rustPlatform.buildRustPackage {
3131
pname = "scoreman";
3232
version = "1.0.0";
33-
cargoHash = "sha256-Rq1gAKmz0DPe9Dsgou5CJmTUM6ifG9AgHSOct7jcRjI=";
33+
cargoHash = "sha256-A/248fXOHCvNov44PUrGgMqwb9e4SUVok4Do67/or9Q=";
3434
src = ./.;
3535
};
3636
devShells.${system}.default = pkgs.mkShell {

src/backend/fixup/mod.rs

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::time::Instant;
1+
use std::{borrow::Cow, time::Instant};
22

33
use clap::ValueEnum;
44
use tracing::trace;
@@ -13,7 +13,7 @@ use crate::{
1313
Backend,
1414
},
1515
parser::parse,
16-
time,
16+
time, BufLines,
1717
};
1818

1919
pub struct FixupBackend();
@@ -54,7 +54,7 @@ impl Backend for FixupBackend {
5454
type BackendSettings = FixupBackendSettings;
5555

5656
fn process<Out: std::io::Write>(
57-
parser_input: &[String], out: &mut Out, settings: Self::BackendSettings,
57+
parser_input: &BufLines, out: &mut Out, settings: Self::BackendSettings,
5858
) -> BackendResult {
5959
if let Some(dump) = settings.dump {
6060
let (parse_time, parsed) = time(|| parse(parser_input));
@@ -68,9 +68,9 @@ impl Backend for FixupBackend {
6868
r.err = parsed.error;
6969
return r;
7070
}
71+
7172
let mut diagnostics = vec![];
72-
// TODO: figure out a way not to clone these
73-
let mut parser_input = parser_input.to_owned();
73+
let mut parser_input: Vec<Cow<str>> = parser_input.buf.lines().map(Cow::Borrowed).collect();
7474
let mut parse_time;
7575
let fixup_start = Instant::now();
7676
let mut location_tracker = LocationTracker::new();
@@ -117,18 +117,17 @@ impl Backend for FixupBackend {
117117
};
118118
// we just replace both with a rest to be sure
119119
// todo: use a better strategy by checking the actual ticks
120-
parser_input[line_idx].replace_range(char_idx..char_idx + 1, "-");
121-
parser_input[line_idx].replace_range(char_idx + 1..char_idx + 2, "-");
120+
let line_mut = parser_input[line_idx].to_mut();
121+
line_mut.replace_range(char_idx..char_idx + 1, "-");
122+
line_mut.replace_range(char_idx + 1..char_idx + 2, "-");
122123
diagnostics.push(Diagnostic::info(
123124
err.main_location.clone(),
124125
DiagnosticKind::FormatReplacedInvalid,
125126
));
126127
}
127128
BackendErrorKind::NoClosingBarline => {
128129
let l_idx = err.main_location.get_line_idx().unwrap();
129-
let line = &mut parser_input[l_idx];
130-
line.truncate(line.trim_end().len());
131-
line.push('|');
130+
parser_input[l_idx].to_mut().push('|');
132131
diagnostics.push(Diagnostic::info(
133132
err.main_location.clone(),
134133
DiagnosticKind::FormatAddedBarline,
@@ -143,7 +142,9 @@ impl Backend for FixupBackend {
143142
else {
144143
continue;
145144
};
146-
parser_input[line_idx].replace_range(char_idx..char_idx + 1, "-");
145+
parser_input[line_idx]
146+
.to_mut()
147+
.replace_range(char_idx + 1..char_idx + 2, "-");
147148
diagnostics.push(Diagnostic::info(
148149
err.main_location.clone(),
149150
DiagnosticKind::FormatReplacedInvalid,
@@ -154,8 +155,8 @@ impl Backend for FixupBackend {
154155
}
155156
}
156157
let gen_time = fixup_start.elapsed();
157-
let maybe_io_err =
158-
out.write_all(parser_input.join("\n").as_ref()).map_err(BackendError::from).err();
158+
let joined = parser_input.join("\n");
159+
let maybe_io_err = out.write_all(joined.as_bytes()).map_err(BackendError::from).err();
159160
BackendResult::new(diagnostics, maybe_io_err, Some(parse_time), Some(gen_time))
160161
}
161162
}

src/backend/midi/mod.rs

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@ use midly::{
77
use tracing::trace;
88

99
use super::{Backend, BackendResult};
10-
use crate::parser::{
11-
parse,
12-
tab_element::TabElement::{self, Fret},
13-
ParseResult,
14-
};
1510
use crate::time;
11+
use crate::{
12+
parser::{
13+
parse,
14+
tab_element::TabElement::{self, Fret},
15+
ParseResult,
16+
},
17+
BufLines,
18+
};
1619

1720
const BPM: u32 = 80;
1821
const MINUTE_IN_MS: u32 = 60 * 1000;
@@ -25,7 +28,7 @@ impl Backend for MidiBackend {
2528
type BackendSettings = ();
2629

2730
fn process<Out: std::io::Write>(
28-
input: &[String], out: &mut Out, _settings: Self::BackendSettings,
31+
input: &BufLines, out: &mut Out, _settings: Self::BackendSettings,
2932
) -> BackendResult {
3033
let diagnostics = vec![];
3134
let (parse_time, parse_result) = time(|| parse(input));
@@ -58,15 +61,11 @@ impl Backend for MidiBackend {
5861
tracks,
5962
};
6063
let gen_time = gen_start.elapsed();
64+
let mut last_err = None;
6165
if let Err(x) = smf.write_std(out) {
62-
return BackendResult::new(
63-
diagnostics,
64-
Some(x.into()),
65-
Some(parse_time),
66-
Some(gen_time),
67-
);
68-
}
69-
BackendResult::new(diagnostics, None, Some(parse_time), Some(gen_time))
66+
last_err = Some(x.into());
67+
};
68+
BackendResult::new(diagnostics, last_err, Some(parse_time), Some(gen_time))
7069
}
7170
}
7271

src/backend/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use errors::{backend_error::BackendError, diagnostic::Diagnostic};
22

33
use std::{fmt::Display, time::Duration};
4+
5+
use crate::BufLines;
46
pub mod errors;
57
pub mod fixup;
68
pub mod midi;
@@ -23,7 +25,7 @@ pub trait Backend {
2325
type BackendSettings;
2426

2527
fn process<Out: std::io::Write>(
26-
input: &[String], out: &mut Out, settings: Self::BackendSettings,
28+
input: &BufLines, out: &mut Out, settings: Self::BackendSettings,
2729
) -> BackendResult;
2830
}
2931

@@ -44,7 +46,7 @@ pub enum BackendSelector {
4446
}
4547

4648
impl BackendSelector {
47-
pub fn process<Out: std::io::Write>(self, input: &[String], out: &mut Out) -> BackendResult {
49+
pub fn process<Out: std::io::Write>(self, input: &BufLines, out: &mut Out) -> BackendResult {
4850
match self {
4951
BackendSelector::Midi => midi::MidiBackend::process(input, out, ()),
5052
BackendSelector::Muxml(settings) => muxml::MuxmlBackend::process(input, out, settings),

src/backend/muxml/mod.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ pub mod fretboard;
44
mod muxml2_tests;
55
pub mod settings;
66
use crate::backend::errors::backend_error::BackendError;
7-
use crate::parser;
87
use crate::parser::tab_element::TabElement;
98
use crate::parser::{source_location_from_stream, ParseResult};
109
use crate::{
1110
backend::{Backend, BackendResult},
1211
rlen, time,
1312
};
13+
use crate::{parser, BufLines};
1414
use formatters::{
1515
write_muxml2_measure_prelude, write_muxml2_note, write_muxml2_rest, MUXML2_DOCUMENT_END,
1616
MUXML_INCOMPLETE_DOC_PRELUDE,
@@ -26,7 +26,7 @@ impl Backend for MuxmlBackend {
2626
type BackendSettings = settings::Settings;
2727

2828
fn process<Out: std::io::Write>(
29-
input: &[String], out: &mut Out, settings: Self::BackendSettings,
29+
input: &BufLines, out: &mut Out, settings: Self::BackendSettings,
3030
) -> BackendResult {
3131
let (parse_time, parse_result) = time(|| parser::parse(input));
3232
if let Some(err) = parse_result.error {
@@ -228,17 +228,20 @@ impl MuxmlGenerator {
228228
pub fn process_measure(&mut self, measure_idx: usize) -> Result<(), BackendError> {
229229
let meas = span!(Level::TRACE, "Muxml2: processing measure", measure_idx);
230230
let _meas = meas.enter();
231-
let ticks_in_measure = rlen(&self.parsed.measures[measure_idx].data_range) / 6;
232-
debug_assert!(rlen(&self.parsed.measures[measure_idx].data_range) % 6 == 0);
233-
// Length of actual content in measure. `remove_space_between_notes` will reduce this for
234-
// example
231+
232+
let data_range = &self.parsed.measures[measure_idx].data_range;
233+
let ticks_in_measure = rlen(data_range) / 6;
234+
debug_assert!(rlen(data_range) % 6 == 0);
235+
236+
// Length of actual content in measure. `remove_space_between_notes` will reduce this for example
235237
let mut measure_content_len = ticks_in_measure;
236238
self.measure_buf.clear();
237239
debug!("initial measure_content_len = {measure_content_len}");
238-
let mut stream_idx: usize = *self.parsed.measures[measure_idx].data_range.start() as usize;
239-
let mut note_count = 0;
240-
let mut stream_proc_cnt = 0;
241-
while stream_idx <= *self.parsed.measures[measure_idx].data_range.end() as usize {
240+
241+
let mut stream_idx: usize = *data_range.start() as usize;
242+
let (mut note_count, mut stream_proc_cnt) = (0, 0);
243+
let end = *data_range.end() as usize;
244+
while stream_idx <= end {
242245
let elem = &self.parsed.tick_stream[stream_idx];
243246
//trace!(
244247
// "current elem: {elem:?}, note_count: {note_count}, proc_cnt = {stream_proc_cnt}"
@@ -281,8 +284,7 @@ impl MuxmlGenerator {
281284
self.measure_buf.push(Muxml2TabElement::Rest(1));
282285
}
283286
trace!(kind = ?self.measure_buf.last().unwrap(), "Parsed a tick");
284-
note_count = 0;
285-
stream_proc_cnt = 0;
287+
(note_count, stream_proc_cnt) = (0, 0)
286288
} else {
287289
stream_proc_cnt += 1;
288290
}
@@ -308,14 +310,14 @@ impl MuxmlGenerator {
308310
measure_denominator,
309311
)
310312
.unwrap();
311-
// borrowing schenanigans
312313
for i in 0..self.measure_buf.len() {
313314
self.write_tab_element(i)?;
314315
}
315316
self.document.push_str("</measure>");
316317
Ok(())
317318
}
318319
#[inline(always)]
320+
// takes an index because of borrowing schenanigans
319321
pub fn write_tab_element(&mut self, elem_idx: usize) -> std::fmt::Result {
320322
let elem = &self.measure_buf[elem_idx];
321323
match elem {

src/backend/muxml/muxml2_tests.rs

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ use crate::backend::{
33
muxml::{settings::Settings, MuxmlBackend},
44
Backend,
55
};
6-
use itertools::Itertools;
76

87
#[test]
98
fn test_muxml2() -> anyhow::Result<()> {
@@ -21,11 +20,7 @@ E|--------------|
2120
trim_measure: true,
2221
simplify_time_signature: true,
2322
};
24-
MuxmlBackend::process(
25-
&i1.lines().map(|x| x.to_string()).collect::<Vec<_>>(),
26-
&mut out,
27-
settings,
28-
);
23+
MuxmlBackend::process(&i1.into(), &mut out, settings);
2924
insta::assert_snapshot!(String::from_utf8_lossy(&out));
3025
Ok(())
3126
}
@@ -45,11 +40,7 @@ E|----|
4540
trim_measure: true,
4641
simplify_time_signature: true,
4742
};
48-
MuxmlBackend::process(
49-
&i1.lines().map(|x| x.to_string()).collect::<Vec<_>>(),
50-
&mut out,
51-
settings,
52-
);
43+
MuxmlBackend::process(&i1.into(), &mut out, settings);
5344
insta::assert_snapshot!(String::from_utf8_lossy(&out));
5445
Ok(())
5546
}
@@ -77,11 +68,7 @@ E|-----9-|-----9-|"#;
7768
trim_measure: true,
7869
simplify_time_signature: true,
7970
};
80-
let res = MuxmlBackend::process(
81-
&example_score.lines().map(|x| x.to_string()).collect_vec(),
82-
&mut Vec::new(),
83-
settings,
84-
);
71+
let res = MuxmlBackend::process(&example_score.into(), &mut Vec::new(), settings);
8572
let e = res.err.unwrap();
8673
use crate::backend::errors::error_location::ErrorLocation;
8774
assert_eq!(e.main_location, ErrorLocation::LineAndChar(3, 25));

0 commit comments

Comments
 (0)