Skip to content

Commit 7477b79

Browse files
authored
Fix whitespace trimming of spans when unicode present (#25)
1 parent 3babcdd commit 7477b79

File tree

4 files changed

+33
-26
lines changed

4 files changed

+33
-26
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "beancount-parser-lima"
3-
version = "0.10.0"
3+
version = "0.10.1-alpha.1"
44
edition = "2021"
55
license = "MIT OR Apache-2.0"
66
description = "A zero-copy parser for Beancount"

src/lib.rs

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ use std::{
9797
iter::once,
9898
path::{Path, PathBuf},
9999
};
100-
pub use types::*;
100+
101+
pub use crate::{trim::trim_trailing_whitespace, types::*};
101102

102103
/// Contains the content of the Beancount source file, and the content of
103104
/// the transitive closure of all the include'd source files.
@@ -306,32 +307,10 @@ impl BeancountSources {
306307

307308
(
308309
source_id_str.to_string(),
309-
self.adjusted_span(source_content, span),
310+
trimmed_span(source_content, span),
310311
)
311312
}
312313

313-
// adjust the span to exclude trailing whitespace
314-
fn adjusted_span(&self, content: &str, span: &Span) -> Span {
315-
let mut chars = content.chars();
316-
let content = chars.by_ref();
317-
let mut i = span.start;
318-
let mut final_non_whitespace = i;
319-
for c in content.skip(span.start) {
320-
if i >= span.end {
321-
break;
322-
}
323-
324-
if !c.is_whitespace() {
325-
final_non_whitespace = i;
326-
}
327-
328-
i += 1;
329-
}
330-
let mut adjusted_span = *span;
331-
adjusted_span.end = final_non_whitespace + 1;
332-
adjusted_span
333-
}
334-
335314
fn source_id_string(&self, source_id: SourceId) -> &str {
336315
self.source_id_strings[Into::<usize>::into(source_id)].as_str()
337316
}
@@ -869,6 +848,12 @@ fn end_of_input(source_id: SourceId, s: &str) -> Span {
869848
chumsky::span::Span::new(source_id, s.len()..s.len())
870849
}
871850

851+
fn trimmed_span(source: &str, span: &Span) -> Span {
852+
let mut trimmed = *span;
853+
trimmed.end = trim_trailing_whitespace(source, span.start, span.end);
854+
trimmed
855+
}
856+
872857
#[cfg(test)]
873858
pub use lexer::bare_lex;
874859
mod format;
@@ -877,4 +862,5 @@ pub use options::Options;
877862
mod options;
878863
mod parsers;
879864
mod sort;
865+
mod trim;
880866
pub mod types;

src/trim.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#[cfg(test)]
2+
use test_case::test_case;
3+
4+
/// Returns the byte offset just past the last non-whitespace character of
/// `content[start..end]`, i.e. the end of that range with trailing
/// whitespace excluded.
///
/// `start` and `end` are byte offsets into `content`. If the range is empty
/// or consists solely of whitespace, `end` is returned unchanged.
///
/// # Panics
///
/// Panics if `start..end` is out of bounds for `content` or does not lie on
/// `char` boundaries.
pub fn trim_trailing_whitespace(content: &str, start: usize, end: usize) -> usize {
    content[start..end]
        .char_indices()
        .rfind(|(_, c)| !c.is_whitespace())
        // Step over the whole character: it may be several bytes wide in
        // UTF-8, so adding a fixed 1 could land inside it.
        .map_or(end, |(i, c)| start + i + c.len_utf8())
}
11+
12+
// Each case trims the whole of `source` and checks the text that remains.
#[cfg(test)]
#[test_case("abc", "abc"; "abc")]
#[test_case("abc ", "abc"; "abc space")]
#[test_case("abc\n", "abc"; "abc newline")]
#[test_case("abc\ndef\n\n", "abc\ndef"; "abc def")]
#[test_case("abc\n😱\ndef\n", "abc\n😱\ndef"; "abc scream def")]
fn test_trim_trailing_whitespace(source: &str, expected: &str) {
    let end = trim_trailing_whitespace(source, 0, source.len());
    let actual = &source[..end];
    assert_eq!(actual, expected);
}

0 commit comments

Comments
 (0)