Skip to content

Commit 5290e84

Browse files
authored
Speed up line index construction (#802)
* Add memchr crate * Speed up line index construction
1 parent 2afaf30 commit 5290e84

File tree

3 files changed

+63
-45
lines changed

3 files changed

+63
-45
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

librubyfmt/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ edition = "2024"
99
[dependencies]
1010
fancy-regex = "0.14.0"
1111
log = { version = "0.4.8", features = ["max_level_debug", "release_max_level_warn"] }
12+
memchr = "2.7"
1213
simplelog = "0.12"
1314
ruby-prism="1.8.0"
1415

librubyfmt/src/file_comments.rs

Lines changed: 61 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
use std::collections::BTreeSet;
1+
use memchr::memchr_iter;
22

33
use crate::comment_block::CommentBlock;
44
use crate::parser_state::line_difference_requires_newline;
55
use crate::types::{LineNumber, SourceOffset};
6-
use crate::util::{u8_to_str, u8_to_string};
6+
use crate::util::u8_to_string;
77

88
/// A vector of offsets in the source code where lines start, which
99
/// we use to detect what line a given offset is on.
@@ -18,18 +18,7 @@ pub struct LineIndex {
1818
}
1919

2020
impl LineIndex {
21-
pub fn new(file_contents: &[u8]) -> Self {
22-
let mut line_starts = Vec::new();
23-
24-
// First line always starts at position 0
25-
line_starts.push(0);
26-
27-
for (i, &byte) in file_contents.iter().enumerate() {
28-
if byte == b'\n' {
29-
line_starts.push(i + 1);
30-
}
31-
}
32-
21+
fn from_vec(line_starts: Vec<usize>) -> Self {
3322
LineIndex { line_starts }
3423
}
3524

@@ -63,7 +52,8 @@ pub struct FileComments {
6352
start_of_file_contiguous_comment_lines: Option<CommentBlock>,
6453
/// A list of comments, sorted in order by `LineNumber`
6554
other_comments: Vec<(LineNumber, String)>,
66-
lines_with_ruby: BTreeSet<LineNumber>,
55+
/// Sorted list of line numbers that contain Ruby code (not comments/blank)
56+
lines_with_ruby: Vec<LineNumber>,
6757
last_lineno: LineNumber,
6858
line_index: LineIndex,
6959
/// Sorted list of byte offsets where comments start
@@ -72,7 +62,36 @@ pub struct FileComments {
7262

7363
impl FileComments {
7464
pub fn from_prism_comments(comments: ruby_prism::Comments, source: &[u8]) -> FileComments {
75-
let line_index = LineIndex::new(source);
65+
let mut line_starts = Vec::new();
66+
let mut lines_with_ruby = Vec::new();
67+
68+
line_starts.push(0); // First line always starts at position 0
69+
70+
let mut line_start = 0;
71+
let mut lineno = 1;
72+
let mut inside_embdoc = false;
73+
74+
for i in memchr_iter(b'\n', source) {
75+
line_starts.push(i + 1);
76+
77+
if Self::line_has_ruby(&source[line_start..i], &mut inside_embdoc) {
78+
lines_with_ruby.push(lineno);
79+
}
80+
81+
line_start = i + 1;
82+
lineno += 1;
83+
}
84+
85+
// Handle last line if no trailing newline
86+
if line_start < source.len() {
87+
let line = &source[line_start..];
88+
if Self::line_has_ruby(line, &mut inside_embdoc) {
89+
lines_with_ruby.push(lineno);
90+
}
91+
}
92+
93+
let line_index = LineIndex::from_vec(line_starts);
94+
7695
let mut file_comments = FileComments::default();
7796
for comment in comments {
7897
file_comments.push_comment(
@@ -84,39 +103,36 @@ impl FileComments {
84103
.push(comment.location().start_offset());
85104
}
86105

87-
// Lookup lines that have any Ruby
88-
let mut inside_embdoc = false;
89-
u8_to_str(source)
90-
.lines()
91-
.enumerate()
92-
.filter(|(_lineno, line_contents)| {
93-
let contents = line_contents.trim();
94-
if contents.starts_with("=begin") {
95-
inside_embdoc = true;
96-
return false;
97-
}
98-
if contents.starts_with("=end") {
99-
inside_embdoc = false;
100-
return false;
101-
}
102-
if inside_embdoc {
103-
return false;
104-
}
105-
!(contents.starts_with("#") || contents.is_empty())
106-
})
107-
.for_each(|(lineno, _)| {
108-
file_comments
109-
.lines_with_ruby
110-
// Insert as one-offset to work with Ripper.
111-
// This (and elsewhere) can be zero-offset once Ripper is removed
112-
.insert((lineno + 1) as u64);
113-
});
114-
106+
file_comments.lines_with_ruby = lines_with_ruby;
115107
file_comments.last_lineno = line_index.line_starts.len() as u64;
116108
file_comments.line_index = line_index;
117109
file_comments
118110
}
119111

112+
fn line_has_ruby(line: &[u8], inside_embdoc: &mut bool) -> bool {
113+
let first_non_ws = line.iter().position(|b| !u8::is_ascii_whitespace(b));
114+
let Some(idx) = first_non_ws else {
115+
return false;
116+
};
117+
118+
let trimmed = &line[idx..];
119+
120+
if trimmed.starts_with(b"=begin") {
121+
*inside_embdoc = true;
122+
return false;
123+
}
124+
if trimmed.starts_with(b"=end") {
125+
*inside_embdoc = false;
126+
return false;
127+
}
128+
if *inside_embdoc {
129+
return false;
130+
}
131+
132+
// Check if it's a comment
133+
trimmed[0] != b'#'
134+
}
135+
120136
pub fn still_in_file(&self, line_number: LineNumber) -> bool {
121137
line_number < self.last_lineno
122138
}
@@ -162,7 +178,7 @@ impl FileComments {
162178
}
163179

164180
pub fn is_empty_line(&self, line_number: LineNumber) -> bool {
165-
!self.lines_with_ruby.contains(&line_number)
181+
self.lines_with_ruby.binary_search(&line_number).is_err()
166182
}
167183

168184
pub fn take_start_of_file_contiguous_comment_lines(&mut self) -> Option<CommentBlock> {

0 commit comments

Comments
 (0)