Skip to content

Commit 81583f7

Browse files
committed
Store SourceView linecache as offsets rather than pointers
1 parent 986b691 commit 81583f7

File tree

1 file changed

+80
-36
lines changed

1 file changed

+80
-36
lines changed

src/sourceview.rs

Lines changed: 80 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
use std::fmt;
2-
use std::slice;
32
use std::str;
4-
use std::sync::atomic::AtomicUsize;
5-
use std::sync::atomic::Ordering;
63
use std::sync::Arc;
74
use std::sync::Mutex;
85

@@ -129,16 +126,14 @@ impl<'a> Iterator for Lines<'a> {
129126
/// operations.
130127
pub struct SourceView {
131128
source: Arc<str>,
132-
processed_until: AtomicUsize,
133-
lines: Mutex<Vec<&'static str>>,
129+
line_end_offsets: Mutex<Vec<LineEndOffset>>,
134130
}
135131

136132
impl Clone for SourceView {
137133
fn clone(&self) -> SourceView {
138134
SourceView {
139135
source: self.source.clone(),
140-
processed_until: AtomicUsize::new(0),
141-
lines: Mutex::new(vec![]),
136+
line_end_offsets: Mutex::new(vec![]),
142137
}
143138
}
144139
}
@@ -162,59 +157,72 @@ impl SourceView {
162157
pub fn new(source: Arc<str>) -> SourceView {
163158
SourceView {
164159
source,
165-
processed_until: AtomicUsize::new(0),
166-
lines: Mutex::new(vec![]),
160+
line_end_offsets: Mutex::new(vec![]),
167161
}
168162
}
169163

170164
/// Creates an optimized view from a given source string
171165
pub fn from_string(source: String) -> SourceView {
172166
SourceView {
173167
source: source.into(),
174-
processed_until: AtomicUsize::new(0),
175-
lines: Mutex::new(vec![]),
168+
line_end_offsets: Mutex::new(vec![]),
176169
}
177170
}
178171

179172
/// Returns a requested minified line.
180173
pub fn get_line(&self, idx: u32) -> Option<&str> {
181174
let idx = idx as usize;
182-
{
183-
let lines = self.lines.lock().unwrap();
184-
if idx < lines.len() {
185-
return Some(lines[idx]);
186-
}
175+
176+
let get_from_line_ends = |line_ends: &[LineEndOffset]| {
177+
line_ends.get(idx).map(|&end_offset| {
178+
let start_offset = if idx == 0 {
179+
0
180+
} else {
181+
line_ends[idx - 1].to_start_index()
182+
};
183+
&self.source[start_offset..end_offset.to_end_index()]
184+
})
185+
};
186+
187+
let mut line_ends = self.line_end_offsets.lock().unwrap();
188+
189+
if let Some(line) = get_from_line_ends(&line_ends) {
190+
return Some(line);
187191
}
188192

189-
// fetched everything
190-
if self.processed_until.load(Ordering::Relaxed) > self.source.len() {
193+
// check whether we've processed the entire string - the end of the
194+
// last-processed line would be the same as the end of the string
195+
if line_ends
196+
.last()
197+
.is_some_and(|i| i.to_end_index() == self.source.len())
198+
{
191199
return None;
192200
}
193201

194-
let mut lines = self.lines.lock().unwrap();
202+
let mut rest_offset = line_ends.last().map_or(0, |i| i.to_start_index());
203+
let mut rest = &self.source[rest_offset..];
195204
let mut done = false;
196205

197206
while !done {
198-
let rest = &self.source.as_bytes()[self.processed_until.load(Ordering::Relaxed)..];
199-
200-
let rv = if let Some(mut idx) = rest.iter().position(|&x| x == b'\n' || x == b'\r') {
201-
let rv = &rest[..idx];
202-
if rest[idx] == b'\r' && rest.get(idx + 1) == Some(&b'\n') {
203-
idx += 1;
207+
let line_term = if let Some(idx) = rest.find(['\n', '\r']) {
208+
rest_offset += idx;
209+
rest = &rest[idx..];
210+
if rest.starts_with("\r\n") {
211+
LineTerminator::CrLf
212+
} else {
213+
LineTerminator::LfOrCr
204214
}
205-
self.processed_until.fetch_add(idx + 1, Ordering::Relaxed);
206-
rv
207215
} else {
208-
self.processed_until
209-
.fetch_add(rest.len() + 1, Ordering::Relaxed);
210-
done = true;
211-
rest
216+
done = rest.is_empty();
217+
rest_offset += rest.len();
218+
rest = &rest[rest.len()..];
219+
LineTerminator::Eof
212220
};
213221

214-
lines.push(unsafe {
215-
str::from_utf8_unchecked(slice::from_raw_parts(rv.as_ptr(), rv.len()))
216-
});
217-
if let Some(&line) = lines.get(idx) {
222+
line_ends.push(LineEndOffset::new(rest_offset, line_term));
223+
rest_offset += line_term as usize;
224+
rest = &rest[line_term as usize..];
225+
if let Some(line) = get_from_line_ends(&line_ends) {
218226
return Some(line);
219227
}
220228
}
@@ -311,7 +319,7 @@ impl SourceView {
311319
/// Returns the number of lines.
312320
pub fn line_count(&self) -> usize {
313321
self.get_line(!0);
314-
self.lines.lock().unwrap().len()
322+
self.line_end_offsets.lock().unwrap().len()
315323
}
316324

317325
/// Returns the source map reference in the source view.
@@ -320,6 +328,42 @@ impl SourceView {
320328
}
321329
}
322330

331+
/// A wrapper around an index that stores a [`LineTerminator`] in its 2 lowest bits.
///
/// The remaining upper bits hold the byte offset at which a line's content
/// ends (the terminator itself is not included). Packing both values into one
/// `usize` keeps each cached line at a single machine word.
#[derive(Clone, Copy)]
struct LineEndOffset(usize);

/// The kind of terminator that ended a line.
///
/// Each discriminant equals the byte length of the terminator, which is what
/// allows [`LineEndOffset::to_start_index`] to add it directly to the end
/// offset.
#[derive(Clone, Copy)]
enum LineTerminator {
    /// The line ran to the end of the source; there is no terminator.
    Eof = 0,
    /// A single `\n` or a single `\r`.
    LfOrCr = 1,
    /// The two-byte sequence `\r\n`.
    CrLf = 2,
}

impl LineEndOffset {
    /// Number of low bits reserved for the [`LineTerminator`] tag.
    const TAG_BITS: u32 = 2;
    /// Mask selecting the [`LineTerminator`] tag bits.
    const TAG_MASK: usize = 0b11;

    /// Packs `index` (the end offset of a line) together with `line_end`.
    ///
    /// # Panics
    ///
    /// Panics with `"index too large!"` if `index` does not fit in the bits
    /// left over after reserving the tag bits, i.e. if it is greater than
    /// `usize::MAX >> 2`.
    fn new(index: usize, line_end: LineTerminator) -> Self {
        let shifted = index << Self::TAG_BITS;

        // Check for overflow: the shift is lossless only if shifting back
        // reproduces the original index. The comparison has to be made
        // against `shifted` (comparing `index >> 2` with `index` would reject
        // every non-zero index). The check is a shift and a compare, so it is
        // performed on every target rather than only on 16/32-bit ones.
        if shifted >> Self::TAG_BITS != index {
            panic!("index too large!")
        }

        Self(shifted | line_end as usize)
    }

    /// Return the index of the end of this line.
    fn to_end_index(self) -> usize {
        self.0 >> Self::TAG_BITS
    }

    /// Return the index of the start of the next line.
    ///
    /// This is the end index plus the length of the line terminator stored in
    /// the tag bits.
    fn to_start_index(self) -> usize {
        self.to_end_index() + (self.0 & Self::TAG_MASK)
    }
}
366+
323367
#[cfg(test)]
324368
mod tests {
325369
use super::*;

0 commit comments

Comments
 (0)