Skip to content

Commit de24044

Browse files
authored
ref: Sort tokens in SourceMap (#91)
The fact that the tokens in a sourcemap can be arbitrarily ordered causes a substantial amount of complication (we have to keep a sorted index in addition) for no benefit that I've ever seen. Therefore, we now sort tokens upon creation and remove all the Index rigmarole. This is a breaking change because it removes some types and functions.
1 parent 3de0fa3 commit de24044

File tree

7 files changed

+66
-98
lines changed

7 files changed

+66
-98
lines changed

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
### Various fixes and improvements
6+
7+
- ref: Tokens within a sourcemap are now always sorted by their position in the
8+
minified file (#91) by @loewenheim.
9+
Consequently:
10+
- the type `IndexIter` and the functions `get_index_size`, `index_iter`,
11+
and `idx_from_token` have been deleted;
12+
- the function `sourcemap_from_token` has been turned into the method
13+
`sourcemap` on `Token`;
14+
- the `idx` parameter of `SourceMap::get_token` now has the type `usize`.
15+
16+
317
## 8.0.1
418

519
### Various fixes & improvements

src/encoder.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,6 @@ fn serialize_mappings(sm: &SourceMap) -> String {
113113
let mut prev_src_id = 0;
114114

115115
for (idx, token) in sm.tokens().enumerate() {
116-
let idx = idx as u32;
117-
118116
if token.get_dst_line() != prev_dst_line {
119117
prev_dst_col = 0;
120118
while token.get_dst_line() != prev_dst_line {

src/hermes.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,10 @@ impl SourceMapHermes {
108108

109109
// Find the closest mapping, just like here:
110110
// https://github.com/facebook/metro/blob/63b523eb20e7bdf62018aeaf195bb5a3a1a67f36/packages/metro-symbolicate/src/SourceMetadataMapConsumer.js#L204-L231
111-
let mapping = greatest_lower_bound(&function_map.mappings, &token.get_src(), |o| {
112-
(o.line, o.column)
113-
})?;
111+
let (_mapping_idx, mapping) =
112+
greatest_lower_bound(&function_map.mappings, &token.get_src(), |o| {
113+
(o.line, o.column)
114+
})?;
114115
function_map
115116
.names
116117
.get(mapping.name_index as usize)

src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ pub use crate::errors::{Error, Result};
5858
pub use crate::hermes::SourceMapHermes;
5959
pub use crate::sourceview::SourceView;
6060
pub use crate::types::{
61-
DecodedMap, IndexIter, NameIter, RawToken, RewriteOptions, SourceContentsIter, SourceIter,
62-
SourceMap, SourceMapIndex, SourceMapSection, SourceMapSectionIter, Token, TokenIter,
61+
DecodedMap, NameIter, RawToken, RewriteOptions, SourceContentsIter, SourceIter, SourceMap,
62+
SourceMapIndex, SourceMapSection, SourceMapSectionIter, Token, TokenIter,
6363
};
6464
pub use crate::utils::make_relative_path;
6565

src/sourceview.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use if_chain::if_chain;
1111
use crate::detector::{locate_sourcemap_reference_slice, SourceMapRef};
1212
use crate::errors::Result;
1313
use crate::js_identifiers::{get_javascript_token, is_valid_javascript_identifier};
14-
use crate::types::{idx_from_token, sourcemap_from_token, Token};
14+
use crate::types::Token;
1515

1616
/// An iterator that iterates over tokens in reverse.
1717
pub struct RevTokenIter<'view, 'map> {
@@ -24,17 +24,11 @@ impl<'view, 'map> Iterator for RevTokenIter<'view, 'map> {
2424
type Item = (Token<'map>, Option<&'view str>);
2525

2626
fn next(&mut self) -> Option<(Token<'map>, Option<&'view str>)> {
27-
let token = match self.token.take() {
28-
None => {
29-
return None;
30-
}
31-
Some(token) => token,
32-
};
27+
let token = self.token.take()?;
28+
let idx = token.idx;
3329

34-
let idx = idx_from_token(&token);
3530
if idx > 0 {
36-
let sm = sourcemap_from_token(&token);
37-
self.token = sm.get_token(idx - 1);
31+
self.token = token.sm.get_token(idx - 1);
3832
}
3933

4034
// if we are going to the same line as we did last iteration, we don't have to scan

src/types.rs

Lines changed: 29 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,16 @@ pub struct RawToken {
157157
#[derive(Copy, Clone)]
158158
pub struct Token<'a> {
159159
raw: &'a RawToken,
160-
i: &'a SourceMap,
160+
pub(crate) sm: &'a SourceMap,
161+
pub(crate) idx: usize,
161162
offset: u32,
162-
idx: u32,
163+
}
164+
165+
impl<'a> Token<'a> {
166+
/// The sourcemap this token is linked to.
167+
pub fn sourcemap(&self) -> &'a SourceMap {
168+
self.sm
169+
}
163170
}
164171

165172
impl<'a> PartialEq for Token<'a> {
@@ -241,7 +248,7 @@ impl<'a> Token<'a> {
241248
if self.raw.src_id == !0 {
242249
None
243250
} else {
244-
self.i.get_source(self.raw.src_id)
251+
self.sm.get_source(self.raw.src_id)
245252
}
246253
}
247254

@@ -255,7 +262,7 @@ impl<'a> Token<'a> {
255262
if self.raw.name_id == !0 {
256263
None
257264
} else {
258-
self.i.get_name(self.raw.name_id)
265+
self.sm.get_name(self.raw.name_id)
259266
}
260267
}
261268

@@ -287,7 +294,7 @@ impl<'a> Token<'a> {
287294

288295
/// Returns the referenced source view.
289296
pub fn get_source_view(&self) -> Option<&SourceView> {
290-
self.i.get_source_view(self.get_src_id())
297+
self.sm.get_source_view(self.get_src_id())
291298
}
292299

293300
/// If true, this token is a range token.
@@ -298,18 +305,10 @@ impl<'a> Token<'a> {
298305
}
299306
}
300307

301-
pub fn idx_from_token(token: &Token<'_>) -> u32 {
302-
token.idx
303-
}
304-
305-
pub fn sourcemap_from_token<'a>(token: &Token<'a>) -> &'a SourceMap {
306-
token.i
307-
}
308-
309308
/// Iterates over all tokens in a sourcemap
310309
pub struct TokenIter<'a> {
311310
i: &'a SourceMap,
312-
next_idx: u32,
311+
next_idx: usize,
313312
}
314313

315314
impl<'a> TokenIter<'a> {
@@ -390,23 +389,6 @@ impl<'a> Iterator for NameIter<'a> {
390389
}
391390
}
392391

393-
/// Iterates over all index items in a sourcemap
394-
pub struct IndexIter<'a> {
395-
i: &'a SourceMap,
396-
next_idx: usize,
397-
}
398-
399-
impl<'a> Iterator for IndexIter<'a> {
400-
type Item = (u32, u32, u32);
401-
402-
fn next(&mut self) -> Option<(u32, u32, u32)> {
403-
self.i.index.get(self.next_idx).map(|idx| {
404-
self.next_idx += 1;
405-
*idx
406-
})
407-
}
408-
}
409-
410392
impl<'a> fmt::Debug for Token<'a> {
411393
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
412394
write!(f, "<Token {self:#}>")
@@ -481,7 +463,6 @@ pub struct SourceMapIndex {
481463
pub struct SourceMap {
482464
pub(crate) file: Option<Arc<str>>,
483465
pub(crate) tokens: Vec<RawToken>,
484-
pub(crate) index: Vec<(u32, u32, u32)>,
485466
pub(crate) names: Vec<Arc<str>>,
486467
pub(crate) source_root: Option<Arc<str>>,
487468
pub(crate) sources: Vec<Arc<str>>,
@@ -596,21 +577,15 @@ impl SourceMap {
596577
/// - `sources_content` optional source contents
597578
pub fn new(
598579
file: Option<Arc<str>>,
599-
tokens: Vec<RawToken>,
580+
mut tokens: Vec<RawToken>,
600581
names: Vec<Arc<str>>,
601582
sources: Vec<Arc<str>>,
602583
sources_content: Option<Vec<Option<Arc<str>>>>,
603584
) -> SourceMap {
604-
let mut index: Vec<_> = tokens
605-
.iter()
606-
.enumerate()
607-
.map(|(idx, token)| (token.dst_line, token.dst_col, idx as u32))
608-
.collect();
609-
index.sort_unstable();
585+
tokens.sort_unstable_by_key(|t| (t.dst_line, t.dst_col));
610586
SourceMap {
611587
file,
612588
tokens,
613-
index,
614589
names,
615590
source_root: None,
616591
sources,
@@ -681,10 +656,10 @@ impl SourceMap {
681656
}
682657

683658
/// Looks up a token by its index.
684-
pub fn get_token(&self, idx: u32) -> Option<Token<'_>> {
685-
self.tokens.get(idx as usize).map(|raw| Token {
659+
pub fn get_token(&self, idx: usize) -> Option<Token<'_>> {
660+
self.tokens.get(idx).map(|raw| Token {
686661
raw,
687-
i: self,
662+
sm: self,
688663
idx,
689664
offset: 0,
690665
})
@@ -705,9 +680,15 @@ impl SourceMap {
705680

706681
/// Looks up the closest token to a given 0-indexed line and column.
707682
pub fn lookup_token(&self, line: u32, col: u32) -> Option<Token<'_>> {
708-
let ii = greatest_lower_bound(&self.index, &(line, col), |ii| (ii.0, ii.1))?;
683+
let (idx, raw) =
684+
greatest_lower_bound(&self.tokens, &(line, col), |t| (t.dst_line, t.dst_col))?;
709685

710-
let mut token = self.get_token(ii.2)?;
686+
let mut token = Token {
687+
raw,
688+
sm: self,
689+
idx,
690+
offset: 0,
691+
};
711692

712693
if token.is_range() {
713694
token.offset = col - token.get_dst_col();
@@ -827,19 +808,6 @@ impl SourceMap {
827808
self.names.clear();
828809
}
829810

830-
/// Returns the number of items in the index
831-
pub fn get_index_size(&self) -> usize {
832-
self.index.len()
833-
}
834-
835-
/// Returns the number of items in the index
836-
pub fn index_iter(&self) -> IndexIter<'_> {
837-
IndexIter {
838-
i: self,
839-
next_idx: 0,
840-
}
841-
}
842-
843811
/// This rewrites the sourcemap according to the provided rewrite
844812
/// options.
845813
///
@@ -998,7 +966,6 @@ impl SourceMap {
998966
// the `self` tokens and `src_line/col` for the `adjustment` tokens.
999967
let self_tokens = std::mem::take(&mut self.tokens);
1000968
let original_ranges = create_ranges(self_tokens, |t| (t.dst_line, t.dst_col));
1001-
self.index.clear();
1002969
let adjustment_ranges =
1003970
create_ranges(adjustment.tokens.clone(), |t| (t.src_line, t.src_col));
1004971

@@ -1059,14 +1026,8 @@ impl SourceMap {
10591026
}
10601027
}
10611028

1062-
// Regenerate the index
1063-
self.index.extend(
1064-
self.tokens
1065-
.iter()
1066-
.enumerate()
1067-
.map(|(idx, token)| (token.dst_line, token.dst_col, idx as u32)),
1068-
);
1069-
self.index.sort_unstable();
1029+
self.tokens
1030+
.sort_unstable_by_key(|t| (t.dst_line, t.dst_col));
10701031
}
10711032
}
10721033

@@ -1190,7 +1151,7 @@ impl SourceMapIndex {
11901151
/// If a sourcemap is encountered that is not embedded but just
11911152
/// externally referenced it is silently skipped.
11921153
pub fn lookup_token(&self, line: u32, col: u32) -> Option<Token<'_>> {
1193-
let section =
1154+
let (_section_idx, section) =
11941155
greatest_lower_bound(&self.sections, &(line, col), SourceMapSection::get_offset)?;
11951156
let map = section.get_sourcemap()?;
11961157
let (off_line, off_col) = section.get_offset();

src/utils.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,13 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>(
118118
slice: &'a [T],
119119
key: &K,
120120
map: F,
121-
) -> Option<&'a T> {
121+
) -> Option<(usize, &'a T)> {
122122
let mut idx = match slice.binary_search_by_key(key, &map) {
123123
Ok(index) => index,
124124
Err(index) => {
125125
// If there is no match, then we know for certain that the index is where we should
126126
// insert a new token, and that the token directly before is the greatest lower bound.
127-
return slice.get(index.checked_sub(1)?);
127+
return slice.get(index.checked_sub(1)?).map(|res| (index, res));
128128
}
129129
};
130130

@@ -138,7 +138,7 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>(
138138
break;
139139
}
140140
}
141-
slice.get(idx)
141+
slice.get(idx).map(|res| (idx, res))
142142
}
143143

144144
#[test]
@@ -201,27 +201,27 @@ fn test_greatest_lower_bound() {
201201
let cmp = |&(i, _id)| i;
202202

203203
let haystack = vec![(1, 1)];
204-
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
205-
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 1)));
204+
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
205+
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 1));
206206
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);
207207

208208
let haystack = vec![(1, 1), (1, 2)];
209-
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
210-
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 2)));
209+
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
210+
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 2));
211211
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);
212212

213213
let haystack = vec![(1, 1), (1, 2), (1, 3)];
214-
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
215-
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 3)));
214+
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
215+
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 3));
216216
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);
217217

218218
let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4)];
219-
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
220-
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 4)));
219+
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
220+
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 4));
221221
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);
222222

223223
let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)];
224-
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
225-
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 5)));
224+
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
225+
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 5));
226226
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);
227227
}

0 commit comments

Comments
 (0)