Skip to content

Commit 493f610

Browse files
committed
diff: build left-right index map without using (word, occurrence) hash keys
We can assign a unique integer to each (word, occurrence) pair instead. As a bonus, HashMap can be replaced with Vec.

```
group                             new                     old
-----                             ---                     ---
bench_diff_git_git_read_tree_c    1.00   72.5±3.25µs      1.08   78.5±0.48µs
bench_diff_lines/modified/10k     1.00   45.1±1.18ms      1.10   49.8±1.85ms
bench_diff_lines/modified/1k      1.00    4.1±0.07ms      1.11    4.5±0.34ms
bench_diff_lines/reversed/10k     1.00   19.0±0.12ms      1.12   21.2±1.26ms
bench_diff_lines/reversed/1k      1.00  558.5±37.42µs     1.17  655.6±16.27µs
bench_diff_lines/unchanged/10k    1.00    5.3±0.78ms      1.33    7.0±0.89ms
bench_diff_lines/unchanged/1k     1.00  422.0±16.68µs     1.28  540.7±13.96µs
```
1 parent f38c59f commit 493f610

File tree

1 file changed

+25
-25
lines changed

1 file changed

+25
-25
lines changed

lib/src/diff.rs

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -262,30 +262,30 @@ fn unchanged_ranges_lcs(
262262
return vec![];
263263
};
264264

265-
// [(index into left_ranges, word, occurrence #)]
266-
let mut left_positions = vec![];
267-
let mut right_positions = vec![];
268-
for uncommon_shared_word in uncommon_shared_words {
269-
let left_occurrences = &left_histogram.word_to_positions[uncommon_shared_word];
270-
let right_occurrences = &right_histogram.word_to_positions[uncommon_shared_word];
271-
assert_eq!(left_occurrences.len(), right_occurrences.len());
272-
for (occurrence, (&left_pos, &right_pos)) in
273-
iter::zip(left_occurrences, right_occurrences).enumerate()
274-
{
275-
left_positions.push((left_pos, uncommon_shared_word, occurrence));
276-
right_positions.push((right_pos, uncommon_shared_word, occurrence));
265+
// [(index into ranges, serial to identify {word, occurrence #})]
266+
let (mut left_positions, mut right_positions): (Vec<_>, Vec<_>) = uncommon_shared_words
267+
.iter()
268+
.flat_map(|word| {
269+
let left_occurrences = &left_histogram.word_to_positions[word];
270+
let right_occurrences = &right_histogram.word_to_positions[word];
271+
assert_eq!(left_occurrences.len(), right_occurrences.len());
272+
iter::zip(left_occurrences, right_occurrences)
273+
})
274+
.enumerate()
275+
.map(|(serial, (&left_pos, &right_pos))| ((left_pos, serial), (right_pos, serial)))
276+
.unzip();
277+
left_positions.sort_unstable_by_key(|&(pos, _serial)| pos);
278+
right_positions.sort_unstable_by_key(|&(pos, _serial)| pos);
279+
let left_index_by_right_index: Vec<usize> = {
280+
let mut left_index_map = vec![0; left_positions.len()];
281+
for (i, &(_pos, serial)) in left_positions.iter().enumerate() {
282+
left_index_map[serial] = i;
277283
}
278-
}
279-
left_positions.sort_unstable_by_key(|(pos, _word, _occurence)| *pos);
280-
right_positions.sort_unstable_by_key(|(pos, _word, _occurence)| *pos);
281-
let mut left_position_map = HashMap::new();
282-
for (i, (_pos, word, occurrence)) in left_positions.iter().enumerate() {
283-
left_position_map.insert((*word, *occurrence), i);
284-
}
285-
let mut left_index_by_right_index = vec![];
286-
for (_pos, word, occurrence) in &right_positions {
287-
left_index_by_right_index.push(*left_position_map.get(&(*word, *occurrence)).unwrap());
288-
}
284+
right_positions
285+
.iter()
286+
.map(|&(_pos, serial)| left_index_map[serial])
287+
.collect()
288+
};
289289

290290
let lcs = find_lcs(&left_index_by_right_index);
291291

@@ -295,8 +295,8 @@ fn unchanged_ranges_lcs(
295295
let mut previous_left_position = WordPosition(0);
296296
let mut previous_right_position = WordPosition(0);
297297
for (left_index, right_index) in lcs {
298-
let left_position = left_positions[left_index].0;
299-
let right_position = right_positions[right_index].0;
298+
let (left_position, _) = left_positions[left_index];
299+
let (right_position, _) = right_positions[right_index];
300300
let skipped_left_positions = previous_left_position..left_position;
301301
let skipped_right_positions = previous_right_position..right_position;
302302
if !skipped_left_positions.is_empty() || !skipped_right_positions.is_empty() {

0 commit comments

Comments
 (0)