Skip to content

Commit 5c52b4e

Browse files
committed
diff: omit construction of count-to-words map for right-side histogram
This also allows us to borrow Vec<WordPosition> from &self.
1 parent 493f610 commit 5c52b4e

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

lib/src/diff.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,12 @@ impl<'input, 'aux> DiffSource<'input, 'aux> {
103103
}
104104
}
105105

106-
struct Histogram<'a> {
107-
word_to_positions: HashMap<&'a BStr, Vec<WordPosition>>,
108-
count_to_words: BTreeMap<usize, Vec<&'a BStr>>,
106+
struct Histogram<'input> {
107+
word_to_positions: HashMap<&'input BStr, Vec<WordPosition>>,
109108
}
110109

111-
impl Histogram<'_> {
112-
fn calculate<'a>(source: &DiffSource<'a, '_>, max_occurrences: usize) -> Histogram<'a> {
110+
impl<'input> Histogram<'input> {
111+
fn calculate(source: &DiffSource<'input, '_>, max_occurrences: usize) -> Self {
113112
let mut word_to_positions: HashMap<&BStr, Vec<WordPosition>> = HashMap::new();
114113
for (i, range) in source.ranges.iter().enumerate() {
115114
let word = &source.text[range.clone()];
@@ -120,14 +119,15 @@ impl Histogram<'_> {
120119
positions.push(WordPosition(i));
121120
}
122121
}
122+
Histogram { word_to_positions }
123+
}
124+
125+
fn build_count_to_words(&self) -> BTreeMap<usize, Vec<&'input BStr>> {
123126
let mut count_to_words: BTreeMap<usize, Vec<&BStr>> = BTreeMap::new();
124-
for (word, ranges) in &word_to_positions {
127+
for (word, ranges) in &self.word_to_positions {
125128
count_to_words.entry(ranges.len()).or_default().push(word);
126129
}
127-
Histogram {
128-
word_to_positions,
129-
count_to_words,
130-
}
130+
count_to_words
131131
}
132132
}
133133

@@ -233,16 +233,16 @@ fn unchanged_ranges_lcs(
233233
) -> Vec<(Range<usize>, Range<usize>)> {
234234
let max_occurrences = 100;
235235
let left_histogram = Histogram::calculate(left, max_occurrences);
236-
if *left_histogram.count_to_words.keys().next().unwrap() > max_occurrences {
236+
let left_count_to_words = left_histogram.build_count_to_words();
237+
if *left_count_to_words.keys().next().unwrap() > max_occurrences {
237238
// If there are very many occurrences of all words, then we just give up.
238239
return vec![];
239240
}
240241
let right_histogram = Histogram::calculate(right, max_occurrences);
241242
// Look for words with few occurrences in `left` (could equally well have picked
242243
// `right`?). If any of them also occur in `right`, then we add the words to
243244
// the LCS.
244-
let Some(uncommon_shared_words) = left_histogram
245-
.count_to_words
245+
let Some(uncommon_shared_words) = left_count_to_words
246246
.iter()
247247
.map(|(left_count, left_words)| -> Vec<&BStr> {
248248
left_words

0 commit comments

Comments
 (0)