@@ -122,12 +122,13 @@ impl<'input> Histogram<'input> {
122
122
Histogram { word_to_positions }
123
123
}
124
124
125
- fn build_count_to_words ( & self ) -> BTreeMap < usize , Vec < & ' input BStr > > {
126
- let mut count_to_words: BTreeMap < usize , Vec < & BStr > > = BTreeMap :: new ( ) ;
127
- for ( word, ranges) in & self . word_to_positions {
128
- count_to_words. entry ( ranges. len ( ) ) . or_default ( ) . push ( word) ;
125
/// Groups the histogram's words by how many positions each word has.
///
/// Returns a map from occurrence count (`positions.len()`) to every
/// `(word, positions)` pair in `self.word_to_positions` that has exactly
/// that count. The position lists are borrowed from `self`, so a caller
/// receives them together with the word and does not need a second lookup.
///
/// Because the result is a `BTreeMap`, iterating it visits the counts in
/// ascending order, i.e. the least frequent words come first.
+ fn build_count_to_entries ( & self ) -> BTreeMap < usize , Vec < ( & ' input BStr , & Vec < WordPosition > ) > > {
126
+ let mut count_to_entries: BTreeMap < usize , Vec < _ > > = BTreeMap :: new ( ) ;
127
+ for ( word, positions) in & self . word_to_positions {
128
// Select the bucket for this occurrence count; `or_default` creates an
// empty `Vec` the first time a given count is seen.
+ let entries = count_to_entries. entry ( positions. len ( ) ) . or_default ( ) ;
129
// `*word` copies the `&'input BStr` out of the borrowed map key, so the
// stored word keeps the `'input` lifetime named in the return type.
+ entries. push ( ( * word, positions) ) ;
129
130
}
130
- count_to_words
131
+ count_to_entries
131
132
}
132
133
}
133
134
@@ -233,47 +234,38 @@ fn unchanged_ranges_lcs(
233
234
) -> Vec < ( Range < usize > , Range < usize > ) > {
234
235
let max_occurrences = 100 ;
235
236
let left_histogram = Histogram :: calculate ( left, max_occurrences) ;
236
- let left_count_to_words = left_histogram. build_count_to_words ( ) ;
237
- if * left_count_to_words . keys ( ) . next ( ) . unwrap ( ) > max_occurrences {
237
+ let left_count_to_entries = left_histogram. build_count_to_entries ( ) ;
238
+ if * left_count_to_entries . keys ( ) . next ( ) . unwrap ( ) > max_occurrences {
238
239
// If there are very many occurrences of all words, then we just give up.
239
240
return vec ! [ ] ;
240
241
}
241
242
let right_histogram = Histogram :: calculate ( right, max_occurrences) ;
242
243
// Look for words with few occurrences in `left` (could equally well have picked
243
244
// `right`?). If any of them also occur in `right`, then we add the words to
244
245
// the LCS.
245
- let Some ( uncommon_shared_words) = left_count_to_words
246
- . iter ( )
247
- . map ( |( left_count, left_words) | -> Vec < & BStr > {
248
- left_words
246
+ let Some ( uncommon_shared_word_positions) =
247
+ left_count_to_entries. values ( ) . find_map ( |left_entries| {
248
+ let mut both_positions = left_entries
249
249
. iter ( )
250
- . copied ( )
251
- . filter ( |left_word| {
252
- let right_count = right_histogram
253
- . word_to_positions
254
- . get ( left_word)
255
- . map_or ( 0 , |right_positions| right_positions. len ( ) ) ;
256
- * left_count == right_count
250
+ . filter_map ( |& ( word, left_positions) | {
251
+ let right_positions = right_histogram. word_to_positions . get ( word) ?;
252
+ ( left_positions. len ( ) == right_positions. len ( ) )
253
+ . then_some ( ( left_positions, right_positions) )
257
254
} )
258
- . collect ( )
255
+ . peekable ( ) ;
256
+ both_positions. peek ( ) . is_some ( ) . then_some ( both_positions)
259
257
} )
260
- . find ( |words| !words. is_empty ( ) )
261
258
else {
262
259
return vec ! [ ] ;
263
260
} ;
264
261
265
262
// [(index into ranges, serial to identify {word, occurrence #})]
266
- let ( mut left_positions, mut right_positions) : ( Vec < _ > , Vec < _ > ) = uncommon_shared_words
267
- . iter ( )
268
- . flat_map ( |word| {
269
- let left_occurrences = & left_histogram. word_to_positions [ word] ;
270
- let right_occurrences = & right_histogram. word_to_positions [ word] ;
271
- assert_eq ! ( left_occurrences. len( ) , right_occurrences. len( ) ) ;
272
- iter:: zip ( left_occurrences, right_occurrences)
273
- } )
274
- . enumerate ( )
275
- . map ( |( serial, ( & left_pos, & right_pos) ) | ( ( left_pos, serial) , ( right_pos, serial) ) )
276
- . unzip ( ) ;
263
+ let ( mut left_positions, mut right_positions) : ( Vec < _ > , Vec < _ > ) =
264
+ uncommon_shared_word_positions
265
+ . flat_map ( |( lefts, rights) | iter:: zip ( lefts, rights) )
266
+ . enumerate ( )
267
+ . map ( |( serial, ( & left_pos, & right_pos) ) | ( ( left_pos, serial) , ( right_pos, serial) ) )
268
+ . unzip ( ) ;
277
269
left_positions. sort_unstable_by_key ( |& ( pos, _serial) | pos) ;
278
270
right_positions. sort_unstable_by_key ( |& ( pos, _serial) | pos) ;
279
271
let left_index_by_right_index: Vec < usize > = {
0 commit comments