@@ -10,25 +10,27 @@ def levenshtein
1010 field = params [ :field ] || "name"
1111
1212 by_size = items . group_by { |s | s . name . length }
13- @ pairs = [ ]
13+ pairs = [ ]
1414 ( 0 ..( by_size . keys . size - 1 ) ) . each do |k |
15- interesting_values = [ ]
16- ( ( -[ k , THRESHOLD - 1 ] . min ) ..0 ) . each do |p |
17- interesting_values << by_size [ by_size . keys . sort [ k + p ] ]
18- end
19- ( 0 ..interesting_values . length - 1 ) . each do |o |
20- ( 0 ..( interesting_values [ o ] . length - 1 ) ) . each do |i |
21- d = 0
22- ( ( i +1 ) ..( interesting_values [ interesting_values . length - 1 ] . length - 1 ) ) . each do |j |
23- p1 = interesting_values [ o ] [ i ]
24- p2 = interesting_values [ interesting_values . length - 1 ] [ j ]
25- d = Edits ::Levenshtein . distance_with_max ( p1 [ field ] , p2 [ field ] , THRESHOLD + 1 )
26- @pairs << OpenStruct . new ( item1 : p1 , item2 : p2 , distance : d ) if d < THRESHOLD
15+ interesting_values = [ ]
16+ ( ( -[ k , THRESHOLD - 1 ] . min ) ..0 ) . each do |p |
17+ interesting_values << by_size [ by_size . keys . sort [ k + p ] ]
18+ end
19+ ( 0 ..interesting_values . length - 1 ) . each do |o |
20+ ( 0 ..( interesting_values [ o ] . length - 1 ) ) . each do |i |
21+ d = 0
22+ ( ( i +1 ) ..( interesting_values [ interesting_values . length - 1 ] . length - 1 ) ) . each do |j |
23+ p1 = interesting_values [ o ] [ i ]
24+ p2 = interesting_values [ interesting_values . length - 1 ] [ j ]
25+ d = Edits ::Levenshtein . distance_with_max ( p1 . levenshtein_name , p2 . levenshtein_name , THRESHOLD + 1 )
26+ pairs << OpenStruct . new ( item1 : p1 , item2 : p2 , distance : d ) if d < THRESHOLD
27+ end
2728 end
2829 end
29- end
3030 end
31- @pairs . sort_by! ( &:distance )
31+ pairs . sort_by! ( &:distance )
32+
33+ @pairs = pairs
3234 end
3335
3436 def merge
0 commit comments