@@ -22,7 +22,6 @@ Class {
2222
2323{ #category : ' private' }
2424AIDamerauLevenshteinDistance >> calculateMinValue: cost at: i at: j using: lastMatchingRow and : lastMatchColumn [
25-
2625 " Here we calculate the value of all the possible operations we can do (addition, deletion, substitution and transposition).
2726
2827 - Each operation costs 1.
@@ -41,74 +40,71 @@ AIDamerauLevenshteinDistance >> calculateMinValue: cost at: i at: j using: lastM
4140 - We make sure lastMatchingRow and lastMatchColumn are both greater than 1 before computing the transposition value, so that the lookup at (lastMatchingRow - 1, lastMatchColumn - 1) never addresses a (0,0) cell - since it does not exist."
4241
4342 | addition deletion substitution transposition minValue upperCell leftCell upperLeftCell |
44-
45- upperCell := distanceMatrix at: i at: j - 1 .
46- addition := upperCell + 1 .
47-
48- leftCell := distanceMatrix at: i - 1 at: j .
49- deletion := leftCell + 1 .
50-
51- upperLeftCell := distanceMatrix at: i - 1 at: j - 1 .
52- substitution := upperLeftCell + cost.
53-
54- transposition := (lastMatchingRow > 1 and : [ lastMatchColumn > 1 ])
55- ifTrue: [ (distanceMatrix at: lastMatchingRow - 1 at: lastMatchColumn - 1 )
56- + (i - lastMatchingRow - 1 )
57- + 1
58- + (j - lastMatchColumn - 1 ) ]
59- ifFalse: [ distanceMatrix at: 1 at: 1 ].
60-
61- minValue := { addition . deletion . substitution . transposition } min.
62-
63- distanceMatrix at: i at: j put: minValue
43+
44+ upperCell := distanceMatrix atColumn: j atRow: i - 1 .
45+ addition := upperCell + 1 .
46+
47+ leftCell := distanceMatrix atColumn: j - 1 atRow: i .
48+ deletion := leftCell + 1 .
49+
50+ upperLeftCell := distanceMatrix atColumn: j - 1 atRow: i - 1 .
51+ substitution := upperLeftCell + cost.
52+
53+ transposition := Float infinity.
54+ ( lastMatchingRow > 1 and : [ lastMatchColumn > 1 ] )
55+ ifTrue: [
56+ transposition := (distanceMatrix atColumn: lastMatchColumn - 1 atRow: lastMatchingRow - 1 )
57+ + (i - lastMatchingRow - 1 )
58+ + 1
59+ + (j - lastMatchColumn - 1 ) ].
60+
61+ minValue := { addition . deletion . substitution . transposition } min.
62+ distanceMatrix atColumn: j atRow: i put: minValue
6463]
6564
{ #category : ' private' }
AIDamerauLevenshteinDistance >> damerauLevenshteinAlgorithmFor: firstString and: secondString [
	"Here we implement the Damerau-Levenshtein algorithm: every cell of distanceMatrix from (3, 3) onwards is filled row by row.

	- Row i is paired with the character of firstString at i - 2 and column j with the character of secondString at j - 2.
	- lastMatchingRow is the last previous row whose character equals the current column's character (1 when there is none yet).
	- lastMatchColumn is the last column BEFORE the current one, in this row, where the characters matched (1 when there is none yet).
	(Reminder: column's characters belong to the second string and row's characters to the first string)"

	| cost lastMatchColumn secondStringChar firstStringChar lastMatchingRow charPositions |
	self initializeDistanceMatrixWith: firstString and: secondString.

	"Maps each character of firstString to the last row already processed that holds it."
	charPositions := Dictionary new.

	3 to: distanceMatrix height do: [ :i |
		firstStringChar := firstString at: i - 2.
		lastMatchColumn := 1.

		3 to: distanceMatrix width do: [ :j |
			secondStringChar := secondString at: j - 2.
			lastMatchingRow := charPositions at: secondStringChar ifAbsent: [ 1 ].

			cost := secondStringChar = firstStringChar
				        ifTrue: [ 0 ]
				        ifFalse: [ 1 ].

			"The transposition term needs the matching column found before this cell, so the cell is computed first and the match is recorded afterwards. Recording it first made (j - lastMatchColumn - 1) equal to -1, which cancelled the cost of the transposition: 'aa' against 'a' gave 0 instead of 1."
			self calculateMinValue: cost at: i at: j using: lastMatchingRow and: lastMatchColumn.

			cost = 0 ifTrue: [ lastMatchColumn := j ] ].

		"Only rows that are completely processed may be used as lastMatchingRow."
		charPositions at: firstStringChar put: i ]
]
10096
{ #category : ' api' }
AIDamerauLevenshteinDistance >> distanceBetween: firstString and: secondString [
	"Answer the distance between firstString and secondString. When one of the strings is empty the answer is the size of the other one; otherwise the distance matrix is computed and its last cell is answered."

	| lastColumn lastRow |
	firstString isEmpty ifTrue: [ ^ secondString size ].
	secondString isEmpty ifTrue: [ ^ firstString size ].

	self damerauLevenshteinAlgorithmFor: firstString and: secondString.

	"The result is read from the cell in the last column of the last row."
	lastColumn := distanceMatrix width.
	lastRow := distanceMatrix height.
	^ distanceMatrix atColumn: lastColumn atRow: lastRow
]
113109
114110{ #category : ' accessing' }
@@ -123,31 +119,33 @@ AIDamerauLevenshteinDistance >> distanceMatrix: aCollection [
123119
{ #category : ' private' }
AIDamerauLevenshteinDistance >> fillFirstTwoRowsAndColumnsWith: firstString and: secondString [
	"Write the border values of distanceMatrix.

	- The first row and the first column receive the sum of both string sizes.
	- The second row and the second column receive 0, 1, 2, ... (the index minus 2)."

	| borderValue rowCount columnCount |
	borderValue := firstString size + secondString size.
	rowCount := distanceMatrix height.
	columnCount := distanceMatrix width.

	(1 to: rowCount) do: [ :row |
		distanceMatrix atColumn: 1 atRow: row put: borderValue ].
	(1 to: columnCount) do: [ :column |
		distanceMatrix atColumn: column atRow: 1 put: borderValue ].

	"These two passes overwrite the border value stored at (2, 1) and (1, 2)... no: they start at index 2, so the first row and column stay untouched."
	(2 to: rowCount) do: [ :row |
		distanceMatrix atColumn: 2 atRow: row put: row - 2 ].
	(2 to: columnCount) do: [ :column |
		distanceMatrix atColumn: column atRow: 2 put: column - 2 ].

	"Cell (2, 2) is set explicitly to 0."
	distanceMatrix atColumn: 2 atRow: 2 put: 0
]
144141
{ #category : ' private' }
AIDamerauLevenshteinDistance >> initializeDistanceMatrixWith: firstString and: secondString [
	"Create distanceMatrix with one column per character of secondString and one row per character of firstString, plus two extra columns and two extra rows. Every cell is set to 0 and then the first two rows and columns are filled."

	| columns rows |
	columns := secondString size + 2.
	rows := firstString size + 2.

	distanceMatrix := CTArray2D width: columns height: rows.
	distanceMatrix atAllPut: 0.

	self fillFirstTwoRowsAndColumnsWith: firstString and: secondString
]
0 commit comments