Skip to content

Commit 9c7f505

Browse files
authored
Merge pull request #54 from Alokzh/fix-ci
Fixed CTArray2D implementation for making CI pass
2 parents 1779e63 + ac8daa0 commit 9c7f505

7 files changed

+173
-169
lines changed

src/AI-EditDistances-Tests/AIDamerauLevenshteinDistanceTest.class.st

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -100,15 +100,28 @@ AIDamerauLevenshteinDistanceTest >> testDistanceToUsingAIDamerauLevenshteinDista
100100

101101
{ #category : 'tests' }
102102
AIDamerauLevenshteinDistanceTest >> testFillFirstTwoRowsAndColumns [
103+
103104
| max result |
104-
damerauLevenshtein distanceMatrix: (CTArray2D rows: 5 columns: 5).
105+
damerauLevenshtein distanceMatrix: (CTArray2D width: 5 height: 5).
105106
max := 10.
106-
result := CTArray2D rows: 5 columns: 5 contents:
107-
{ { max . max . max . max . max } .
108-
{ max . 0 . 1 . 2 . 3 } .
109-
{ max . 1 . nil . nil . nil } .
110-
{ max . 2 . nil . nil . nil } .
111-
{ max . 3 . nil . nil . nil } } flattened.
107+
108+
result := CTArray2D width: 5 height: 5.
109+
result atAllPut: nil.
110+
111+
1 to: 5 do: [:i |
112+
result atColumn: 1 atRow: i put: max.
113+
result atColumn: i atRow: 1 put: max.
114+
].
115+
116+
result atColumn: 2 atRow: 2 put: 0.
117+
result atColumn: 3 atRow: 2 put: 1.
118+
result atColumn: 4 atRow: 2 put: 2.
119+
result atColumn: 5 atRow: 2 put: 3.
120+
121+
result atColumn: 2 atRow: 3 put: 1.
122+
result atColumn: 2 atRow: 4 put: 2.
123+
result atColumn: 2 atRow: 5 put: 3.
124+
112125
damerauLevenshtein fillFirstTwoRowsAndColumnsWith: 'AAAAA' and: 'BBBBB'.
113126
self assert: damerauLevenshtein distanceMatrix equals: result
114127
]

src/AI-EditDistances-Tests/AIEuclideanDistanceTest.extension.st

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,15 +11,15 @@ AIEuclideanDistanceTest >> testEuclideanDistanceThreeDimensions [
1111

1212
self
1313
assert: (metric distanceBetween: #( -5.1 4 -3.1 ) and: #( 4 5.9 -2.2 ))
14-
closeTo: 9.3397
14+
closeTo: 9.339700209321496
1515
]
1616

1717
{ #category : '*AI-EditDistances-Tests' }
1818
AIEuclideanDistanceTest >> testEuclideanDistanceTwoDimensions [
1919

2020
self
2121
assert: (metric distanceBetween: #( -3.54 7 ) and: #( -11.64 9.9 ))
22-
closeTo: 8.603488.
22+
closeTo: 8.603487664894978.
2323

2424
self
2525
assert: (metric distanceBetween: #( 0 1 ) and: #( 1 0))

src/AI-EditDistances/AICosineSimilarityDistance.class.st

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -22,20 +22,3 @@ AICosineSimilarityDistance >> distanceBetween: anArray and: anotherArray [
2222
^ num / (size1 * size2)
2323

2424
]
25-
26-
{ #category : 'api' }
27-
AICosineSimilarityDistance >> distanceBetween: firstCollection and: secondCollection[
28-
| dotProduct normA normB |
29-
firstCollection size = secondCollection size ifFalse: [
30-
self error: 'Collections must have the same length' ].
31-
(firstCollection allSatisfy: [ :x | x isNumber ]) ifFalse: [
32-
self error: 'First collection contains non-numeric elements' ].
33-
(secondCollection allSatisfy: [ :x | x isNumber ]) ifFalse: [
34-
self error: 'Second collection contains non-numeric elements' ].
35-
dotProduct := (firstCollection with: secondCollection collect: [ :a :b | a * b ]) sum.
36-
normA := (firstCollection collect: [ :x | x * x ]) sum sqrt.
37-
normB := (secondCollection collect: [ :x | x * x ]) sum sqrt.
38-
(normA = 0 and: [ normB = 0 ])
39-
ifTrue: [ ^ 1.0 ] "Zero vectors are considered identical."
40-
ifFalse: [ ^ dotProduct / (normA * normB) ]
41-
]

src/AI-EditDistances/AIDamerauLevenshteinDistance.class.st

Lines changed: 70 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ Class {
2222

2323
{ #category : 'private' }
2424
AIDamerauLevenshteinDistance >> calculateMinValue: cost at: i at: j using: lastMatchingRow and: lastMatchColumn [
25-
2625
"Here we calculate the value of all the possible operations we can do (addition, deletion, substitution and transposition).
2726
2827
- Each operation costs 1.
@@ -41,74 +40,71 @@ AIDamerauLevenshteinDistance >> calculateMinValue: cost at: i at: j using: lastM
4140
- We make sure our lastMatchingRow and lastMatchColumn variables are greater than 1 so that, when calculating the transposition value, we never read a (0,0) cell - since it does not exist."
4241

4342
| addition deletion substitution transposition minValue upperCell leftCell upperLeftCell |
44-
45-
upperCell := distanceMatrix at: i at: j - 1.
46-
addition := upperCell + 1.
47-
48-
leftCell := distanceMatrix at: i - 1 at: j.
49-
deletion := leftCell + 1.
50-
51-
upperLeftCell := distanceMatrix at: i - 1 at: j - 1.
52-
substitution := upperLeftCell + cost.
53-
54-
transposition := (lastMatchingRow > 1 and: [ lastMatchColumn > 1 ])
55-
ifTrue: [ (distanceMatrix at: lastMatchingRow - 1 at: lastMatchColumn - 1)
56-
+ (i - lastMatchingRow - 1)
57-
+ 1
58-
+ (j - lastMatchColumn - 1) ]
59-
ifFalse: [ distanceMatrix at: 1 at: 1 ].
60-
61-
minValue := { addition . deletion . substitution . transposition } min.
62-
63-
distanceMatrix at: i at: j put: minValue
43+
44+
upperCell := distanceMatrix atColumn: j atRow: i - 1.
45+
addition := upperCell + 1.
46+
47+
leftCell := distanceMatrix atColumn: j - 1 atRow: i.
48+
deletion := leftCell + 1.
49+
50+
upperLeftCell := distanceMatrix atColumn: j - 1 atRow: i - 1.
51+
substitution := upperLeftCell + cost.
52+
53+
transposition := Float infinity.
54+
(lastMatchingRow > 1 and: [ lastMatchColumn > 1 ])
55+
ifTrue: [
56+
transposition := (distanceMatrix atColumn: lastMatchColumn - 1 atRow: lastMatchingRow - 1)
57+
+ (i - lastMatchingRow - 1)
58+
+ 1
59+
+ (j - lastMatchColumn - 1) ].
60+
61+
minValue := { addition . deletion . substitution . transposition } min.
62+
distanceMatrix atColumn: j atRow: i put: minValue
6463
]
6564

6665
{ #category : 'private' }
6766
AIDamerauLevenshteinDistance >> damerauLevenshteinAlgorithmFor: firstString and: secondString [
68-
6967
" Here we implement the Damerau-Levenshtein algorithm.
7068
7169
- LastMatchingRow indicates the last row with the current column's character.
7270
- LastMatchColumn indicates the last column in this row where the characters matched.
7371
(Reminder : column's characters belong to the second string and row's characters to the first string) "
7472

75-
| cost lastMatchColumn secondStringCurrentCharacter firstStringCurrentCharacter lastMatchingRow rowCharactersWithIndexes |
76-
77-
self initializeDistanceMatrixWith: firstString and: secondString.
78-
79-
rowCharactersWithIndexes := Dictionary new.
80-
81-
3 to: distanceMatrix rowCount do: [ :i |
82-
83-
firstStringCurrentCharacter := firstString at: i - 2.
84-
lastMatchColumn := 1.
85-
86-
3 to: distanceMatrix columnCount do: [ :j |
87-
88-
secondStringCurrentCharacter := secondString at: j - 2.
89-
lastMatchingRow := rowCharactersWithIndexes at: secondStringCurrentCharacter ifAbsent: 1.
90-
91-
cost := secondStringCurrentCharacter = firstStringCurrentCharacter
92-
ifTrue: [ 0 ]
93-
ifFalse: [ 1 ].
94-
cost = 0 ifTrue: [ lastMatchColumn := j ].
95-
96-
self calculateMinValue: cost at: i at: j using: lastMatchingRow and: lastMatchColumn ].
97-
98-
rowCharactersWithIndexes at: firstStringCurrentCharacter put: i ]
73+
| cost lastMatchColumn secondStringChar firstStringChar lastMatchingRow charPositions |
74+
75+
self initializeDistanceMatrixWith: firstString and: secondString.
76+
charPositions := Dictionary new.
77+
78+
3 to: distanceMatrix height do: [ :i |
79+
firstStringChar := firstString at: i - 2.
80+
lastMatchColumn := 1.
81+
82+
3 to: distanceMatrix width do: [ :j |
83+
secondStringChar := secondString at: j - 2.
84+
lastMatchingRow := charPositions at: secondStringChar ifAbsent: 1.
85+
86+
cost := secondStringChar = firstStringChar
87+
ifTrue: [ 0 ]
88+
ifFalse: [ 1 ].
89+
90+
cost = 0 ifTrue: [ lastMatchColumn := j ].
91+
92+
self calculateMinValue: cost at: i at: j using: lastMatchingRow and: lastMatchColumn ].
93+
94+
charPositions at: firstStringChar put: i ]
9995
]
10096

10197
{ #category : 'api' }
10298
AIDamerauLevenshteinDistance >> distanceBetween: firstString and: secondString [
10399

104100
firstString isEmpty ifTrue: [ ^ secondString size ].
105-
secondString isEmpty ifTrue: [ ^ firstString size ].
106-
107-
self damerauLevenshteinAlgorithmFor:firstString and: secondString.
108-
109-
^ distanceMatrix
110-
at: distanceMatrix rowCount
111-
at: distanceMatrix columnCount
101+
secondString isEmpty ifTrue: [ ^ firstString size ].
102+
103+
self damerauLevenshteinAlgorithmFor: firstString and: secondString.
104+
105+
^ distanceMatrix
106+
atColumn: distanceMatrix width
107+
atRow: distanceMatrix height
112108
]
113109

114110
{ #category : 'accessing' }
@@ -123,31 +119,33 @@ AIDamerauLevenshteinDistance >> distanceMatrix: aCollection [
123119

124120
{ #category : 'private' }
125121
AIDamerauLevenshteinDistance >> fillFirstTwoRowsAndColumnsWith: firstString and: secondString [
126-
127122
"It fills the first row and column with the maxDistance value and the second row and column with values starting with 0"
128123

129124
| maxDistance |
130-
maxDistance := firstString size + secondString size.
131-
132-
1 to: distanceMatrix rowCount do: [ :i |
133-
distanceMatrix at: i at: 1 put: maxDistance ].
134-
135-
1 to: distanceMatrix columnCount do: [ :j |
136-
distanceMatrix at: 1 at: j put: maxDistance ].
137-
138-
2 to: distanceMatrix rowCount do: [ :i |
139-
distanceMatrix at: i at: 2 put: i - 2 ].
140-
141-
2 to: distanceMatrix columnCount do: [ :j |
142-
distanceMatrix at: 2 at: j put: j - 2 ]
125+
maxDistance := firstString size + secondString size.
126+
127+
1 to: distanceMatrix height do: [ :i |
128+
distanceMatrix atColumn: 1 atRow: i put: maxDistance ].
129+
130+
1 to: distanceMatrix width do: [ :j |
131+
distanceMatrix atColumn: j atRow: 1 put: maxDistance ].
132+
133+
2 to: distanceMatrix height do: [ :i |
134+
distanceMatrix atColumn: 2 atRow: i put: i - 2 ].
135+
136+
2 to: distanceMatrix width do: [ :j |
137+
distanceMatrix atColumn: j atRow: 2 put: j - 2 ].
138+
139+
distanceMatrix atColumn: 2 atRow: 2 put: 0.
143140
]
144141

145142
{ #category : 'private' }
146143
AIDamerauLevenshteinDistance >> initializeDistanceMatrixWith: firstString and: secondString [
147144

148-
distanceMatrix := CTArray2D
149-
rows: firstString size + 2
150-
columns: secondString size + 2.
151-
145+
distanceMatrix := CTArray2D
146+
width: secondString size + 2
147+
height: firstString size + 2.
148+
149+
distanceMatrix atAllPut: 0.
152150
self fillFirstTwoRowsAndColumnsWith: firstString and: secondString
153-
]
151+
]

src/AI-EditDistances/AIEpisodeDistance.class.st

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -48,18 +48,24 @@ AIEpisodeDistance >> longestCommonSubsequenceLength: firstString and: secondStri
4848
| m n dp |
4949
m := firstString size.
5050
n := secondString size.
51-
52-
dp := CTArray2D rows: m + 1 columns: n + 1.
53-
1 to: m + 1 do: [ :i | dp at: i at: 1 put: 0 ].
54-
1 to: n + 1 do: [ :j | dp at: 1 at: j put: 0 ].
55-
51+
52+
dp := CTArray2D width: n + 1 height: m + 1.
53+
54+
1 to: m + 1 do: [ :i | dp atColumn: 1 atRow: i put: 0 ].
55+
1 to: n + 1 do: [ :j | dp atColumn: j atRow: 1 put: 0 ].
56+
5657
1 to: m do: [ :i |
5758
1 to: n do: [ :j |
5859
(firstString at: i) = (secondString at: j)
59-
ifTrue: [ dp at: i + 1 at: j + 1 put: ((dp at: i at: j) + 1) ]
60-
ifFalse: [ dp at: i + 1 at: j + 1 put: ((dp at: i + 1 at: j) max: (dp at: i at: j + 1)) ]
61-
]
62-
].
63-
64-
^ dp at: m + 1 at: n + 1
60+
ifTrue: [
61+
dp atColumn: j + 1 atRow: i + 1 put: (dp atColumn: j atRow: i) + 1 ]
62+
ifFalse: [
63+
dp
64+
atColumn: j + 1
65+
atRow: i + 1
66+
put:
67+
((dp atColumn: j atRow: i + 1) max:
68+
(dp atColumn: j + 1 atRow: i)) ] ] ].
69+
70+
^ dp atColumn: n + 1 atRow: m + 1
6571
]

src/AI-EditDistances/AILevenshteinDistance.class.st

Lines changed: 27 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -15,26 +15,27 @@ Class {
1515
AILevenshteinDistance >> distanceBetween: firstString and: secondString [
1616

1717
| distanceMatrix |
18-
1918
"If one of the strings is empty, return the other string's size"
2019
firstString isEmpty ifTrue: [ ^ secondString size ].
2120
secondString isEmpty ifTrue: [ ^ firstString size ].
22-
23-
distanceMatrix := self distanceMatrixBasedOn: firstString and: secondString.
21+
22+
distanceMatrix := self
23+
distanceMatrixBasedOn: firstString
24+
and: secondString.
2425

2526
^ distanceMatrix
26-
at: distanceMatrix numberOfRows
27-
at: distanceMatrix numberOfColumns
27+
atColumn: distanceMatrix width
28+
atRow: distanceMatrix height
2829
]
2930

3031
{ #category : 'private' }
3132
AILevenshteinDistance >> distanceMatrixBasedOn: firstString and: secondString [
3233

33-
| distanceMatrix |
34+
| distanceMatrix |
3435
distanceMatrix := CTArray2D
35-
rows: secondString size + 1
36-
columns: firstString size + 1.
37-
36+
width: firstString size + 1
37+
height: secondString size + 1.
38+
3839
self fillFirstRowAndColumn: distanceMatrix.
3940

4041
self fillStartingFromSecondRowAndColumn: distanceMatrix
@@ -48,30 +49,31 @@ AILevenshteinDistance >> distanceMatrixBasedOn: firstString and: secondString [
4849
AILevenshteinDistance >> fillCellInMatrix: aMatrix at: i at: j basedOn: firstString and: secondString [
4950

5051
| cost leftCell upperCell upperLeftCell |
51-
5252
"Setting the cost"
5353
cost := (firstString at: j - 1) = (secondString at: i - 1)
54-
ifTrue: [ 0 ]
55-
ifFalse: [ 1 ].
56-
57-
leftCell := (aMatrix at: i at: j - 1) + 1.
58-
upperCell := (aMatrix at: i - 1 at: j) + 1.
59-
upperLeftCell := (aMatrix at: i - 1 at: j - 1) + cost.
60-
54+
ifTrue: [ 0 ]
55+
ifFalse: [ 1 ].
56+
57+
leftCell := (aMatrix atColumn: j - 1 atRow: i) + 1.
58+
upperCell := (aMatrix atColumn: j atRow: i - 1) + 1.
59+
upperLeftCell := (aMatrix atColumn: j - 1 atRow: i - 1) + cost.
60+
6161
"Calculate the min between the left, upper-left, and upper cells of our current cell"
62-
aMatrix at: i at: j put:
63-
{ leftCell . upperCell . upperLeftCell } min.
62+
aMatrix atColumn: j atRow: i put: {
63+
leftCell.
64+
upperCell.
65+
upperLeftCell } min
6466
]
6567

6668
{ #category : 'private' }
6769
AILevenshteinDistance >> fillFirstRowAndColumn: aMatrix [
68-
6970
"Fill the first row and column starting with 0"
70-
1 to: aMatrix rowCount do: [ :i |
71-
aMatrix at: i at: 1 put: i - 1 ].
71+
72+
1 to: aMatrix height do: [ :i |
73+
aMatrix atColumn: 1 atRow: i put: i - 1 ].
7274

73-
1 to: aMatrix columnCount do: [ :j |
74-
aMatrix at: 1 at: j put: j - 1 ].
75+
1 to: aMatrix width do: [ :j |
76+
aMatrix atColumn: j atRow: 1 put: j - 1 ].
7577
]
7678

7779
{ #category : 'private' }
@@ -85,4 +87,4 @@ AILevenshteinDistance >> fillStartingFromSecondRowAndColumn: aMatrix basedOn: fi
8587
at: j
8688
basedOn: firstString
8789
and: secondString ] ].
88-
]
90+
]

0 commit comments

Comments
 (0)