Skip to content

Commit 008e768

Browse files
authored
feat: Add levenshtein distance implementation (#7)
1 parent c8e1f60 commit 008e768

File tree

3 files changed

+98
-0
lines changed

3 files changed

+98
-0
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package com.haroldadmin.lucilla.benchmarks
2+
3+
import com.haroldadmin.lucilla.core.rank.ld
4+
import org.openjdk.jmh.annotations.Benchmark
5+
import org.openjdk.jmh.annotations.Scope
6+
import org.openjdk.jmh.annotations.State
7+
import org.openjdk.jmh.infra.Blackhole
8+
9+
/**
 * JMH benchmark that measures a single [ld] call on a fixed pair of words.
 */
@State(Scope.Benchmark)
class LevenshteinDistanceBenchmark {
    // The inputs are deliberately kept in mutable (non-final) state fields rather than
    // passed as literals: JMH recommends reading benchmark inputs from state so the JIT
    // cannot treat them as compile-time constants and fold away part of the measured work.
    private var source: String = "levenshtein"
    private var target: String = "edit"

    /**
     * Computes the distance between the two state-held words and hands the result to
     * [blackHole] so the computation cannot be eliminated as dead code.
     */
    @Benchmark
    fun ldBenchmark(blackHole: Blackhole) {
        blackHole.consume(ld(source, target))
    }
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package com.haroldadmin.lucilla.core.rank
2+
3+
/**
 * Calculates the Edit Distance (Levenshtein Distance) between
 * the given Strings: the minimum total cost of the single-character
 * edits needed to transform [a] into [b].
 *
 * Characters are compared as [Char] values (UTF-16 code units).
 *
 * All three costs are applied consistently, including when one of the
 * strings is empty. The shortcut taken for equal characters (carrying the
 * diagonal value over unchanged) assumes the costs are non-negative.
 *
 * @param a The first (source) string
 * @param b The second (target) string
 * @param deletionCost The cost of deleting one character of [a]
 * @param insertionCost The cost of inserting one character of [b]
 * @param replacementCost The cost of replacing a character of [a] with a different character of [b]
 * @return The minimum total edit cost; `0` when both strings are equal
 */
public fun ld(
    a: String,
    b: String,
    deletionCost: Int = 1,
    insertionCost: Int = 1,
    replacementCost: Int = 1,
): Int {
    // Turning "" into b takes exactly one insertion per character of b.
    if (a.isEmpty()) {
        return b.length * insertionCost
    }

    // Turning a into "" takes exactly one deletion per character of a.
    if (b.isEmpty()) {
        return a.length * deletionCost
    }

    // dpTable[i][j] holds the cheapest cost of turning the first i characters
    // of a into the first j characters of b.
    val dpTable: Array<IntArray> = Array(a.length + 1) {
        IntArray(b.length + 1)
    }

    // First column: every character of the a-prefix has to be deleted.
    for (i in 1..a.length) {
        dpTable[i][0] = i * deletionCost
    }
    // First row: every character of the b-prefix has to be inserted.
    for (j in 1..b.length) {
        dpTable[0][j] = j * insertionCost
    }

    for (i in 1..a.length) {
        val aChar = a[i - 1]
        for (j in 1..b.length) {
            val bChar = b[j - 1]
            dpTable[i][j] = if (aChar == bChar) {
                // Equal characters need no edit; reuse the cost of the shorter prefixes.
                dpTable[i - 1][j - 1]
            } else {
                minOf(
                    // Replace a[i - 1] with b[j - 1].
                    dpTable[i - 1][j - 1] + replacementCost,
                    // Delete a[i - 1].
                    dpTable[i - 1][j] + deletionCost,
                    // Insert b[j - 1].
                    dpTable[i][j - 1] + insertionCost,
                )
            }
        }
    }

    return dpTable[a.length][b.length]
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package com.haroldadmin.lucilla.core.rank
2+
3+
import io.kotest.core.spec.style.DescribeSpec
4+
import io.kotest.matchers.shouldBe
5+
6+
/**
 * Specs for [ld], exercised with its default costs for every edit operation.
 */
class LevenshteinDistanceTest : DescribeSpec({
    // Two empty inputs need no edits at all.
    it("should return 0 if the strings are empty") {
        ld("", "") shouldBe 0
    }

    // Identical inputs need no edits either.
    it("should return 0 if the strings are the same") {
        ld("foo", "foo") shouldBe 0
    }

    // Against an empty string, the expected distance is the other string's length.
    it("should return the correct distance when one of the strings is empty") {
        ld("foo", "") shouldBe 3
    }

    // General case where both inputs contribute to the result.
    it("should return the correct distance when both strings are non-empty") {
        ld("ephrem", "benyam") shouldBe 5
    }
})

0 commit comments

Comments
 (0)