Skip to content

Commit 50b1bcd

Browse files
authored
feat: add damerau-levenshtein distance algorithm (#6706)
* feat: add damerau-levenshtein distance algorithm * refactor: remove useless parentheses * refactor: add class link to test
1 parent b031a0b commit 50b1bcd

File tree

2 files changed

+379
-0
lines changed

2 files changed

+379
-0
lines changed
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
package com.thealgorithms.dynamicprogramming;
2+
3+
import java.util.HashMap;
4+
import java.util.Map;
5+
6+
/**
 * Implementation of the full (unrestricted) Damerau–Levenshtein distance algorithm.
 *
 * <p>The distance is the minimum number of operations required to transform one
 * string into another. Supported operations are insertion, deletion, substitution,
 * and transposition of adjacent characters.
 *
 * <p>Unlike the restricted version (optimal string alignment, OSA), this
 * implementation allows multiple edits on the same substring, computing the true
 * edit distance.
 *
 * <p>Strings are compared {@code char} by {@code char} (UTF-16 code units).
 *
 * <p>Time Complexity: O(n * m) expected, since every table cell is filled in constant
 * time using a hash-map lookup.<br>
 * Space Complexity: O(n * m) for the table, plus one map entry per distinct character.
 */
public final class DamerauLevenshteinDistance {

    private DamerauLevenshteinDistance() {
        // Utility class
    }

    /**
     * Computes the full Damerau–Levenshtein distance between two strings.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return the minimum edit distance between the two strings
     * @throws IllegalArgumentException if either input string is null
     */
    public static int distance(String s1, String s2) {
        validateInputs(s1, s2);

        int n = s1.length();
        int m = s2.length();

        Map<Character, Integer> charLastPosition = buildCharacterMap(s1, s2);
        int[][] dp = initializeTable(n, m);

        fillTable(s1, s2, dp, charLastPosition);

        // The table is shifted by one row and one column, so the answer for the
        // full strings lives at [n + 1][m + 1].
        return dp[n + 1][m + 1];
    }

    /**
     * Validates that both input strings are not null.
     *
     * @param s1 the first string to validate
     * @param s2 the second string to validate
     * @throws IllegalArgumentException if either string is null
     */
    private static void validateInputs(String s1, String s2) {
        if (s1 == null || s2 == null) {
            throw new IllegalArgumentException("Input strings must not be null.");
        }
    }

    /**
     * Builds a map containing every distinct character of both strings, each mapped to 0.
     *
     * <p>During the computation the map holds, for each character, the 1-based index of
     * its most recent occurrence in the already processed prefix of {@code s1}
     * (0 meaning "not seen yet"). Pre-populating it guarantees that every lookup for a
     * character of {@code s2} finds an entry.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return a mutable map of all distinct characters from both strings, initialized to 0
     */
    private static Map<Character, Integer> buildCharacterMap(String s1, String s2) {
        Map<Character, Integer> charMap = new HashMap<>();
        for (char c : s1.toCharArray()) {
            charMap.putIfAbsent(c, 0);
        }
        for (char c : s2.toCharArray()) {
            charMap.putIfAbsent(c, 0);
        }
        return charMap;
    }

    /**
     * Initializes the dynamic programming table for the algorithm.
     *
     * <p>The table has dimensions (n+2) x (m+2). Row 0 and column 0 are sentinels
     * holding {@code n + m}, an upper bound on any distance, so that a transposition
     * referring to a character that has not been seen yet can never win the minimum.
     * Row 1 and column 1 hold the base case of transforming from / to an empty string.
     *
     * @param n the length of the first string
     * @param m the length of the second string
     * @return an initialized DP table ready for computation
     */
    private static int[][] initializeTable(int n, int m) {
        int maxDist = n + m;
        int[][] dp = new int[n + 2][m + 2];

        dp[0][0] = maxDist;

        for (int i = 0; i <= n; i++) {
            dp[i + 1][0] = maxDist;
            dp[i + 1][1] = i;
        }

        for (int j = 0; j <= m; j++) {
            dp[0][j + 1] = maxDist;
            dp[1][j + 1] = j;
        }

        return dp;
    }

    /**
     * Fills the dynamic programming table by computing the minimum edit distance
     * for each pair of prefixes.
     *
     * <p>Cell {@code dp[i + 1][j + 1]} receives the distance between the first
     * {@code i} characters of {@code s1} and the first {@code j} characters of
     * {@code s2}.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @param dp the dynamic programming table to fill
     * @param charLastPosition map tracking the last position of each character in s1;
     *        updated in place after each row
     */
    private static void fillTable(String s1, String s2, int[][] dp, Map<Character, Integer> charLastPosition) {
        int n = s1.length();
        int m = s2.length();

        for (int i = 1; i <= n; i++) {
            char char1 = s1.charAt(i - 1);
            // Last column of this row (strictly before the current one) where char1
            // matched a character of s2; 0 means no match so far.
            int lastMatchCol = 0;

            for (int j = 1; j <= m; j++) {
                char char2 = s2.charAt(j - 1);
                boolean match = char1 == char2;

                int lastMatchRow = charLastPosition.get(char2);
                int cost = match ? 0 : 1;

                // The transposition term must see the PREVIOUS match column. Updating
                // lastMatchCol before this call would under-count by one whenever the
                // matching character already occurred earlier in s1 (e.g. "aa" vs "a").
                dp[i + 1][j + 1] = computeMinimumCost(dp, i, j, lastMatchRow, lastMatchCol, cost);

                if (match) {
                    lastMatchCol = j;
                }
            }

            // Only now does row i become a "previous occurrence" of char1.
            charLastPosition.put(char1, i);
        }
    }

    /**
     * Computes the minimum cost among all possible operations at the current position.
     *
     * <p>This method evaluates four possible operations:
     * 1. Substitution: replace character at position i with character at position j
     * 2. Insertion: insert character from s2 at position j
     * 3. Deletion: delete character from s1 at position i
     * 4. Transposition: swap the pair found at (lastMatchRow, lastMatchCol) with the
     *    current pair, paying for every character lying in between
     *
     * @param dp the dynamic programming table
     * @param i the current position in the first string (1-indexed)
     * @param j the current position in the second string (1-indexed)
     * @param lastMatchRow the last position before {@code i} at which the current character
     *        of s2 appeared in s1, or 0 if none
     * @param lastMatchCol the last position before {@code j} at which the current character
     *        of s1 matched in s2, or 0 if none
     * @param cost the substitution cost (0 if characters match, 1 otherwise)
     * @return the minimum cost among all operations
     */
    private static int computeMinimumCost(int[][] dp, int i, int j, int lastMatchRow, int lastMatchCol, int cost) {
        int substitution = dp[i][j] + cost;
        int insertion = dp[i + 1][j] + 1;
        int deletion = dp[i][j + 1] + 1;
        // Characters skipped in s1 + the swap itself + characters skipped in s2.
        int transposition = dp[lastMatchRow][lastMatchCol] + (i - lastMatchRow - 1) + 1 + (j - lastMatchCol - 1);

        return Math.min(Math.min(substitution, insertion), Math.min(deletion, transposition));
    }
}
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
package com.thealgorithms.dynamicprogramming;
2+
3+
import static org.junit.jupiter.api.Assertions.assertEquals;
4+
import static org.junit.jupiter.api.Assertions.assertThrows;
5+
import static org.junit.jupiter.api.Assertions.assertTrue;
6+
7+
import org.junit.jupiter.api.DisplayName;
8+
import org.junit.jupiter.api.Test;
9+
10+
/**
11+
* Unit tests for the {@code DamerauLevenshteinDistance} class.
12+
* Tests cover edge cases, basic operations, and complex transposition scenarios.
13+
*/
14+
class DamerauLevenshteinDistanceTest {
15+
16+
@Test
17+
@DisplayName("Should throw exception for null first string")
18+
void testNullFirstString() {
19+
assertThrows(IllegalArgumentException.class, () -> { DamerauLevenshteinDistance.distance(null, "test"); });
20+
}
21+
22+
@Test
23+
@DisplayName("Should throw exception for null second string")
24+
void testNullSecondString() {
25+
assertThrows(IllegalArgumentException.class, () -> { DamerauLevenshteinDistance.distance("test", null); });
26+
}
27+
28+
@Test
29+
@DisplayName("Should throw exception for both null strings")
30+
void testBothNullStrings() {
31+
assertThrows(IllegalArgumentException.class, () -> { DamerauLevenshteinDistance.distance(null, null); });
32+
}
33+
34+
@Test
35+
@DisplayName("Should return 0 for identical strings")
36+
void testIdenticalStrings() {
37+
assertEquals(0, DamerauLevenshteinDistance.distance("", ""));
38+
assertEquals(0, DamerauLevenshteinDistance.distance("a", "a"));
39+
assertEquals(0, DamerauLevenshteinDistance.distance("abc", "abc"));
40+
assertEquals(0, DamerauLevenshteinDistance.distance("hello", "hello"));
41+
}
42+
43+
@Test
44+
@DisplayName("Should return length when one string is empty")
45+
void testEmptyStrings() {
46+
assertEquals(3, DamerauLevenshteinDistance.distance("", "abc"));
47+
assertEquals(5, DamerauLevenshteinDistance.distance("hello", ""));
48+
assertEquals(0, DamerauLevenshteinDistance.distance("", ""));
49+
}
50+
51+
@Test
52+
@DisplayName("Should handle single character insertions")
53+
void testSingleInsertion() {
54+
assertEquals(1, DamerauLevenshteinDistance.distance("cat", "cats"));
55+
assertEquals(1, DamerauLevenshteinDistance.distance("ab", "abc"));
56+
assertEquals(1, DamerauLevenshteinDistance.distance("", "a"));
57+
}
58+
59+
@Test
60+
@DisplayName("Should handle single character deletions")
61+
void testSingleDeletion() {
62+
assertEquals(1, DamerauLevenshteinDistance.distance("cats", "cat"));
63+
assertEquals(1, DamerauLevenshteinDistance.distance("abc", "ab"));
64+
assertEquals(1, DamerauLevenshteinDistance.distance("a", ""));
65+
}
66+
67+
@Test
68+
@DisplayName("Should handle single character substitutions")
69+
void testSingleSubstitution() {
70+
assertEquals(1, DamerauLevenshteinDistance.distance("cat", "bat"));
71+
assertEquals(1, DamerauLevenshteinDistance.distance("abc", "adc"));
72+
assertEquals(1, DamerauLevenshteinDistance.distance("x", "y"));
73+
}
74+
75+
@Test
76+
@DisplayName("Should handle adjacent character transpositions")
77+
void testAdjacentTransposition() {
78+
assertEquals(1, DamerauLevenshteinDistance.distance("ab", "ba"));
79+
assertEquals(1, DamerauLevenshteinDistance.distance("abc", "bac"));
80+
assertEquals(1, DamerauLevenshteinDistance.distance("hello", "ehllo"));
81+
}
82+
83+
@Test
84+
@DisplayName("Should correctly compute distance for CA to ABC")
85+
void testCAtoABC() {
86+
// This is the critical test case that differentiates full DL from OSA
87+
// Full DL: 2 (insert A at start, insert B in middle)
88+
// OSA would give: 3
89+
assertEquals(2, DamerauLevenshteinDistance.distance("CA", "ABC"));
90+
}
91+
92+
@Test
93+
@DisplayName("Should handle non-adjacent transpositions")
94+
void testNonAdjacentTransposition() {
95+
assertEquals(2, DamerauLevenshteinDistance.distance("abc", "cba"));
96+
assertEquals(3, DamerauLevenshteinDistance.distance("abcd", "dcba"));
97+
}
98+
99+
@Test
100+
@DisplayName("Should handle multiple operations")
101+
void testMultipleOperations() {
102+
assertEquals(3, DamerauLevenshteinDistance.distance("kitten", "sitting"));
103+
assertEquals(3, DamerauLevenshteinDistance.distance("saturday", "sunday"));
104+
assertEquals(5, DamerauLevenshteinDistance.distance("intention", "execution"));
105+
}
106+
107+
@Test
108+
@DisplayName("Should handle completely different strings")
109+
void testCompletelyDifferentStrings() {
110+
assertEquals(3, DamerauLevenshteinDistance.distance("abc", "xyz"));
111+
assertEquals(4, DamerauLevenshteinDistance.distance("hello", "world"));
112+
}
113+
114+
@Test
115+
@DisplayName("Should handle strings with repeated characters")
116+
void testRepeatedCharacters() {
117+
assertEquals(0, DamerauLevenshteinDistance.distance("aaa", "aaa"));
118+
assertEquals(1, DamerauLevenshteinDistance.distance("aaa", "aab"));
119+
assertEquals(1, DamerauLevenshteinDistance.distance("aaa", "aba"));
120+
}
121+
122+
@Test
123+
@DisplayName("Should be symmetric")
124+
void testSymmetry() {
125+
assertEquals(DamerauLevenshteinDistance.distance("abc", "def"), DamerauLevenshteinDistance.distance("def", "abc"));
126+
assertEquals(DamerauLevenshteinDistance.distance("hello", "world"), DamerauLevenshteinDistance.distance("world", "hello"));
127+
}
128+
129+
@Test
130+
@DisplayName("Should handle case sensitivity")
131+
void testCaseSensitivity() {
132+
assertEquals(1, DamerauLevenshteinDistance.distance("Hello", "hello"));
133+
assertEquals(5, DamerauLevenshteinDistance.distance("HELLO", "hello"));
134+
}
135+
136+
@Test
137+
@DisplayName("Should handle single character strings")
138+
void testSingleCharacterStrings() {
139+
assertEquals(1, DamerauLevenshteinDistance.distance("a", "b"));
140+
assertEquals(0, DamerauLevenshteinDistance.distance("a", "a"));
141+
assertEquals(2, DamerauLevenshteinDistance.distance("a", "abc"));
142+
}
143+
144+
@Test
145+
@DisplayName("Should handle long strings efficiently")
146+
void testLongStrings() {
147+
String s1 = "abcdefghijklmnopqrstuvwxyz";
148+
String s2 = "abcdefghijklmnopqrstuvwxyz";
149+
assertEquals(0, DamerauLevenshteinDistance.distance(s1, s2));
150+
151+
String s3 = "abcdefghijklmnopqrstuvwxyz";
152+
String s4 = "zyxwvutsrqponmlkjihgfedcba";
153+
assertEquals(25, DamerauLevenshteinDistance.distance(s3, s4));
154+
}
155+
156+
@Test
157+
@DisplayName("Should satisfy triangle inequality")
158+
void testTriangleInequality() {
159+
// d(a,c) <= d(a,b) + d(b,c)
160+
String a = "cat";
161+
String b = "hat";
162+
String c = "rat";
163+
164+
int ab = DamerauLevenshteinDistance.distance(a, b);
165+
int bc = DamerauLevenshteinDistance.distance(b, c);
166+
int ac = DamerauLevenshteinDistance.distance(a, c);
167+
168+
assertTrue(ac <= ab + bc);
169+
}
170+
171+
@Test
172+
@DisplayName("Should handle special characters")
173+
void testSpecialCharacters() {
174+
assertEquals(0, DamerauLevenshteinDistance.distance("hello!", "hello!"));
175+
assertEquals(1, DamerauLevenshteinDistance.distance("hello!", "hello?"));
176+
assertEquals(1, DamerauLevenshteinDistance.distance("a@b", "a#b"));
177+
}
178+
179+
@Test
180+
@DisplayName("Should handle numeric strings")
181+
void testNumericStrings() {
182+
assertEquals(1, DamerauLevenshteinDistance.distance("123", "124"));
183+
assertEquals(1, DamerauLevenshteinDistance.distance("123", "213"));
184+
assertEquals(0, DamerauLevenshteinDistance.distance("999", "999"));
185+
}
186+
187+
@Test
188+
@DisplayName("Should handle unicode characters")
189+
void testUnicodeCharacters() {
190+
assertEquals(0, DamerauLevenshteinDistance.distance("café", "café"));
191+
assertEquals(1, DamerauLevenshteinDistance.distance("café", "cafe"));
192+
assertEquals(0, DamerauLevenshteinDistance.distance("你好", "你好"));
193+
}
194+
}

0 commit comments

Comments
 (0)