Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
package com.thealgorithms.dynamicprogramming;

import java.util.HashMap;
import java.util.Map;

/**
 * Implementation of the full (unrestricted) Damerau–Levenshtein distance algorithm.
 *
 * This algorithm calculates the minimum number of operations required
 * to transform one string into another. Supported operations are:
 * insertion, deletion, substitution, and transposition of adjacent characters.
 *
 * Unlike the restricted version (OSA), this implementation allows multiple
 * edits on the same substring, computing the true edit distance.
 *
 * Strings are compared per {@code char} (UTF-16 code unit).
 *
 * Time Complexity: O(n * m) (two nested loops with constant-time map lookups)
 * Space Complexity: O(n * m) for the table, plus one map entry per distinct character
 */
public final class DamerauLevenshteinDistance {

    private DamerauLevenshteinDistance() {
        // Utility class
    }

    /**
     * Computes the full Damerau–Levenshtein distance between two strings.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return the minimum edit distance between the two strings
     * @throws IllegalArgumentException if either input string is null
     */
    public static int distance(String s1, String s2) {
        validateInputs(s1, s2);

        int n = s1.length();
        int m = s2.length();

        Map<Character, Integer> charLastPosition = buildCharacterMap(s1, s2);
        int[][] dp = initializeTable(n, m);

        fillTable(s1, s2, dp, charLastPosition);

        // The table is shifted by one in each dimension, so the answer for the
        // full strings lives at (n + 1, m + 1).
        return dp[n + 1][m + 1];
    }

    /**
     * Validates that both input strings are not null.
     *
     * @param s1 the first string to validate
     * @param s2 the second string to validate
     * @throws IllegalArgumentException if either string is null
     */
    private static void validateInputs(String s1, String s2) {
        if (s1 == null || s2 == null) {
            throw new IllegalArgumentException("Input strings must not be null.");
        }
    }

    /**
     * Builds a map containing every distinct character of both strings, each
     * mapped to 0 ("not seen yet in s1").
     *
     * During the table fill the map records, per character, the last row of
     * s1 in which that character occurred; this is what the transposition
     * case looks up.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return a map of all distinct characters from both strings, initialized to 0
     */
    private static Map<Character, Integer> buildCharacterMap(String s1, String s2) {
        Map<Character, Integer> charMap = new HashMap<>();
        for (char c : s1.toCharArray()) {
            charMap.putIfAbsent(c, 0);
        }
        for (char c : s2.toCharArray()) {
            charMap.putIfAbsent(c, 0);
        }
        return charMap;
    }

    /**
     * Initializes the dynamic programming table for the algorithm.
     *
     * The table has dimensions (n+2) x (m+2). Row 0 and column 0 are sentinel
     * entries holding n + m, a value no real distance can exceed, so that a
     * transposition lookup for a character that has not been seen yet can
     * never win the minimum. Row 1 and column 1 hold the base case of
     * transforming from/to the empty string.
     *
     * @param n the length of the first string
     * @param m the length of the second string
     * @return an initialized DP table ready for computation
     */
    private static int[][] initializeTable(int n, int m) {
        int maxDist = n + m;
        int[][] dp = new int[n + 2][m + 2];

        dp[0][0] = maxDist;

        for (int i = 0; i <= n; i++) {
            dp[i + 1][0] = maxDist;
            dp[i + 1][1] = i;
        }

        for (int j = 0; j <= m; j++) {
            dp[0][j + 1] = maxDist;
            dp[1][j + 1] = j;
        }

        return dp;
    }

    /**
     * Fills the dynamic programming table by computing the minimum edit distance
     * for each pair of prefixes of s1 and s2.
     *
     * For every cell the four operations (substitution/match, insertion, deletion,
     * transposition) are evaluated. The transposition case needs two pieces of
     * history: the last row of s1 containing the current character of s2, and the
     * last column of s2 (strictly before the current one) that matched the current
     * character of s1.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @param dp the dynamic programming table to fill
     * @param charLastPosition map tracking the last position of each character in s1;
     *        updated in place after each row
     */
    private static void fillTable(String s1, String s2, int[][] dp, Map<Character, Integer> charLastPosition) {
        int n = s1.length();
        int m = s2.length();

        for (int i = 1; i <= n; i++) {
            char char1 = s1.charAt(i - 1);
            int lastMatchCol = 0;

            for (int j = 1; j <= m; j++) {
                char char2 = s2.charAt(j - 1);

                int lastMatchRow = charLastPosition.get(char2);
                // The transposition term must use the match column seen BEFORE this
                // cell. Reading it after the update below would make the term one
                // edit too cheap for repeated characters (e.g. "aa" vs "a" gave 0).
                int previousMatchCol = lastMatchCol;

                int cost;
                if (char1 == char2) {
                    cost = 0;
                    lastMatchCol = j;
                } else {
                    cost = 1;
                }

                dp[i + 1][j + 1] = computeMinimumCost(dp, i, j, lastMatchRow, previousMatchCol, cost);
            }

            // Only rows strictly above the current one may serve as transposition
            // partners, so the position is recorded after the row is complete.
            charLastPosition.put(char1, i);
        }
    }

    /**
     * Computes the minimum cost among all possible operations at the current position.
     *
     * This method evaluates four possible operations:
     * 1. Substitution: replace character at position i with character at position j
     * 2. Insertion: insert character from s2 at position j
     * 3. Deletion: delete character from s1 at position i
     * 4. Transposition: swap the two characters located via lastMatchRow/lastMatchCol,
     *    deleting the characters of s1 between them and inserting the characters
     *    of s2 between them
     *
     * When lastMatchRow or lastMatchCol is 0 the lookup hits a sentinel cell
     * (n + m), so the transposition term cannot be the minimum.
     *
     * @param dp the dynamic programming table
     * @param i the current position in the first string (1-based)
     * @param j the current position in the second string (1-based)
     * @param lastMatchRow the last position in s1, before i, holding the current character of s2 (0 if none)
     * @param lastMatchCol the last position in s2, before j, holding the current character of s1 (0 if none)
     * @param cost the substitution cost (0 if characters match, 1 otherwise)
     * @return the minimum cost among all operations
     */
    private static int computeMinimumCost(int[][] dp, int i, int j, int lastMatchRow, int lastMatchCol, int cost) {
        int substitution = dp[i][j] + cost;
        int insertion = dp[i + 1][j] + 1;
        int deletion = dp[i][j + 1] + 1;

        int deletedBetween = i - lastMatchRow - 1;
        int insertedBetween = j - lastMatchCol - 1;
        int transposition = dp[lastMatchRow][lastMatchCol] + deletedBetween + 1 + insertedBetween;

        return Math.min(Math.min(substitution, insertion), Math.min(deletion, transposition));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package com.thealgorithms.dynamicprogramming;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@code DamerauLevenshteinDistance}.
 * Covers null handling, the four basic edit operations, transposition
 * scenarios, and a few metric properties.
 */
class DamerauLevenshteinDistanceTest {

    /**
     * Asserts that the distance from {@code s1} to {@code s2} equals {@code expected}.
     */
    private static void assertDistance(int expected, String s1, String s2) {
        assertEquals(expected, DamerauLevenshteinDistance.distance(s1, s2));
    }

    @Test
    @DisplayName("Should throw exception for null first string")
    void testNullFirstString() {
        assertThrows(IllegalArgumentException.class, () -> DamerauLevenshteinDistance.distance(null, "test"));
    }

    @Test
    @DisplayName("Should throw exception for null second string")
    void testNullSecondString() {
        assertThrows(IllegalArgumentException.class, () -> DamerauLevenshteinDistance.distance("test", null));
    }

    @Test
    @DisplayName("Should throw exception for both null strings")
    void testBothNullStrings() {
        assertThrows(IllegalArgumentException.class, () -> DamerauLevenshteinDistance.distance(null, null));
    }

    @Test
    @DisplayName("Should return 0 for identical strings")
    void testIdenticalStrings() {
        assertDistance(0, "", "");
        assertDistance(0, "a", "a");
        assertDistance(0, "abc", "abc");
        assertDistance(0, "hello", "hello");
    }

    @Test
    @DisplayName("Should return length when one string is empty")
    void testEmptyStrings() {
        assertDistance(3, "", "abc");
        assertDistance(5, "hello", "");
        assertDistance(0, "", "");
    }

    @Test
    @DisplayName("Should handle single character insertions")
    void testSingleInsertion() {
        assertDistance(1, "cat", "cats");
        assertDistance(1, "ab", "abc");
        assertDistance(1, "", "a");
    }

    @Test
    @DisplayName("Should handle single character deletions")
    void testSingleDeletion() {
        assertDistance(1, "cats", "cat");
        assertDistance(1, "abc", "ab");
        assertDistance(1, "a", "");
    }

    @Test
    @DisplayName("Should handle single character substitutions")
    void testSingleSubstitution() {
        assertDistance(1, "cat", "bat");
        assertDistance(1, "abc", "adc");
        assertDistance(1, "x", "y");
    }

    @Test
    @DisplayName("Should handle adjacent character transpositions")
    void testAdjacentTransposition() {
        assertDistance(1, "ab", "ba");
        assertDistance(1, "abc", "bac");
        assertDistance(1, "hello", "ehllo");
    }

    @Test
    @DisplayName("Should correctly compute distance for CA to ABC")
    void testCAtoABC() {
        // The classic case separating full Damerau–Levenshtein from OSA:
        // full DL needs 2 edits (transpose CA -> AC, then insert B between them),
        // whereas OSA may not edit a transposed pair again and would need 3.
        assertDistance(2, "CA", "ABC");
    }

    @Test
    @DisplayName("Should handle non-adjacent transpositions")
    void testNonAdjacentTransposition() {
        assertDistance(2, "abc", "cba");
        assertDistance(3, "abcd", "dcba");
    }

    @Test
    @DisplayName("Should handle multiple operations")
    void testMultipleOperations() {
        assertDistance(3, "kitten", "sitting");
        assertDistance(3, "saturday", "sunday");
        assertDistance(5, "intention", "execution");
    }

    @Test
    @DisplayName("Should handle completely different strings")
    void testCompletelyDifferentStrings() {
        assertDistance(3, "abc", "xyz");
        assertDistance(4, "hello", "world");
    }

    @Test
    @DisplayName("Should handle strings with repeated characters")
    void testRepeatedCharacters() {
        assertDistance(0, "aaa", "aaa");
        assertDistance(1, "aaa", "aab");
        assertDistance(1, "aaa", "aba");
    }

    @Test
    @DisplayName("Should be symmetric")
    void testSymmetry() {
        int abcToDef = DamerauLevenshteinDistance.distance("abc", "def");
        int defToAbc = DamerauLevenshteinDistance.distance("def", "abc");
        assertEquals(abcToDef, defToAbc);

        int helloToWorld = DamerauLevenshteinDistance.distance("hello", "world");
        int worldToHello = DamerauLevenshteinDistance.distance("world", "hello");
        assertEquals(helloToWorld, worldToHello);
    }

    @Test
    @DisplayName("Should handle case sensitivity")
    void testCaseSensitivity() {
        assertDistance(1, "Hello", "hello");
        assertDistance(5, "HELLO", "hello");
    }

    @Test
    @DisplayName("Should handle single character strings")
    void testSingleCharacterStrings() {
        assertDistance(1, "a", "b");
        assertDistance(0, "a", "a");
        assertDistance(2, "a", "abc");
    }

    @Test
    @DisplayName("Should handle long strings efficiently")
    void testLongStrings() {
        String alphabet = "abcdefghijklmnopqrstuvwxyz";
        String reversedAlphabet = "zyxwvutsrqponmlkjihgfedcba";

        // Identical 26-character strings.
        assertDistance(0, alphabet, "abcdefghijklmnopqrstuvwxyz");

        // Alphabet against its reversal.
        assertDistance(25, alphabet, reversedAlphabet);
    }

    @Test
    @DisplayName("Should satisfy triangle inequality")
    void testTriangleInequality() {
        // d(first, third) <= d(first, second) + d(second, third)
        String first = "cat";
        String second = "hat";
        String third = "rat";

        int firstToSecond = DamerauLevenshteinDistance.distance(first, second);
        int secondToThird = DamerauLevenshteinDistance.distance(second, third);
        int firstToThird = DamerauLevenshteinDistance.distance(first, third);

        assertTrue(firstToThird <= firstToSecond + secondToThird);
    }

    @Test
    @DisplayName("Should handle special characters")
    void testSpecialCharacters() {
        assertDistance(0, "hello!", "hello!");
        assertDistance(1, "hello!", "hello?");
        assertDistance(1, "a@b", "a#b");
    }

    @Test
    @DisplayName("Should handle numeric strings")
    void testNumericStrings() {
        assertDistance(1, "123", "124");
        assertDistance(1, "123", "213");
        assertDistance(0, "999", "999");
    }

    @Test
    @DisplayName("Should handle unicode characters")
    void testUnicodeCharacters() {
        assertDistance(0, "café", "café");
        assertDistance(1, "café", "cafe");
        assertDistance(0, "你好", "你好");
    }
}