|
| 1 | +package com.thealgorithms.dynamicprogramming; |
| 2 | + |
| 3 | +import java.util.HashMap; |
| 4 | +import java.util.Map; |
| 5 | + |
/**
 * Implementation of the full (unrestricted) Damerau–Levenshtein distance algorithm.
 *
 * This algorithm calculates the minimum number of operations required
 * to transform one string into another. Supported operations are:
 * insertion, deletion, substitution, and transposition of adjacent characters.
 *
 * Unlike the restricted version (OSA), this implementation allows multiple
 * edits on the same substring, computing the true edit distance.
 * For example, the distance between "ca" and "abc" is 2 here, whereas the
 * restricted variant yields 3.
 *
 * Time Complexity: O(n * m) expected (one constant-time step per table cell,
 * using a hash map lookup for the last occurrence of a character)
 * Space Complexity: O(n * m)
 */
public final class DamerauLevenshteinDistance {

    private DamerauLevenshteinDistance() {
        // Utility class
    }

    /**
     * Computes the full Damerau–Levenshtein distance between two strings.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return the minimum edit distance between the two strings
     * @throws IllegalArgumentException if either input string is null
     */
    public static int distance(String s1, String s2) {
        validateInputs(s1, s2);

        int n = s1.length();
        int m = s2.length();

        Map<Character, Integer> charLastPosition = buildCharacterMap(s1, s2);
        int[][] dp = initializeTable(n, m);

        fillTable(s1, s2, dp, charLastPosition);

        // The table is shifted by one row and one column (sentinel border),
        // so the answer for the full strings lives at [n + 1][m + 1].
        return dp[n + 1][m + 1];
    }

    /**
     * Validates that both input strings are not null.
     *
     * @param s1 the first string to validate
     * @param s2 the second string to validate
     * @throws IllegalArgumentException if either string is null
     */
    private static void validateInputs(String s1, String s2) {
        if (s1 == null || s2 == null) {
            throw new IllegalArgumentException("Input strings must not be null.");
        }
    }

    /**
     * Builds a character map containing all unique characters from both strings.
     * Each character is initialized with a position value of 0, meaning
     * "not seen yet in s1".
     *
     * This map is used to track the last occurrence position of each character
     * during the distance computation, which is essential for handling transpositions.
     * Because every character of both strings is present as a key, lookups during
     * the table fill never return null.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @return a map containing all unique characters from both strings, initialized to 0
     */
    private static Map<Character, Integer> buildCharacterMap(String s1, String s2) {
        Map<Character, Integer> charMap = new HashMap<>();
        for (char c : s1.toCharArray()) {
            charMap.putIfAbsent(c, 0);
        }
        for (char c : s2.toCharArray()) {
            charMap.putIfAbsent(c, 0);
        }
        return charMap;
    }

    /**
     * Initializes the dynamic programming table for the algorithm.
     *
     * The table has dimensions (n+2) x (m+2) where n and m are the lengths
     * of the input strings. The extra row and column act as a sentinel border
     * so that a transposition lookup for a character that has not been seen yet
     * (position 0) lands on a cell that can never win the minimum.
     *
     * Row 0 and column 0 are initialized with the maximum possible distance (n + m),
     * while row 1 and column 1 represent the base case of transforming
     * from or to an empty string.
     *
     * @param n the length of the first string
     * @param m the length of the second string
     * @return an initialized DP table ready for computation
     */
    private static int[][] initializeTable(int n, int m) {
        int maxDist = n + m;
        int[][] dp = new int[n + 2][m + 2];

        dp[0][0] = maxDist;

        for (int i = 0; i <= n; i++) {
            dp[i + 1][0] = maxDist;
            dp[i + 1][1] = i;
        }

        for (int j = 0; j <= m; j++) {
            dp[0][j + 1] = maxDist;
            dp[1][j + 1] = j;
        }

        return dp;
    }

    /**
     * Fills the dynamic programming table by computing the minimum edit distance
     * for each prefix pair.
     *
     * For every cell the four operations (insertion, deletion, substitution and
     * transposition) are considered. The character position map is updated after
     * each row so that it always holds the last row (strictly before the current
     * one) in which a character of s1 occurred.
     *
     * @param s1 the first string
     * @param s2 the second string
     * @param dp the dynamic programming table to fill
     * @param charLastPosition map tracking the last position of each character in s1
     */
    private static void fillTable(String s1, String s2, int[][] dp, Map<Character, Integer> charLastPosition) {
        int n = s1.length();
        int m = s2.length();

        for (int i = 1; i <= n; i++) {
            char char1 = s1.charAt(i - 1);
            // Last column strictly before j in this row where char1 matched s2.
            int lastMatchCol = 0;

            for (int j = 1; j <= m; j++) {
                char char2 = s2.charAt(j - 1);

                int lastMatchRow = charLastPosition.get(char2);
                int cost = (char1 == char2) ? 0 : 1;

                // The transposition term must use the match column recorded
                // BEFORE the current column; updating it first would undercount
                // the distance when characters repeat (e.g. "aa" vs "a").
                dp[i + 1][j + 1] = computeMinimumCost(dp, i, j, lastMatchRow, lastMatchCol, cost);

                if (char1 == char2) {
                    lastMatchCol = j;
                }
            }

            charLastPosition.put(char1, i);
        }
    }

    /**
     * Computes the minimum cost among all possible operations at the current position.
     *
     * This method evaluates four possible operations:
     * 1. Substitution: replace character at position i with character at position j
     * 2. Insertion: insert character from s2 at position j
     * 3. Deletion: delete character from s1 at position i
     * 4. Transposition: swap characters that have been seen before
     *
     * The transposition cost accounts for the characters skipped between the
     * earlier matching positions and the current position: the skipped characters
     * of s1 are deleted, one swap is performed, and the skipped characters of s2
     * are inserted.
     *
     * @param dp the dynamic programming table
     * @param i the current position in the first string (1-based)
     * @param j the current position in the second string (1-based)
     * @param lastMatchRow the last position before i where the current character of s2 appeared in s1 (0 if none)
     * @param lastMatchCol the last position before j where the current character of s1 appeared in s2 (0 if none)
     * @param cost the substitution cost (0 if characters match, 1 otherwise)
     * @return the minimum cost among all operations
     */
    private static int computeMinimumCost(int[][] dp, int i, int j, int lastMatchRow, int lastMatchCol, int cost) {
        int substitution = dp[i][j] + cost;
        int insertion = dp[i + 1][j] + 1;
        int deletion = dp[i][j + 1] + 1;
        int transposition = dp[lastMatchRow][lastMatchCol] + (i - lastMatchRow - 1) + 1 + (j - lastMatchCol - 1);

        return Math.min(Math.min(substitution, insertion), Math.min(deletion, transposition));
    }
}
0 commit comments