diff --git a/src/main/java/com/thealgorithms/strings/KnuthMorrisPratt.java b/src/main/java/com/thealgorithms/strings/KnuthMorrisPratt.java new file mode 100644 index 000000000000..bc19884988f3 --- /dev/null +++ b/src/main/java/com/thealgorithms/strings/KnuthMorrisPratt.java @@ -0,0 +1,88 @@ +package com.thealgorithms.strings; + +import java.util.ArrayList; +import java.util.List; + +/** + * Knuth-Morris-Pratt (KMP) algorithm for substring search. + * This algorithm searches for occurrences of a "pattern" within a main "text" string + * by employing the observation that when a mismatch occurs, the pattern itself + * embodies sufficient information to determine where the next match could begin, + * thus bypassing re-examination of previously matched characters. + *

+ * Time Complexity: O(n + m), where n is the length of the text and m is the length of the pattern. + * Space Complexity: O(m) for the longest prefix-suffix (LPS) array. + *

/**
 * Knuth-Morris-Pratt (KMP) substring search.
 *
 * <p>Finds every occurrence of a pattern inside a text. On a mismatch the algorithm consults a
 * precomputed longest-prefix-suffix (LPS) table for the pattern, so the text index never moves
 * backwards.
 *
 * <p>Time Complexity: O(n + m), where n is the length of the text and m is the length of the
 * pattern. Space Complexity: O(m) for the LPS array.
 *
 * <p>Wikipedia: https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
 *
 * @author Milad Sadeghi
 */
public final class KnuthMorrisPratt {

    // Utility class: not meant to be instantiated.
    private KnuthMorrisPratt() {
    }

    /**
     * Searches for all occurrences (including overlapping ones) of a pattern within a text
     * using the Knuth-Morris-Pratt algorithm.
     *
     * <p>This method has no side effects; in particular it does not print anything.
     *
     * @param text the text to search within
     * @param pattern the pattern to search for
     * @return the starting indices, in ascending order, at which the pattern occurs in the text;
     *         an empty list if there is no occurrence, if the pattern is empty, or if the
     *         pattern is longer than the text
     * @throws NullPointerException if {@code text} or {@code pattern} is {@code null}
     */
    public static List<Integer> search(String text, String pattern) {
        final int n = text.length();
        final int m = pattern.length();
        final List<Integer> result = new ArrayList<>();

        // An empty pattern has no character to compare against, and a pattern longer than
        // the text can never match; both cases yield no occurrences.
        if (m == 0 || m > n) {
            return result;
        }

        final int[] lps = computeLPSArray(pattern);
        int matched = 0; // number of pattern characters currently matched

        for (int i = 0; i < n; i++) {
            final char current = text.charAt(i);

            // On mismatch fall back to the longest proper prefix of the pattern that is
            // also a suffix of what has been matched so far.
            while (matched > 0 && pattern.charAt(matched) != current) {
                matched = lps[matched - 1];
            }
            if (pattern.charAt(matched) == current) {
                matched++;
            }
            if (matched == m) {
                result.add(i - m + 1);
                // Keep the longest border so that overlapping occurrences are found too.
                matched = lps[matched - 1];
            }
        }
        return result;
    }

    /**
     * Preprocesses the pattern to create the longest prefix-suffix (LPS) array.
     *
     * <p>{@code lps[i]} is the length of the longest proper prefix of
     * {@code pattern[0..i]} that is also a suffix of it.
     *
     * @param pattern the pattern to preprocess
     * @return the LPS array, with the same length as the pattern
     */
    private static int[] computeLPSArray(String pattern) {
        final int m = pattern.length();
        final int[] lps = new int[m]; // lps[0] is always 0
        int len = 0; // length of the border of pattern[0..i-1]

        for (int i = 1; i < m; i++) {
            final char current = pattern.charAt(i);
            while (len > 0 && pattern.charAt(len) != current) {
                len = lps[len - 1];
            }
            if (pattern.charAt(len) == current) {
                len++;
            }
            lps[i] = len;
        }
        return lps;
    }
}