diff --git a/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java b/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java
new file mode 100644
index 000000000..32716d496
--- /dev/null
+++ b/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java
@@ -0,0 +1,175 @@
+package com.williamfiset.algorithms.strings;
+
+import java.util.*;
+
+/**
+ * Aho–Corasick algorithm
+ *
+ * <p>Multi-pattern string matching in O(n + m + z): n = text length, m = total pattern length, z =
+ * number of matches.
+ *
+ * <p>Typical use cases: spam filters, IDS, virus scanning, keyword detection.
+ *
+ * <p>Usage: add every pattern with addPattern(), call build(), then call search(). Adding a
+ * pattern after build() requires calling build() again before the next search.
+ */
+public class AhoCorasick {
+
+  private final Node root = new Node();
+  private boolean built = false;
+
+  // Example usage
+  public static void main(String[] args) {
+    AhoCorasick ac = new AhoCorasick();
+    ac.addPattern("he");
+    ac.addPattern("she");
+    ac.addPattern("his");
+    ac.addPattern("hers");
+
+    ac.build();
+
+    String text = "ushers";
+    List<Match> matches = ac.search(text);
+
+    for (Match m : matches) {
+      System.out.println(
+          "Match \"" + text.substring(m.index, m.index + m.length) + "\" at index " + m.index);
+    }
+  }
+
+  /**
+   * Inserts a pattern into the trie (call build() once all patterns are added).
+   *
+   * <p>Adding a pattern that is already present has no additional effect.
+   *
+   * @param word the pattern to look for; must be non-null and non-empty
+   * @throws NullPointerException if {@code word} is null
+   * @throws IllegalArgumentException if {@code word} is empty
+   */
+  public void addPattern(String word) {
+    Objects.requireNonNull(word, "word");
+    if (word.isEmpty()) {
+      throw new IllegalArgumentException("Pattern must not be empty.");
+    }
+
+    built = false;
+    Node node = root;
+    for (int i = 0; i < word.length(); i++) {
+      node = node.children.computeIfAbsent(word.charAt(i), k -> new Node());
+    }
+    node.pattern = word;
+  }
+
+  /**
+   * Builds the failure links and output lists with a BFS over the trie. Must be called before
+   * search().
+   *
+   * <p>Output lists are recomputed from scratch on every call, so rebuilding after adding more
+   * patterns never reports the same match twice.
+   */
+  public void build() {
+    Queue<Node> q = new ArrayDeque<>();
+
+    // Level 1 → fail points to root
+    for (Node child : root.children.values()) {
+      child.fail = root;
+      child.resetOutput();
+      q.add(child);
+    }
+
+    while (!q.isEmpty()) {
+      Node node = q.remove();
+
+      for (Map.Entry<Character, Node> entry : node.children.entrySet()) {
+        char c = entry.getKey();
+        Node nxt = entry.getValue();
+        q.add(nxt);
+
+        // Walk up the failure chain until some state has a transition on c.
+        Node f = node.fail;
+        while (f != null && !f.children.containsKey(c)) {
+          f = f.fail;
+        }
+
+        nxt.fail = (f == null) ? root : f.children.get(c);
+        // nxt.fail is shallower than nxt, so BFS has already finalized its output list.
+        nxt.resetOutput();
+        nxt.output.addAll(nxt.fail.output); // inherit matches
+      }
+    }
+
+    built = true;
+  }
+
+  /**
+   * Searches text and returns every pattern occurrence as an (index, length) pair.
+   *
+   * <p>Matches are ordered by the position of their last character; matches ending at the same
+   * position are listed from longest to shortest.
+   *
+   * @param text the text to scan; must be non-null
+   * @return the matches found, empty if there are none
+   * @throws IllegalStateException if build() was never called or a pattern was added since then
+   * @throws NullPointerException if {@code text} is null
+   */
+  public List<Match> search(String text) {
+    if (!built) {
+      throw new IllegalStateException("Call build() before search().");
+    }
+    Objects.requireNonNull(text, "text");
+
+    List<Match> results = new ArrayList<>();
+    Node node = root;
+
+    for (int i = 0; i < text.length(); i++) {
+      char c = text.charAt(i);
+
+      // Fall back along failure links until a state accepts c (null means not even root does).
+      while (node != null && !node.children.containsKey(c)) {
+        node = node.fail;
+      }
+
+      node = (node == null) ? root : node.children.get(c);
+
+      for (String w : node.output) {
+        results.add(new Match(i - w.length() + 1, w.length()));
+      }
+    }
+
+    return results;
+  }
+
+  /** A single occurrence of a pattern in the searched text. */
+  public static class Match {
+    /** Index in the text of the first character of the match. */
+    public final int index;
+
+    /** Number of characters matched. */
+    public final int length;
+
+    Match(int index, int length) {
+      this.index = index;
+      this.length = length;
+    }
+  }
+
+  // Trie node
+  private static class Node {
+    final Map<Character, Node> children = new HashMap<>();
+    Node fail;
+
+    // The pattern that ends exactly at this node, or null if none does.
+    String pattern;
+
+    // Every pattern that ends at this state: own pattern first, then those inherited via fail.
+    final List<String> output = new ArrayList<>();
+
+    // Resets output to just this node's own pattern, dropping anything inherited earlier.
+    void resetOutput() {
+      output.clear();
+      if (pattern != null) {
+        output.add(pattern);
+      }
+    }
+  }
+}
diff --git a/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java b/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java
new file mode 100644
index 000000000..f656cecb5
--- /dev/null
+++ b/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java
@@ -0,0 +1,103 @@
+package com.williamfiset.algorithms.strings;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.util.List;
+import org.junit.jupiter.api.Test;
+
+public class AhoCorasickTest {
+
+  @Test
+  public void basicMatching() {
+    AhoCorasick ac = new AhoCorasick();
+    ac.addPattern("he");
+    ac.addPattern("she");
+    ac.addPattern("his");
+    ac.addPattern("hers");
+    ac.build();
+
+    List<AhoCorasick.Match> matches = ac.search("ushers");
+
+    // Expected: "she", "he", "hers"
+    assertEquals(3, matches.size());
+
+    assertEquals(1, matches.get(0).index);
+    assertEquals(3, matches.get(0).length);
+
+    assertEquals(2, matches.get(1).index);
+    assertEquals(2, matches.get(1).length);
+
+    assertEquals(2, matches.get(2).index);
+    assertEquals(4, matches.get(2).length);
+  }
+
+  @Test
+  public void overlappingPatterns() {
+    AhoCorasick ac = new AhoCorasick();
+    ac.addPattern("aba");
+    ac.addPattern("ba");
+    ac.build();
+
+    List<AhoCorasick.Match> matches = ac.search("ababa");
+
+    assertEquals(4, matches.size());
+
+    assertEquals(0, matches.get(0).index); // "aba"
+    assertEquals(1, matches.get(1).index); // "ba"
+    assertEquals(2, matches.get(2).index); // "aba"
+    assertEquals(3, matches.get(3).index); // "ba"
+  }
+
+  @Test
+  public void noMatches() {
+    AhoCorasick ac = new AhoCorasick();
+    ac.addPattern("cat");
+    ac.addPattern("dog");
+    ac.build();
+
+    List<AhoCorasick.Match> matches = ac.search("aaaaa");
+    assertTrue(matches.isEmpty());
+  }
+
+  @Test
+  public void searchWithoutBuildThrows() {
+    AhoCorasick ac = new AhoCorasick();
+    ac.addPattern("abc");
+
+    assertThrows(IllegalStateException.class, () -> ac.search("abc"));
+  }
+
+  @Test
+  public void rebuildDoesNotDuplicateMatches() {
+    AhoCorasick ac = new AhoCorasick();
+    ac.addPattern("she");
+    ac.addPattern("he");
+    ac.build();
+    assertEquals(2, ac.search("she").size());
+
+    // Adding a pattern invalidates the automaton until it is built again.
+    ac.addPattern("hers");
+    assertThrows(IllegalStateException.class, () -> ac.search("she"));
+
+    ac.build();
+    assertEquals(2, ac.search("she").size());
+    assertEquals(3, ac.search("ushers").size());
+  }
+
+  @Test
+  public void duplicatePatternIsReportedOnce() {
+    AhoCorasick ac = new AhoCorasick();
+    ac.addPattern("ab");
+    ac.addPattern("ab");
+    ac.build();
+
+    assertEquals(1, ac.search("ab").size());
+  }
+
+  @Test
+  public void emptyPatternIsRejected() {
+    AhoCorasick ac = new AhoCorasick();
+
+    assertThrows(IllegalArgumentException.class, () -> ac.addPattern(""));
+  }
+}