Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package com.williamfiset.algorithms.strings;

import java.util.*;

/**
 * Aho–Corasick algorithm
 *
 * <p>Multi-pattern string matching in O(n + m + z): n = text length, m = total pattern length, z =
 * number of matches.
 *
 * <p>Typical use cases: spam filters, IDS, virus scanning, keyword detection.
 *
 * <p>Usage: add every pattern with {@link #addPattern(String)}, call {@link #build()} once, then
 * call {@link #search(String)} as often as needed. Adding a pattern after a build invalidates the
 * automaton; {@link #build()} must be called again before the next search. Matching is done per
 * Java {@code char} (UTF-16 code unit).
 */
public class AhoCorasick {

  private final Node root = new Node();

  // True only while the failure links and output lists reflect every pattern added so far.
  private boolean built = false;

  // Example usage
  public static void main(String[] args) {
    AhoCorasick ac = new AhoCorasick();
    ac.addPattern("he");
    ac.addPattern("she");
    ac.addPattern("his");
    ac.addPattern("hers");

    ac.build();

    String text = "ushers";
    List<Match> matches = ac.search(text);

    for (Match m : matches) {
      System.out.println(
          "Match \"" + text.substring(m.index, m.index + m.length) + "\" at index " + m.index);
    }
  }

  /**
   * Inserts a pattern into the trie (call {@link #build()} once all patterns are added).
   *
   * <p>Adding the same pattern more than once makes it be reported once per insertion.
   *
   * @param word the pattern to look for; must be non-null and non-empty
   * @throws NullPointerException if {@code word} is null
   * @throws IllegalArgumentException if {@code word} is empty (an empty pattern has no meaningful
   *     match position)
   */
  public void addPattern(String word) {
    Objects.requireNonNull(word, "word");
    if (word.isEmpty()) {
      throw new IllegalArgumentException("Pattern must not be empty.");
    }

    // Any previously built failure links / outputs are stale from now on.
    built = false;

    Node node = root;
    for (char c : word.toCharArray()) {
      Node next = node.children.get(c);
      if (next == null) {
        next = new Node();
        node.children.put(c, next);
      }
      node = next;
    }
    node.patterns.add(word);
  }

  /**
   * Builds failure links and per-node output lists with a breadth-first traversal. Must be called
   * before {@link #search(String)}.
   *
   * <p>The method is idempotent: every output list is recomputed from the node's own patterns, so
   * calling it repeatedly, or again after adding more patterns, never duplicates matches.
   */
  public void build() {
    Queue<Node> q = new ArrayDeque<>();

    root.fail = null;
    root.output = new ArrayList<>(root.patterns);

    // Level 1 → fail points to root
    for (Node child : root.children.values()) {
      child.fail = root;
      child.output = new ArrayList<>(child.patterns);
      q.add(child);
    }

    while (!q.isEmpty()) {
      Node node = q.remove();

      for (Map.Entry<Character, Node> entry : node.children.entrySet()) {
        char c = entry.getKey();
        Node nxt = entry.getValue();
        q.add(nxt);

        // Follow the parent's failure chain until some node has a child on c.
        Node f = node.fail;
        while (f != null && !f.children.containsKey(c)) {
          f = f.fail;
        }

        nxt.fail = (f == null) ? root : f.children.get(c);

        // nxt.fail is shallower than nxt, so BFS has already finalized its output list.
        // Start from this node's own patterns (not the previous output) to stay idempotent.
        nxt.output = new ArrayList<>(nxt.patterns);
        nxt.output.addAll(nxt.fail.output); // inherit matches
      }
    }

    built = true;
  }

  /**
   * Searches the text and returns every pattern occurrence, including overlapping ones.
   *
   * <p>Matches are ordered by the position at which they end; patterns ending at the same position
   * are listed longest first.
   *
   * @param text the text to scan; must be non-null
   * @return a new list of matches (start index and length); empty if nothing matched
   * @throws IllegalStateException if {@link #build()} has not been called since the last {@link
   *     #addPattern(String)}
   * @throws NullPointerException if {@code text} is null
   */
  public List<Match> search(String text) {
    if (!built) throw new IllegalStateException("Call build() before search().");
    Objects.requireNonNull(text, "text");

    List<Match> results = new ArrayList<>();
    Node node = root;

    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);

      // Drop to shorter suffixes until one can be extended by c (or we fall off the root).
      while (node != null && !node.children.containsKey(c)) {
        node = node.fail;
      }

      if (node == null) {
        node = root;
      } else {
        node = node.children.get(c);
      }

      // Every pattern in output ends at position i, so its start is i - length + 1.
      for (String w : node.output) {
        results.add(new Match(i - w.length() + 1, w.length()));
      }
    }

    return results;
  }

  /** A single pattern occurrence found by {@link #search(String)}. */
  public static class Match {
    /** Index in the searched text at which the matched pattern starts. */
    public final int index;

    /** Length of the matched pattern, in {@code char}s. */
    public final int length;

    Match(int index, int length) {
      this.index = index;
      this.length = length;
    }
  }

  // Trie node
  private static class Node {
    // Outgoing trie edges keyed by character.
    final Map<Character, Node> children = new HashMap<>();

    // Failure link; null for the root and for nodes not yet processed by build().
    Node fail;

    // Patterns that end exactly at this node, in insertion order.
    final List<String> patterns = new ArrayList<>();

    // Own patterns followed by those inherited through the failure link; recomputed by build().
    List<String> output = new ArrayList<>();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package com.williamfiset.algorithms.strings;

import static org.junit.jupiter.api.Assertions.*;

import java.util.List;
import org.junit.jupiter.api.Test;

/** Unit tests for {@link AhoCorasick}: basic, overlapping, empty-result and misuse scenarios. */
public class AhoCorasickTest {

  /** The classic {he, she, his, hers} dictionary scanned over "ushers". */
  @Test
  public void basicMatching() {
    AhoCorasick automaton = new AhoCorasick();
    for (String pattern : new String[] {"he", "she", "his", "hers"}) {
      automaton.addPattern(pattern);
    }
    automaton.build();

    List<AhoCorasick.Match> found = automaton.search("ushers");

    // Expected: "she", "he", "hers"
    assertEquals(3, found.size());

    AhoCorasick.Match she = found.get(0);
    assertEquals(1, she.index);
    assertEquals(3, she.length);

    AhoCorasick.Match he = found.get(1);
    assertEquals(2, he.index);
    assertEquals(2, he.length);

    AhoCorasick.Match hers = found.get(2);
    assertEquals(2, hers.index);
    assertEquals(4, hers.length);
  }

  /** Occurrences that overlap each other must all be reported, in order. */
  @Test
  public void overlappingPatterns() {
    AhoCorasick automaton = new AhoCorasick();
    automaton.addPattern("aba");
    automaton.addPattern("ba");
    automaton.build();

    List<AhoCorasick.Match> found = automaton.search("ababa");

    // Start indices of "aba", "ba", "aba", "ba" respectively.
    int[] expectedStarts = {0, 1, 2, 3};

    assertEquals(4, found.size());
    for (int k = 0; k < expectedStarts.length; k++) {
      assertEquals(expectedStarts[k], found.get(k).index);
    }
  }

  /** A text containing none of the patterns yields an empty result list. */
  @Test
  public void noMatches() {
    AhoCorasick automaton = new AhoCorasick();
    automaton.addPattern("cat");
    automaton.addPattern("dog");
    automaton.build();

    List<AhoCorasick.Match> found = automaton.search("aaaaa");

    assertTrue(found.isEmpty());
  }

  /** Searching before build() has been called is rejected with IllegalStateException. */
  @Test
  public void searchWithoutBuildThrows() {
    AhoCorasick automaton = new AhoCorasick();
    automaton.addPattern("abc");

    assertThrows(IllegalStateException.class, () -> automaton.search("abc"));
  }
}
Loading