Skip to content

Commit f68c7b9

Browse files
authored
Merge pull request #138 from halafi/master
fix naive pattern searching + add more string matching algorithms
2 parents 0d28a32 + 318c44d commit f68c7b9

File tree

11 files changed

+1393
-29
lines changed

11 files changed

+1393
-29
lines changed
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"io/ioutil"
6+
"log"
7+
"strings"
8+
"time"
9+
)
10+
11+
// debugMode is the user-editable verbosity switch.
// true prints the extra tracing output (which slows the run down);
// false gives a quick and quiet execution.
const debugMode = true
15+
16+
// Implementation of Basic Aho-Corasick algorithm (Prefix based).
17+
// Searches for a set of strings (patterns.txt) in text.txt.
18+
func main() {
19+
patFile, err := ioutil.ReadFile("patterns.txt")
20+
if err != nil {
21+
log.Fatal(err)
22+
}
23+
textFile, err := ioutil.ReadFile("text.txt")
24+
if err != nil {
25+
log.Fatal(err)
26+
}
27+
patterns := strings.Split(string(patFile), " ")
28+
fmt.Printf("\nRunning: Basic Aho-Corasick algorithm.\n\n")
29+
if debugMode == true {
30+
fmt.Printf("Searching for %d patterns/words:\n", len(patterns))
31+
}
32+
for i := 0; i < len(patterns); i++ {
33+
if len(patterns[i]) > len(textFile) {
34+
log.Fatal("There is a pattern that is longer than text! Pattern number:", i+1)
35+
}
36+
if debugMode == true {
37+
fmt.Printf("%q ", patterns[i])
38+
}
39+
}
40+
if debugMode == true {
41+
fmt.Printf("\n\nIn text (%d chars long): \n%q\n\n", len(textFile), textFile)
42+
}
43+
ahoCorasick(string(textFile), patterns)
44+
}
45+
46+
// Function performing the Basic Aho-Corasick alghoritm.
47+
// Finds and prints occurences of each pattern.
48+
func ahoCorasick(t string, p []string) {
49+
startTime := time.Now()
50+
occurences := make(map[int][]int)
51+
ac, f, s := buildAc(p)
52+
if debugMode == true {
53+
fmt.Printf("\n\nAC:\n\n")
54+
}
55+
current := 0
56+
for pos := 0; pos < len(t); pos++ {
57+
if debugMode == true {
58+
fmt.Printf("Position: %d, we read: %c", pos, t[pos])
59+
}
60+
for getTransition(current, t[pos], ac) == -1 && s[current] != -1 {
61+
current = s[current]
62+
}
63+
if getTransition(current, t[pos], ac) != -1 {
64+
current = getTransition(current, t[pos], ac)
65+
fmt.Printf(" (Continue) \n")
66+
} else {
67+
current = 0
68+
if debugMode == true {
69+
fmt.Printf(" (FAIL) \n")
70+
}
71+
}
72+
_, ok := f[current]
73+
if ok {
74+
for i := range f[current] {
75+
if p[f[current][i]] == getWord(pos-len(p[f[current][i]])+1, pos, t) { //check for word match
76+
if debugMode == true {
77+
fmt.Printf("Occurence at position %d, %q = %q\n", pos-len(p[f[current][i]])+1, p[f[current][i]], p[f[current][i]])
78+
}
79+
newOccurences := intArrayCapUp(occurences[f[current][i]])
80+
occurences[f[current][i]] = newOccurences
81+
occurences[f[current][i]][len(newOccurences)-1] = pos - len(p[f[current][i]]) + 1
82+
}
83+
}
84+
}
85+
}
86+
elapsed := time.Since(startTime)
87+
fmt.Printf("\n\nElapsed %f secs\n", elapsed.Seconds())
88+
for key, value := range occurences { //prints all occurences of each pattern (if there was at least one)
89+
fmt.Printf("\nThere were %d occurences for word: %q at positions: ", len(value), p[key])
90+
for i := range value {
91+
fmt.Printf("%d", value[i])
92+
if i != len(value)-1 {
93+
fmt.Printf(", ")
94+
}
95+
}
96+
fmt.Printf(".")
97+
}
98+
return
99+
}
100+
101+
// Functions that builds Aho Corasick automaton.
102+
func buildAc(p []string) (acToReturn map[int]map[uint8]int, f map[int][]int, s []int) {
103+
acTrie, stateIsTerminal, f := constructTrie(p)
104+
s = make([]int, len(stateIsTerminal)) //supply function
105+
i := 0 //root of acTrie
106+
acToReturn = acTrie
107+
s[i] = -1
108+
if debugMode == true {
109+
fmt.Printf("\n\nAC construction: \n")
110+
}
111+
for current := 1; current < len(stateIsTerminal); current++ {
112+
o, parent := getParent(current, acTrie)
113+
down := s[parent]
114+
for stateExists(down, acToReturn) && getTransition(down, o, acToReturn) == -1 {
115+
down = s[down]
116+
}
117+
if stateExists(down, acToReturn) {
118+
s[current] = getTransition(down, o, acToReturn)
119+
if stateIsTerminal[s[current]] == true {
120+
stateIsTerminal[current] = true
121+
f[current] = arrayUnion(f[current], f[s[current]]) //F(Current) <- F(Current) union F(S(Current))
122+
if debugMode == true {
123+
fmt.Printf(" f[%d] set to: ", current)
124+
for i := range f[current] {
125+
fmt.Printf("%d\n", f[current][i])
126+
}
127+
}
128+
}
129+
} else {
130+
s[current] = i //initial state?
131+
}
132+
}
133+
if debugMode == true {
134+
fmt.Printf("\nsupply function: \n")
135+
for i := range s {
136+
fmt.Printf("\ns[%d]=%d", i, s[i])
137+
}
138+
fmt.Printf("\n\n")
139+
for i, j := range f {
140+
fmt.Printf("f[%d]=", i)
141+
for k := range j {
142+
fmt.Printf("%d\n", j[k])
143+
}
144+
}
145+
}
146+
return acToReturn, f, s
147+
}
148+
149+
// Function that constructs Trie as an automaton for a set of reversed & trimmed strings.
150+
func constructTrie(p []string) (trie map[int]map[uint8]int, stateIsTerminal []bool, f map[int][]int) {
151+
trie = make(map[int]map[uint8]int)
152+
stateIsTerminal = make([]bool, 1)
153+
f = make(map[int][]int)
154+
state := 1
155+
if debugMode == true {
156+
fmt.Printf("\n\nTrie construction: \n")
157+
}
158+
createNewState(0, trie)
159+
for i := 0; i < len(p); i++ {
160+
current := 0
161+
j := 0
162+
for j < len(p[i]) && getTransition(current, p[i][j], trie) != -1 {
163+
current = getTransition(current, p[i][j], trie)
164+
j++
165+
}
166+
for j < len(p[i]) {
167+
stateIsTerminal = boolArrayCapUp(stateIsTerminal)
168+
createNewState(state, trie)
169+
stateIsTerminal[state] = false
170+
createTransition(current, p[i][j], state, trie)
171+
current = state
172+
j++
173+
state++
174+
}
175+
if stateIsTerminal[current] {
176+
newArray := intArrayCapUp(f[current])
177+
newArray[len(newArray)-1] = i
178+
f[current] = newArray //F(Current) <- F(Current) union {i}
179+
if debugMode == true {
180+
fmt.Printf(" and %d", i)
181+
}
182+
} else {
183+
stateIsTerminal[current] = true
184+
f[current] = []int{i} //F(Current) <- {i}
185+
if debugMode == true {
186+
fmt.Printf("\n%d is terminal for word number %d", current, i)
187+
}
188+
}
189+
}
190+
return trie, stateIsTerminal, f
191+
}
192+
193+
// contains reports whether the int slice s holds the value e.
//
// Author: Mostafa, http://stackoverflow.com/a/10485970
func contains(s []int, e int) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}
206+
207+
// getWord returns the part of text t that spans the byte positions begin to
// end, both inclusive. It returns "" when the range is empty or does not lie
// entirely inside t (begin < 0, end >= len(t) or begin > end), so callers
// never have to guard against an out-of-range panic.
func getWord(begin, end int, t string) string {
	if begin < 0 || end >= len(t) || begin > end {
		return ""
	}
	return t[begin : end+1]
}
218+
219+
// intArrayCapUp returns a freshly allocated copy of old that is exactly one
// element longer; the extra last element is zero. The size is derived from
// len(old) rather than cap(old), so a slice with spare capacity does not
// gain unexpected zero elements. old itself is never modified or aliased.
func intArrayCapUp(old []int) []int {
	grown := make([]int, len(old)+1)
	copy(grown, old) //copy(dst,src)
	return grown
}
226+
227+
// boolArrayCapUp returns a freshly allocated copy of old that is exactly one
// element longer; the extra last element is false. The size is derived from
// len(old) rather than cap(old), so a slice with spare capacity does not
// gain unexpected false elements. old itself is never modified or aliased.
func boolArrayCapUp(old []bool) []bool {
	grown := make([]bool, len(old)+1)
	copy(grown, old)
	return grown
}
234+
235+
// Concats two arrays of int's into one.
236+
func arrayUnion(to, from []int) (concat []int) {
237+
concat = to
238+
for i := range from {
239+
if !contains(concat, from[i]) {
240+
concat = intArrayCapUp(concat)
241+
concat[len(concat)-1] = from[i]
242+
}
243+
}
244+
return concat
245+
}
246+
247+
// getParent scans the transitions of at for one that ends in state and
// returns the symbol on that transition together with its source state.
// It is meant for a trie, where every state has a single parent; if no
// transition leads to state, (0, 0) is returned.
func getParent(state int, at map[int]map[uint8]int) (uint8, int) {
	for from, edges := range at {
		for label, to := range edges {
			if to != state {
				continue
			}
			return label, from
		}
	}
	return 0, 0 //unreachable
}
259+
260+
// Automaton function for creating a new state 'state'.
261+
func createNewState(state int, at map[int]map[uint8]int) {
262+
at[state] = make(map[uint8]int)
263+
if debugMode == true {
264+
fmt.Printf("\ncreated state %d", state)
265+
}
266+
}
267+
268+
// Creates a transition for function σ(state,letter) = end.
269+
func createTransition(fromState int, overChar uint8, toState int, at map[int]map[uint8]int) {
270+
at[fromState][overChar] = toState
271+
if debugMode == true {
272+
fmt.Printf("\n σ(%d,%c)=%d;", fromState, overChar, toState)
273+
}
274+
}
275+
276+
// getTransition looks up σ(fromState, overChar) in the automaton at and
// returns the state it leads to. It returns -1 when fromState is -1, is
// unknown to at, or has no transition over overChar.
func getTransition(fromState int, overChar uint8, at map[int]map[uint8]int) (toState int) {
	if fromState == -1 {
		return -1
	}
	// Indexing a missing or nil inner map is safe and simply reports "absent".
	if target, found := at[fromState][overChar]; found {
		return target
	}
	return -1
}
287+
288+
// stateExists reports whether state is a usable state of the automaton at:
// it must not be -1 and must be stored in at with a non-nil transition map.
func stateExists(state int, at map[int]map[uint8]int) bool {
	if state == -1 {
		return false
	}
	transitions, found := at[state]
	return found && transitions != nil
}

0 commit comments

Comments
 (0)