Skip to content

Commit 3341f6f

Browse files
Added test on multiple string matching algorithm (#232)
1 parent d9ca0e8 commit 3341f6f

File tree

9 files changed

+337
-64
lines changed

9 files changed

+337
-64
lines changed

.github/workflows/golangci-lint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
- run: golangci-lint run --no-config searches || true
3030
- run: golangci-lint run --no-config sorts
3131
# - run: golangci-lint run --no-config strings/...
32-
- run: golangci-lint run --no-config "strings/multiple string matching" || true
32+
- run: golangci-lint run --no-config "strings/multiple-string-matching" || true
3333
# - run: golangci-lint run --no-config "strings/single string matching" || true
3434
- run: golangci-lint run --no-config strings/levenshteindistance
3535
- run: golangci-lint run --no-config strings/naivesearch

strings/multiple string matching/adac.go renamed to strings/multiple-string-matching/advanced-aho-corasick/adac.go

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package main
1+
package advancedahocorasick
22

33
import (
44
"fmt"
@@ -13,14 +13,18 @@ import (
1313
// Set to false for quick and quiet execution.
1414
const debugMode bool = true
1515

16+
// result is the value returned by ahoCorasick.
type result struct {
17+
// occurrences maps each pattern that was found at least once to the text
// positions recorded for its matches (pos - len(pattern) + 1 per match).
occurrences map[string][]int
18+
}
19+
1620
// Implementation of Advanced Aho-Corasick algorithm (Prefix based).
1721
// Searches for a set of strings (patterns.txt) in text.txt.
1822
func main() {
19-
patFile, err := ioutil.ReadFile("patterns.txt")
23+
patFile, err := ioutil.ReadFile("../patterns.txt")
2024
if err != nil {
2125
log.Fatal(err)
2226
}
23-
textFile, err := ioutil.ReadFile("text.txt")
27+
textFile, err := ioutil.ReadFile("../text.txt")
2428
if err != nil {
2529
log.Fatal(err)
2630
}
@@ -40,14 +44,25 @@ func main() {
4044
if debugMode == true {
4145
fmt.Printf("\n\nIn text (%d chars long): \n%q\n\n", len(textFile), textFile)
4246
}
43-
ahoCorasick(string(textFile), patterns)
47+
r := ahoCorasick(string(textFile), patterns)
48+
for key, value := range r.occurrences { //prints all occurrences of each pattern (if there was at least one)
49+
fmt.Printf("\nThere were %d occurences for word: %q at positions: ", len(value), key)
50+
for i := range value {
51+
fmt.Printf("%d", value[i])
52+
if i != len(value)-1 {
53+
fmt.Printf(", ")
54+
}
55+
}
56+
fmt.Printf(".")
57+
}
58+
return
4459
}
4560

4661
// Function performing the Advanced Aho-Corasick algorithm.
47-
// Finds and prints occurences of each pattern.
48-
func ahoCorasick(t string, p []string) {
62+
// Finds occurrences of each pattern and returns them keyed by pattern.
63+
func ahoCorasick(t string, p []string) result {
4964
startTime := time.Now()
50-
occurences := make(map[int][]int)
65+
occurrences := make(map[int][]int)
5166
ac, f := buildExtendedAc(p)
5267
if debugMode == true {
5368
fmt.Printf("\n\nAC:\n\n")
@@ -66,26 +81,24 @@ func ahoCorasick(t string, p []string) {
6681
if debugMode == true {
6782
fmt.Printf("Occurence at position %d, %q = %q\n", pos-len(p[f[current][i]])+1, p[f[current][i]], p[f[current][i]])
6883
}
69-
newOccurences := intArrayCapUp(occurences[f[current][i]])
70-
occurences[f[current][i]] = newOccurences
71-
occurences[f[current][i]][len(newOccurences)-1] = pos - len(p[f[current][i]]) + 1
84+
newOccurrences := intArrayCapUp(occurrences[f[current][i]])
85+
occurrences[f[current][i]] = newOccurrences
86+
occurrences[f[current][i]][len(newOccurrences)-1] = pos - len(p[f[current][i]]) + 1
7287
}
7388
}
7489
}
7590
}
7691
elapsed := time.Since(startTime)
7792
fmt.Printf("\n\nElapsed %f secs\n", elapsed.Seconds())
78-
for key, value := range occurences { //prints all occurences of each pattern (if there was at least one)
79-
fmt.Printf("\nThere were %d occurences for word: %q at positions: ", len(value), p[key])
80-
for i := range value {
81-
fmt.Printf("%d", value[i])
82-
if i != len(value)-1 {
83-
fmt.Printf(", ")
84-
}
85-
}
86-
fmt.Printf(".")
93+
94+
var resultOccurrences = make(map[string][]int)
95+
for key, value := range occurrences {
96+
resultOccurrences[p[key]] = value
97+
}
98+
99+
return result{
100+
resultOccurrences,
87101
}
88-
return
89102
}
90103

91104
// Function that builds the extended Aho-Corasick automaton.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package advancedahocorasick
2+
3+
import (
4+
"fmt"
5+
"reflect"
6+
"testing"
7+
)
8+
9+
var testCases = []struct {
10+
name string
11+
words []string
12+
text string
13+
expected result
14+
}{
15+
16+
{
17+
"String comparison on all patterns found",
18+
[]string{"announce", "annual", "annually"},
19+
"CPM_annual_conferenceannounce_announce_annually_announce",
20+
result{
21+
map[string][]int{
22+
"annual": {4, 39},
23+
"announce": {21, 30, 48},
24+
"annually": {39},
25+
},
26+
},
27+
},
28+
{
29+
"String comparison on not all patterns found",
30+
[]string{"announce", "annual", "annually"},
31+
"CPM_annual_conference_announce",
32+
result{
33+
map[string][]int{
34+
"annual": {4},
35+
"announce": {22},
36+
},
37+
},
38+
},
39+
{
40+
"String comparison on not all patterns found",
41+
[]string{"announce", "annual", "annually"},
42+
"CPM_annual_conference_announce",
43+
result{
44+
map[string][]int{
45+
"annual": {4},
46+
"announce": {22},
47+
},
48+
},
49+
},
50+
}
51+
52+
func TestAhoCorasick(t *testing.T) {
53+
for _, tc := range testCases {
54+
t.Run(tc.name, func(t *testing.T) {
55+
actual := ahoCorasick(tc.text, tc.words)
56+
if !reflect.DeepEqual(actual, tc.expected) {
57+
actualString := convertToString(actual)
58+
expectedString := convertToString(tc.expected)
59+
t.Errorf("Expected matches for patterns %s for string '%s' are: patterns and positions found %v, but actual matches are: patterns and positions found %v",
60+
tc.words, tc.text, actualString, expectedString)
61+
}
62+
})
63+
}
64+
}
65+
66+
func convertToString(res result) string {
67+
var r string
68+
for key, val := range res.occurrences {
69+
r = r + fmt.Sprintf("Word: '%s' at positions: ", key)
70+
for i := range val {
71+
r = r + fmt.Sprintf("%d", val[i])
72+
if i != len(val)-1 {
73+
r = r + fmt.Sprintf(", ")
74+
}
75+
}
76+
r = r + fmt.Sprintf(". ")
77+
}
78+
return r
79+
}

strings/multiple string matching/ac.go renamed to strings/multiple-string-matching/aho-corasick/ac.go

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package main
1+
package ahocorasick
22

33
import (
44
"fmt"
@@ -13,14 +13,18 @@ import (
1313
// Set to false for quick and quiet execution.
1414
const debugMode bool = true
1515

16+
// result is the value returned by ahoCorasick.
type result struct {
17+
// occurrences maps each pattern that was found at least once to the text
// positions recorded for its matches (pos - len(pattern) + 1 per match).
occurrences map[string][]int
18+
}
19+
1620
// Implementation of Basic Aho-Corasick algorithm (Prefix based).
1721
// Searches for a set of strings (patterns.txt) in text.txt.
1822
func main() {
19-
patFile, err := ioutil.ReadFile("patterns.txt")
23+
patFile, err := ioutil.ReadFile("../patterns.txt")
2024
if err != nil {
2125
log.Fatal(err)
2226
}
23-
textFile, err := ioutil.ReadFile("text.txt")
27+
textFile, err := ioutil.ReadFile("../text.txt")
2428
if err != nil {
2529
log.Fatal(err)
2630
}
@@ -40,14 +44,24 @@ func main() {
4044
if debugMode == true {
4145
fmt.Printf("\n\nIn text (%d chars long): \n%q\n\n", len(textFile), textFile)
4246
}
43-
ahoCorasick(string(textFile), patterns)
47+
r := ahoCorasick(string(textFile), patterns)
48+
for key, value := range r.occurrences { //prints all occurrences of each pattern (if there was at least one)
49+
fmt.Printf("\nThere were %d occurences for word: %q at positions: ", len(value), key)
50+
for i := range value {
51+
fmt.Printf("%d", value[i])
52+
if i != len(value)-1 {
53+
fmt.Printf(", ")
54+
}
55+
}
56+
fmt.Printf(".")
57+
}
4458
}
4559

46-
// Function performing the Basic Aho-Corasick alghoritm.
47-
// Finds and prints occurences of each pattern.
48-
func ahoCorasick(t string, p []string) {
60+
// Function performing the Basic Aho-Corasick algorithm.
61+
// Finds occurrences of each pattern and returns them keyed by pattern.
62+
func ahoCorasick(t string, p []string) result {
4963
startTime := time.Now()
50-
occurences := make(map[int][]int)
64+
occurrences := make(map[int][]int)
5165
ac, f, s := buildAc(p)
5266
if debugMode == true {
5367
fmt.Printf("\n\nAC:\n\n")
@@ -76,26 +90,23 @@ func ahoCorasick(t string, p []string) {
7690
if debugMode == true {
7791
fmt.Printf("Occurence at position %d, %q = %q\n", pos-len(p[f[current][i]])+1, p[f[current][i]], p[f[current][i]])
7892
}
79-
newOccurences := intArrayCapUp(occurences[f[current][i]])
80-
occurences[f[current][i]] = newOccurences
81-
occurences[f[current][i]][len(newOccurences)-1] = pos - len(p[f[current][i]]) + 1
93+
newOccurrences := intArrayCapUp(occurrences[f[current][i]])
94+
occurrences[f[current][i]] = newOccurrences
95+
occurrences[f[current][i]][len(newOccurrences)-1] = pos - len(p[f[current][i]]) + 1
8296
}
8397
}
8498
}
8599
}
86100
elapsed := time.Since(startTime)
87101
fmt.Printf("\n\nElapsed %f secs\n", elapsed.Seconds())
88-
for key, value := range occurences { //prints all occurences of each pattern (if there was at least one)
89-
fmt.Printf("\nThere were %d occurences for word: %q at positions: ", len(value), p[key])
90-
for i := range value {
91-
fmt.Printf("%d", value[i])
92-
if i != len(value)-1 {
93-
fmt.Printf(", ")
94-
}
95-
}
96-
fmt.Printf(".")
102+
var resultOccurrences = make(map[string][]int)
103+
for key, value := range occurrences {
104+
resultOccurrences[p[key]] = value
105+
}
106+
107+
return result{
108+
resultOccurrences,
97109
}
98-
return
99110
}
100111

101112
// Function that builds the Aho-Corasick automaton.
@@ -191,7 +202,7 @@ func constructTrie(p []string) (trie map[int]map[uint8]int, stateIsTerminal []bo
191202
}
192203

193204
/**
194-
Returns 'true' if arry of int's 's' contains int 'e', 'false' otherwise.
205+
Returns 'true' if the array of ints 's' contains int 'e', 'false' otherwise.
195206
196207
@author Mostafa http://stackoverflow.com/a/10485970
197208
*/
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
package ahocorasick
2+
3+
import (
4+
"fmt"
5+
"reflect"
6+
"testing"
7+
)
8+
9+
var testCases = []struct {
10+
name string
11+
words []string
12+
text string
13+
expected result
14+
}{
15+
16+
{
17+
"String comparison on all patterns found",
18+
[]string{"announce", "annual", "annually"},
19+
"CPM_annual_conferenceannounce_announce_annually_announce",
20+
result{
21+
map[string][]int{
22+
"annual": {4, 39},
23+
"announce": {21,30,48},
24+
"annually": {39},
25+
},
26+
},
27+
},
28+
{
29+
"String comparison on not all patterns found",
30+
[]string{"announce", "annual", "annually"},
31+
"CPM_annual_conference_announce",
32+
result{
33+
map[string][]int{
34+
"annual": {4},
35+
"announce": {22},
36+
},
37+
},
38+
},
39+
{
40+
"String comparison on not all patterns found",
41+
[]string{"announce", "annual", "annually"},
42+
"CPM_annual_conference_announce",
43+
result{
44+
map[string][]int{
45+
"annual": {4},
46+
"announce": {22},
47+
},
48+
},
49+
},
50+
}
51+
52+
func TestAhoCorasick(t *testing.T) {
53+
for _, tc := range testCases {
54+
t.Run(tc.name, func(t *testing.T) {
55+
actual := ahoCorasick(tc.text, tc.words)
56+
if !reflect.DeepEqual(actual, tc.expected) {
57+
actualString := convertToString(actual)
58+
expectedString := convertToString(tc.expected)
59+
t.Errorf("Expected matches for patterns %s for string '%s' are: patterns and positions found %v, but actual matches are: patterns and positions found %v",
60+
tc.words, tc.text, actualString, expectedString)
61+
}
62+
})
63+
}
64+
}
65+
66+
func convertToString(res result) string {
67+
var r string
68+
for key, val := range res.occurrences {
69+
r = r + fmt.Sprintf("Word: '%s' at positions: ", key)
70+
for i := range val {
71+
r = r + fmt.Sprintf("%d", val[i])
72+
if i != len(val)-1 {
73+
r = r + fmt.Sprintf(", ")
74+
}
75+
}
76+
r = r + fmt.Sprintf(". ")
77+
}
78+
return r
79+
}

0 commit comments

Comments
 (0)