-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathstreamregex.go
More file actions
53 lines (46 loc) · 1.3 KB
/
streamregex.go
File metadata and controls
53 lines (46 loc) · 1.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// Package streamregex allows you to get the matched data of a regex on an io.Reader stream.
package streamregex
import (
	"bufio"
	"context"
	"io"
	"regexp"
	"unicode/utf8"
)
// SplitRegex returns a bufio.SplitFunc whose tokens are the successive
// non-empty matches of re in the scanned input. Bytes that are not part of a
// match are discarded.
//
// maxMatchLength is the longest match the caller expects. It controls two
// things:
//
//   - When the buffered data contains no usable match, everything except the
//     last maxMatchLength bytes is dropped, so a match that starts near the
//     end of the buffer can still complete once more input arrives.
//   - A match that ends exactly at the end of the buffered data and is shorter
//     than maxMatchLength is held back until more input (or EOF) arrives,
//     because the pattern might still extend it. A match that has already
//     reached maxMatchLength is emitted as is, so longer matches may be split.
//
// Empty matches are skipped: a token that consumes no input would make
// bufio.Scanner return empty tokens without ever making progress.
//
// Each call searches only the bytes currently buffered, so anchors and
// boundary assertions are evaluated relative to that window rather than to
// the whole stream.
func SplitRegex(re *regexp.Regexp, maxMatchLength int) bufio.SplitFunc {
	return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		if atEOF && len(data) == 0 {
			return 0, nil, io.EOF
		}

		for off := 0; off < len(data); {
			loc := re.FindIndex(data[off:])
			if loc == nil {
				break
			}
			start, end := off+loc[0], off+loc[1]

			if start == end {
				// Empty match: step over one rune and keep searching, the same
				// way regexp's FindAll family moves past empty matches.
				_, width := utf8.DecodeRune(data[end:])
				off = end + width
				continue
			}

			if !atEOF && end == len(data) && end-start < maxMatchLength {
				// The match touches the end of the buffered data and could
				// still grow; fall through and ask for more input. The tail
				// kept below always contains the start of this match.
				break
			}

			return end, data[start:end], nil
		}

		if atEOF {
			// Nothing left that could ever match.
			return 0, nil, io.EOF
		}
		if len(data) >= maxMatchLength {
			// Keep only the tail in which a pending match could begin.
			return len(data) - maxMatchLength, nil, nil
		}
		return 0, nil, nil
	}
}
// FindReader scans reader for matches of r and delivers each match, as a
// string, on the returned channel.
//
// A scan buffer of maxMatchLength*2 bytes is allocated up front and handed to
// the underlying bufio.Scanner, which tokenizes the input with
// SplitRegex(r, maxMatchLength).
//
// The channel is closed when scanning stops or when ctx is done while a match
// is waiting to be sent. ctx is only consulted at that point; it does not
// interrupt a read that is in progress. Scanner errors are not reported: the
// channel is simply closed.
func FindReader(ctx context.Context, r *regexp.Regexp, maxMatchLength int, reader io.Reader) chan string {
	out := make(chan string)
	scratch := make([]byte, maxMatchLength*2)

	go func() {
		defer close(out)

		sc := bufio.NewScanner(reader)
		sc.Buffer(scratch, maxMatchLength)
		sc.Split(SplitRegex(r, maxMatchLength))

		for {
			if !sc.Scan() {
				return
			}
			select {
			case out <- sc.Text():
				// Delivered; go look for the next match.
			case <-ctx.Done():
				return
			}
		}
	}()

	return out
}