Skip to content

Commit 56af719

Browse files
authored
Merge pull request #3252 from makiuchi-d/regex-pure-go
Add pure Go regex implementation for non-CGO builds
2 parents f7a3e38 + 682164e commit 56af719

File tree

8 files changed

+658
-8
lines changed

8 files changed

+658
-8
lines changed

enginetest/queries/regex_queries.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ package queries
2323
import (
2424
"gopkg.in/src-d/go-errors.v1"
2525

26-
regex "github.com/dolthub/go-icu-regex"
27-
26+
"github.com/dolthub/go-mysql-server/internal/regex"
2827
"github.com/dolthub/go-mysql-server/sql"
2928
)
3029

internal/regex/regex_cgo.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//go:build cgo && !gms_pure_go
2+
3+
package regex
4+
5+
import regex "github.com/dolthub/go-icu-regex"
6+
7+
// Regex is an alias for the Regex type of github.com/dolthub/go-icu-regex,
// selected when building with cgo and without the gms_pure_go tag.
type Regex = regex.Regex
8+
9+
var (
10+
ErrRegexNotYetSet = regex.ErrRegexNotYetSet
11+
ErrMatchNotYetSet = regex.ErrMatchNotYetSet
12+
ErrInvalidRegex = regex.ErrInvalidRegex
13+
)
14+
15+
type RegexFlags = regex.RegexFlags
16+
17+
const (
18+
RegexFlags_None = regex.RegexFlags_None
19+
RegexFlags_Case_Insensitive = regex.RegexFlags_Case_Insensitive
20+
RegexFlags_Comments = regex.RegexFlags_Comments
21+
RegexFlags_Dot_All = regex.RegexFlags_Dot_All
22+
RegexFlags_Literal = regex.RegexFlags_Literal
23+
RegexFlags_Multiline = regex.RegexFlags_Multiline
24+
RegexFlags_Unix_Lines = regex.RegexFlags_Unix_Lines
25+
RegexFlags_Unicode_Word = regex.RegexFlags_Unicode_Word
26+
RegexFlags_Error_On_Unknown_Escapes = regex.RegexFlags_Error_On_Unknown_Escapes
27+
)
28+
29+
func CreateRegex(stringBufferInBytes uint32) Regex {
30+
return regex.CreateRegex(stringBufferInBytes)
31+
}

internal/regex/regex_pure.go

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
//go:build !cgo || gms_pure_go
2+
3+
package regex
4+
5+
import (
6+
"context"
7+
"fmt"
8+
"regexp"
9+
10+
"gopkg.in/src-d/go-errors.v1"
11+
)
12+
13+
type Regex interface {
14+
SetRegexString(ctx context.Context, regexStr string, flags RegexFlags) error
15+
SetMatchString(ctx context.Context, matchStr string) error
16+
IndexOf(ctx context.Context, start int, occurrence int, endIndex bool) (int, error)
17+
Matches(ctx context.Context, start int, occurrence int) (bool, error)
18+
Replace(ctx context.Context, replacementStr string, position int, occurrence int) (string, error)
19+
Substring(ctx context.Context, start int, occurrence int) (string, bool, error)
20+
Close() error
21+
}
22+
23+
var (
24+
ErrRegexNotYetSet = errors.NewKind("SetRegexString must be called before any other function")
25+
ErrMatchNotYetSet = errors.NewKind("SetMatchString must be called as there is nothing to match against")
26+
ErrInvalidRegex = errors.NewKind("the given regular expression is invalid")
27+
)
28+
29+
// RegexFlags is a bit set of options passed to Regex.SetRegexString.
type RegexFlags uint32

// Individual flag bits. Each flag occupies its own bit, so flags can be
// combined with bitwise OR.
const (
	RegexFlags_None                     RegexFlags = 0
	RegexFlags_Case_Insensitive         RegexFlags = 1 << 1 // 2
	RegexFlags_Comments                 RegexFlags = 1 << 2 // 4
	RegexFlags_Dot_All                  RegexFlags = 1 << 5 // 32
	RegexFlags_Literal                  RegexFlags = 1 << 4 // 16
	RegexFlags_Multiline                RegexFlags = 1 << 3 // 8
	RegexFlags_Unix_Lines               RegexFlags = 1 << 0 // 1
	RegexFlags_Unicode_Word             RegexFlags = 1 << 8 // 256
	RegexFlags_Error_On_Unknown_Escapes RegexFlags = 1 << 9 // 512
)
42+
43+
func CreateRegex(stringBufferInBytes uint32) Regex {
44+
return &privateRegex{}
45+
}
46+
47+
// privateRegex implements Regex on top of the standard library regexp
// package. It caches the result of the most recent search so that repeated
// calls with the same start position do not rescan the subject string.
type privateRegex struct {
	// re is the compiled pattern; nil until SetRegexString succeeds.
	re *regexp.Regexp
	// str is the subject string supplied through SetMatchString.
	str string
	// sset records whether SetMatchString was called for the current pattern.
	sset bool

	// done reports whether start and locs describe a completed search.
	done bool
	// start is the 1-based start position of the cached search.
	start int
	// locs holds the [begin, end) index pairs of every match, relative to
	// str[start-1:].
	locs [][]int
}
56+
57+
// Compile-time assertion that *privateRegex satisfies the Regex interface.
var _ Regex = (*privateRegex)(nil)
58+
59+
func (pr *privateRegex) SetRegexString(ctx context.Context, regexStr string, flags RegexFlags) (err error) {
60+
// i : RegexFlags_Case_Insensitive
61+
// m : RegexFlags_Multiline
62+
// s : RegexFlags_Dot_All
63+
// RegexFlags_Unix_Lines
64+
var flg = "(?"
65+
if flags&RegexFlags_Case_Insensitive != 0 {
66+
flg += "i"
67+
}
68+
if flags&RegexFlags_Multiline != 0 {
69+
flg += "m"
70+
}
71+
if flags&RegexFlags_Dot_All != 0 {
72+
flg += "s"
73+
}
74+
if len(flg) > 2 {
75+
flg += ")"
76+
} else {
77+
flg = ""
78+
}
79+
80+
pr.done = false
81+
pr.sset = false
82+
pr.re, err = regexp.Compile(flg + regexStr)
83+
if err != nil {
84+
return ErrInvalidRegex.New()
85+
}
86+
return nil
87+
}
88+
89+
func (pr *privateRegex) SetMatchString(ctx context.Context, matchStr string) (err error) {
90+
if pr.re == nil {
91+
return ErrRegexNotYetSet.New()
92+
}
93+
pr.done = false
94+
pr.str = matchStr
95+
pr.sset = true
96+
return nil
97+
}
98+
99+
func (pr *privateRegex) do(start int) error {
100+
if start < 1 {
101+
start = 1
102+
}
103+
if !pr.done || pr.start != start {
104+
if pr.re == nil {
105+
return ErrRegexNotYetSet.New()
106+
}
107+
if !pr.sset {
108+
return ErrMatchNotYetSet.New()
109+
}
110+
pr.locs = pr.re.FindAllStringIndex(pr.str[start-1:], -1)
111+
pr.start = start
112+
pr.done = true
113+
}
114+
return nil
115+
}
116+
117+
func (pr *privateRegex) location(occurrence int) []int {
118+
occurrence--
119+
if occurrence < 0 {
120+
occurrence = 0
121+
}
122+
if len(pr.locs) < occurrence+1 {
123+
return nil
124+
}
125+
return pr.locs[occurrence]
126+
}
127+
128+
func (pr *privateRegex) IndexOf(ctx context.Context, start int, occurrence int, endIndex bool) (int, error) {
129+
err := pr.do(start)
130+
if err != nil {
131+
return 0, err
132+
}
133+
loc := pr.location(occurrence)
134+
if loc == nil {
135+
return 0, nil
136+
}
137+
pos := loc[0]
138+
if endIndex {
139+
pos = loc[1]
140+
}
141+
return pos + pr.start, nil
142+
}
143+
144+
func (pr *privateRegex) Matches(ctx context.Context, start int, occurrence int) (bool, error) {
145+
err := pr.do(start + 1) // start+1: issue #10 (https://github.com/dolthub/go-icu-regex/issues/10)
146+
if err != nil {
147+
return false, err
148+
}
149+
loc := pr.location(occurrence)
150+
return loc != nil, nil
151+
}
152+
153+
func (pr *privateRegex) Replace(ctx context.Context, replacement string, start int, occurrence int) (string, error) {
154+
err := pr.do(start)
155+
if err != nil {
156+
return "", err
157+
}
158+
159+
var locs [][]int
160+
if occurrence == 0 {
161+
locs = pr.locs
162+
} else {
163+
loc := pr.location(occurrence)
164+
if loc != nil {
165+
locs = [][]int{loc}
166+
}
167+
}
168+
offs := pr.start - 1
169+
pos := offs
170+
ret := []byte(pr.str[:pos])
171+
for _, loc := range locs {
172+
ret = fmt.Appendf(ret, "%s%s", pr.str[pos:loc[0]+offs], replacement)
173+
pos = loc[1] + offs
174+
}
175+
ret = fmt.Append(ret, pr.str[pos:])
176+
return string(ret), nil
177+
178+
loc := pr.location(occurrence)
179+
if loc == nil {
180+
return pr.str, nil
181+
}
182+
return pr.str[:loc[0]+pr.start-1] + replacement + pr.str[loc[1]+pr.start-1:], nil
183+
}
184+
185+
func (pr *privateRegex) Substring(ctx context.Context, start int, occurrence int) (string, bool, error) {
186+
err := pr.do(start)
187+
if err != nil {
188+
return "", false, err
189+
}
190+
loc := pr.location(occurrence)
191+
if loc == nil {
192+
return "", false, nil
193+
}
194+
return pr.str[loc[0]+pr.start-1 : loc[1]+pr.start-1], true, nil
195+
}
196+
197+
func (pr *privateRegex) Close() (err error) {
198+
pr.re = nil
199+
pr.str = ""
200+
pr.done = false
201+
pr.locs = nil
202+
return nil
203+
}

0 commit comments

Comments
 (0)