Skip to content

Commit 53f0697

Browse files
committed
faster pattern extraction
1 parent 661a290 commit 53f0697

File tree

1 file changed

+49
-8
lines changed

1 file changed

+49
-8
lines changed

pattern.go

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ var (
2525

2626
hex = regexp.MustCompile(`^[a-fA-F0-9]{4,}$`)
2727
uuid = regexp.MustCompile(`^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$`)
28-
num = regexp.MustCompile(`\d+`)
29-
word = regexp.MustCompile(`^[a-zA-Z][a-zA-Z\._-]*[a-zA-Z]$`)
3028
)
3129

3230
type Pattern struct {
@@ -77,20 +75,63 @@ func NewPattern(input string) *Pattern {
7775
if hex.MatchString(p) || uuid.MatchString(p) {
7876
continue
7977
}
80-
p = num.ReplaceAllLiteralString(p, "")
81-
if word.MatchString(p) {
78+
p = removeDigits(p)
79+
if isWord(p) {
8280
pattern.words = append(pattern.words, p)
8381
}
8482
}
8583
return pattern
8684
}
8785

88-
func removeQuotedAndBrackets(data string) string {
89-
var res bytes.Buffer
86+
// isWord reports whether s starts with an ASCII letter, ends with an ASCII
// letter, and contains only ASCII letters, '.', '_' or '-' in between.
// It gives the same answer as matching `^[a-zA-Z][a-zA-Z._-]*[a-zA-Z]$`
// without going through the regexp package.
func isWord(s string) bool {
	// The first and the last letter are distinct positions, so anything
	// shorter than two bytes can never qualify.
	if len(s) < 2 {
		return false
	}

	letter := func(c byte) bool {
		return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
	}

	end := len(s) - 1
	if !letter(s[0]) || !letter(s[end]) {
		return false
	}

	// Every accepted character is a single ASCII byte, so checking bytes is
	// enough: any byte of a multi-byte rune is >= 0x80 and gets rejected.
	for pos := 1; pos < end; pos++ {
		c := s[pos]
		if letter(c) || c == '.' || c == '_' || c == '-' {
			continue
		}
		return false
	}
	return true
}
115+
116+
// removeDigits returns s with every ASCII digit ('0'-'9') removed.
//
// All other bytes are copied through untouched, including multi-byte UTF-8
// sequences and bytes that are not valid UTF-8. The scan works on bytes
// rather than runes on purpose: ranging over runes and re-encoding them
// would replace each invalid byte with U+FFFD and so alter non-digit input.
// Scanning bytes is safe because an ASCII digit byte never occurs inside a
// multi-byte UTF-8 sequence.
//
// When s contains no digits it is returned as is, without allocating.
func removeDigits(s string) string {
	// Locate the first digit; everything before it can be copied in bulk.
	first := -1
	for i := 0; i < len(s); i++ {
		if s[i] >= '0' && s[i] <= '9' {
			first = i
			break
		}
	}
	if first < 0 {
		return s
	}

	// At least one byte is dropped, so len(s)-1 bounds the result size.
	out := make([]byte, 0, len(s)-1)
	out = append(out, s[:first]...)
	for i := first + 1; i < len(s); i++ {
		if c := s[i]; c < '0' || c > '9' {
			out = append(out, c)
		}
	}
	return string(out)
}
127+
128+
func removeQuotedAndBrackets(s string) string {
129+
res := bytes.NewBufferString(s)
130+
res.Reset()
90131
var quote, prev rune
91132
var seenBrackets []rune
92133
var l int
93-
for i, r := range data {
134+
for i, r := range s {
94135
switch r {
95136
case lsbrack, lpar, lcur:
96137
if quote == 0 {
@@ -114,7 +155,7 @@ func removeQuotedAndBrackets(data string) string {
114155
case dquote, squote:
115156
prev = 0
116157
if i > 0 {
117-
prev = rune(data[i-1])
158+
prev = rune(s[i-1])
118159
}
119160
if prev != bslash && len(seenBrackets) == 0 {
120161
if quote == 0 {

0 commit comments

Comments
 (0)