Skip to content

Commit 1e71d5e

Browse files
authored
Move libbeat/common/match and libbeat/common/datetime (#34)
1 parent 7754c15 commit 1e71d5e

File tree

9 files changed

+1931
-0
lines changed

9 files changed

+1931
-0
lines changed

match/cmp.go

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
// Licensed to Elasticsearch B.V. under one or more contributor
2+
// license agreements. See the NOTICE file distributed with
3+
// this work for additional information regarding copyright
4+
// ownership. Elasticsearch B.V. licenses this file to you under
5+
// the Apache License, Version 2.0 (the "License"); you may
6+
// not use this file except in compliance with the License.
7+
// You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package match
19+
20+
import "regexp/syntax"
21+
22+
// common predefined patterns
23+
var (
24+
patDotStar = mustParse(`.*`)
25+
patNullBeginDotStar = mustParse(`^.*`)
26+
patNullEndDotStar = mustParse(`.*$`)
27+
28+
patEmptyText = mustParse(`^$`)
29+
patEmptyWhiteText = mustParse(`^\s*$`)
30+
31+
// patterns matching any content
32+
patAny1 = patDotStar
33+
patAny2 = mustParse(`^.*`)
34+
patAny3 = mustParse(`^.*$`)
35+
patAny4 = mustParse(`.*$`)
36+
37+
patBeginText = mustParse(`^`)
38+
patEndText = mustParse(`$`)
39+
40+
patDigits = mustParse(`\d`)
41+
)
42+
43+
// isRegular reports whether r carries none of the flags this package treats
// as irregular. The only such flag at present is syntax.FoldCase.
func isRegular(r *syntax.Regexp) bool {
	const irregularFlags = syntax.FoldCase

	if r.Flags&irregularFlags != 0 {
		return false
	}
	return true
}
47+
48+
func isRegularLiteral(r *syntax.Regexp) bool {
49+
return r.Op == syntax.OpLiteral && isRegular(r)
50+
}
51+
52+
func isSubstringLiteral(r *syntax.Regexp) bool {
53+
return isRegularLiteral(r)
54+
}
55+
56+
// isPrefixLiteral checks regular expression being literal checking string
57+
// starting with literal pattern (like '^PATTERN')
58+
func isPrefixLiteral(r *syntax.Regexp) bool {
59+
return r.Op == syntax.OpConcat &&
60+
len(r.Sub) == 2 &&
61+
r.Sub[0].Op == syntax.OpBeginText &&
62+
isRegularLiteral(r.Sub[1]) &&
63+
isRegular(r)
64+
}
65+
66+
func isAltLiterals(r *syntax.Regexp) bool {
67+
if r.Op != syntax.OpAlternate {
68+
return false
69+
}
70+
71+
for _, sub := range r.Sub {
72+
if !isRegularLiteral(sub) {
73+
return false
74+
}
75+
}
76+
77+
return true
78+
}
79+
80+
func isExactLiteral(r *syntax.Regexp) bool {
81+
return r.Op == syntax.OpConcat &&
82+
len(r.Sub) == 3 &&
83+
r.Sub[0].Op == syntax.OpBeginText &&
84+
isRegularLiteral(r.Sub[1]) &&
85+
r.Sub[2].Op == syntax.OpEndText &&
86+
isRegular(r)
87+
}
88+
89+
func isOneOfLiterals(r *syntax.Regexp) bool {
90+
return r.Op == syntax.OpConcat &&
91+
len(r.Sub) == 3 &&
92+
r.Sub[0].Op == syntax.OpBeginText &&
93+
isAltLiterals(r.Sub[1]) &&
94+
r.Sub[2].Op == syntax.OpEndText
95+
}
96+
97+
// isPrefixAltLiterals checks regular expression being alternative literals
98+
// starting with literal pattern (like '^PATTERN')
99+
func isPrefixAltLiterals(r *syntax.Regexp) bool {
100+
isPrefixAlt := r.Op == syntax.OpConcat &&
101+
len(r.Sub) == 2 &&
102+
r.Sub[0].Op == syntax.OpBeginText &&
103+
r.Sub[1].Op == syntax.OpAlternate
104+
if !isPrefixAlt {
105+
return false
106+
}
107+
108+
for _, sub := range r.Sub[1].Sub {
109+
if !isRegularLiteral(sub) {
110+
return false
111+
}
112+
}
113+
return true
114+
}
115+
116+
func isPrefixNumDate(r *syntax.Regexp) bool {
117+
if r.Op != syntax.OpConcat || r.Sub[0].Op != syntax.OpBeginText {
118+
return false
119+
}
120+
121+
i := 1
122+
if isRegularLiteral(r.Sub[i]) {
123+
i++
124+
}
125+
126+
// check starts with digits `\d{n}` or `[0-9]{n}`
127+
if !isMultiDigits(r.Sub[i]) {
128+
return false
129+
}
130+
i++
131+
132+
for i < len(r.Sub) {
133+
// check separator
134+
if !isRegularLiteral(r.Sub[i]) {
135+
return false
136+
}
137+
i++
138+
139+
// regex has 'OpLiteral' suffix, without any more digits/patterns following
140+
if i == len(r.Sub) {
141+
return true
142+
}
143+
144+
// check digits
145+
if !isMultiDigits(r.Sub[i]) {
146+
return false
147+
}
148+
i++
149+
}
150+
151+
return true
152+
}
153+
154+
func isEmptyText(r *syntax.Regexp) bool {
155+
return eqRegex(r, patEmptyText)
156+
}
157+
158+
func isEmptyTextWithWhitespace(r *syntax.Regexp) bool {
159+
return eqRegex(r, patEmptyWhiteText)
160+
}
161+
162+
func isAnyMatch(r *syntax.Regexp) bool {
163+
return eqRegex(r, patAny1) ||
164+
eqRegex(r, patAny2) ||
165+
eqRegex(r, patAny3) ||
166+
eqRegex(r, patAny4)
167+
}
168+
169+
func isDigitMatch(r *syntax.Regexp) bool {
170+
return eqRegex(r, patDigits)
171+
}
172+
173+
func isMultiDigits(r *syntax.Regexp) bool {
174+
return isConcatRepetition(r) && isDigitMatch(r.Sub[0])
175+
}
176+
177+
// isConcatRepetition reports whether r is a concatenation whose sub-nodes
// are all the very same node, compared by pointer identity.
//
// A concatenation without any sub-node is not a repetition of anything and
// yields false; this also keeps callers that go on to inspect r.Sub[0] from
// indexing into an empty slice.
func isConcatRepetition(r *syntax.Regexp) bool {
	if r.Op != syntax.OpConcat || len(r.Sub) == 0 {
		return false
	}

	first := r.Sub[0]
	for _, other := range r.Sub[1:] {
		if other != first { // concat repetitions reuse references => compare pointers
			return false
		}
	}

	return true
}
191+
192+
// eqRegex reports whether the syntax trees r and proto are structurally
// equal. Two nodes are equal when their Op, Flags, Min and Max agree, their
// Rune slices hold the same runes, and their sub-nodes are pairwise equal
// under the same rule.
func eqRegex(r, proto *syntax.Regexp) bool {
	// cheap per-node checks first, so mismatches are rejected before recursing
	switch {
	case r.Op != proto.Op, r.Flags != proto.Flags:
		return false
	case r.Min != proto.Min, r.Max != proto.Max:
		return false
	case len(r.Sub) != len(proto.Sub), len(r.Rune) != len(proto.Rune):
		return false
	}

	for i, sub := range r.Sub {
		if !eqRegex(sub, proto.Sub[i]) {
			return false
		}
	}

	for i, c := range r.Rune {
		if c != proto.Rune[i] {
			return false
		}
	}
	return true
}
215+
216+
func eqPrefixAnyRegex(r *syntax.Regexp, protos ...*syntax.Regexp) bool {
217+
for _, proto := range protos {
218+
if eqPrefixRegex(r, proto) {
219+
return true
220+
}
221+
}
222+
return false
223+
}
224+
225+
func eqPrefixRegex(r, proto *syntax.Regexp) bool {
226+
if r.Op != syntax.OpConcat {
227+
return false
228+
}
229+
230+
if proto.Op != syntax.OpConcat {
231+
if len(r.Sub) == 0 {
232+
return false
233+
}
234+
return eqRegex(r.Sub[0], proto)
235+
}
236+
237+
if len(r.Sub) < len(proto.Sub) {
238+
return false
239+
}
240+
241+
for i := range proto.Sub {
242+
if !eqRegex(r.Sub[i], proto.Sub[i]) {
243+
return false
244+
}
245+
}
246+
return true
247+
}
248+
249+
func eqSuffixAnyRegex(r *syntax.Regexp, protos ...*syntax.Regexp) bool {
250+
for _, proto := range protos {
251+
if eqSuffixRegex(r, proto) {
252+
return true
253+
}
254+
}
255+
return false
256+
}
257+
258+
func eqSuffixRegex(r, proto *syntax.Regexp) bool {
259+
if r.Op != syntax.OpConcat {
260+
return false
261+
}
262+
263+
if proto.Op != syntax.OpConcat {
264+
i := len(r.Sub) - 1
265+
if i < 0 {
266+
return false
267+
}
268+
return eqRegex(r.Sub[i], proto)
269+
}
270+
271+
if len(r.Sub) < len(proto.Sub) {
272+
return false
273+
}
274+
275+
d := len(r.Sub) - len(proto.Sub)
276+
for i := range proto.Sub {
277+
if !eqRegex(r.Sub[d+i], proto.Sub[i]) {
278+
return false
279+
}
280+
}
281+
return true
282+
}
283+
284+
// mustParse parses pattern with the syntax.Perl flags and returns the
// resulting syntax tree. It panics with the parse error if pattern is
// invalid.
func mustParse(pattern string) *syntax.Regexp {
	re, err := syntax.Parse(pattern, syntax.Perl)
	if err == nil {
		return re
	}
	panic(err)
}

0 commit comments

Comments
 (0)