Skip to content

Commit 344d270

Browse files
committed
perf(rule_engine): Move to iterative greedy wildcard matching
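Switching from recursive backtracking to an iterative greedy matcher eliminates the recursion overhead, the repeated string re-slicing on every recursive call, and the exponential branching that occurred when the pattern contained several `*` wildcards. The matcher keeps a single backtrack point (the most recent `*`), so it runs in linear time for typical patterns and in O(len(pattern) × len(str)) time in the worst case, instead of exponential time. It also introduces an ASCII fast path, falling back to UTF-8 decoding only when a non-ASCII byte is encountered.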
1 parent cc7973d commit 344d270

File tree

2 files changed

+101
-49
lines changed

2 files changed

+101
-49
lines changed

pkg/util/wildcard/wildcard.go

Lines changed: 77 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,89 @@
11
/*
2-
* MinIO Cloud Storage, (C) 2015, 2016 MinIO, Inc.
2+
* Copyright 2019-present by Nedim Sabic
3+
* http://rabbitstack.github.io
4+
* All Rights Reserved.
35
*
4-
* Licensed under the Apache License, Version 2.0 (the "License");
5-
* you may not use this file except in compliance with the License.
6-
* You may obtain a copy of the License at
6+
* Licensed under the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License. You may obtain
8+
* a copy of the License at
79
*
8-
* http://www.apache.org/licenses/LICENSE-2.0
9-
*
10-
* Unless required by applicable law or agreed to in writing, software
11-
* distributed under the License is distributed on an "AS IS" BASIS,
12-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-
* See the License for the specific language governing permissions and
14-
* limitations under the License.
10+
* http://www.apache.org/licenses/LICENSE-2.0
1511
*/
1612

1713
package wildcard
1814

19-
// Match - finds whether the text matches/satisfies the pattern string.
20-
// supports '*' and '?' wildcards in the pattern string.
21-
// unlike path.Match(), considers a path as a flat name space while matching the pattern.
22-
// The difference is illustrated in the example here https://play.golang.org/p/Ega9qgD4Qz .
23-
func Match(pattern, name string) (matched bool) {
24-
if pattern == "" {
25-
return name == pattern
26-
}
27-
if pattern == "*" {
28-
return true
29-
}
30-
// Does extended wildcard '*' and '?' match?
31-
return deepMatchRune(name, pattern, false)
32-
}
15+
import "unicode/utf8"
3316

// Match reports whether str matches the wildcard pattern.
//
// Two metacharacters are recognised in pattern:
//
//   - '*' matches any sequence of characters, including the empty sequence.
//   - '?' matches exactly one character: a whole UTF-8 encoded rune, or a
//     single byte where str is not valid UTF-8.
//
// Every other byte of pattern is a literal and must be equal to the
// corresponding byte of str. Comparing literals byte-wise is equivalent to
// comparing runes for valid UTF-8 and, unlike decoding, never treats two
// different invalid bytes as the same replacement character.
//
// The matcher is iterative and remembers only the most recent '*', so it
// needs no recursion and no allocation. The worst-case running time is
// O(len(pattern) * len(str)).
func Match(pattern, str string) bool {
	slen := len(str)
	plen := len(pattern)

	var p, s int
	// wildcardIdx is the pattern index of the most recent '*' (-1 if none);
	// matchIdx is the str index where that '*' currently stops matching.
	wildcardIdx, matchIdx := -1, 0

	for s < slen {
		if p < plen {
			switch pb := pattern[p]; pb {
			case '?':
				// consume exactly one character of str
				s += runeWidthAt(str, s)
				p++
				continue

			case '*':
				// record the wildcard position and first try to match
				// it against the empty sequence
				wildcardIdx = p
				matchIdx = s
				p++
				continue

			default:
				// literal byte match
				if pb == str[s] {
					p++
					s++
					continue
				}
			}
		}

		// mismatch (or pattern exhausted) without a previous '*'
		if wildcardIdx == -1 {
			return false
		}

		// Backtrack: let the last '*' swallow one more character. Advance by
		// a whole rune so that matching never resumes in the middle of a
		// multi-byte sequence, where '?' could match a lone continuation byte.
		matchIdx += runeWidthAt(str, matchIdx)
		p = wildcardIdx + 1
		s = matchIdx
	}

	// str is exhausted: only trailing stars may remain in the pattern
	for p < plen && pattern[p] == '*' {
		p++
	}

	return p == plen
}

// runeWidthAt returns the number of bytes occupied by the character that
// starts at s[i]. ASCII bytes take the fast path; invalid UTF-8 counts as
// one byte. The caller must guarantee i < len(s).
func runeWidthAt(s string, i int) int {
	if s[i] < utf8.RuneSelf {
		return 1
	}
	_, size := utf8.DecodeRuneInString(s[i:])
	return size
}

pkg/util/wildcard/wildcard_test.go

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2019-2020 by Nedim Sabic
2+
* Copyright 2019-present by Nedim Sabic
33
* http://rabbitstack.github.io
44
* All Rights Reserved.
55
*
@@ -13,15 +13,31 @@
1313
package wildcard
1414

1515
import (
16-
"github.com/stretchr/testify/assert"
1716
"testing"
17+
18+
"github.com/stretchr/testify/assert"
1819
)
1920

2021
func TestMatch(t *testing.T) {
21-
assert.True(t, Match("C:\\*\\lsass?.dmp", "C:\\Windows\\System32\\lsass2.dmp"))
22-
assert.True(t, Match("C:\\*\\ActionList.x?l", "C:\\Windows\\Setup\\LatentAcquisition\\ActionList.xml"))
23-
assert.True(t, Match("C:\\ProgramData\\*.dll", "C:\\ProgramData\\Directory\\OneMoreDirectory\\mal.dll"))
24-
assert.True(t, Match("C:\\ProgramData\\*.dll", "C:\\ProgramData\\Directory\\OneMoreDirectory\\mal.dll"))
25-
assert.True(t, Match("HKEY_USERS\\*\\Environment\\windir", "HKEY_USERS\\S-1-5-21-2271034452-2606270099-984871569-1001\\Environment\\windir"))
26-
assert.True(t, Match("C:\\Windows\\SoftwareDistribution\\*", "C:\\Windows\\SoftwareDistribution\\SLS\\7971F918-A847-4430-9279-4A52D1EFE18D\\sls.rar"))
22+
var tests = []struct {
23+
p string
24+
s string
25+
match bool
26+
}{
27+
{"C:\\*\\lsass?.dmp", "C:\\Windows\\System32\\lsass2.dmp", true},
28+
{"?:\\*\\lsass?.dmp", "C:\\Windows\\System32\\lsass2.dmp", true},
29+
{"?:\\*\\lsass?.dmp", "C:\\Windows\\System32\\cmd.exe", false},
30+
{"C:\\*\\ActionList.x?l", "C:\\Windows\\Setup\\LatentAcquisition\\ActionList.xml", true},
31+
{"C:\\ProgramData\\*.dll", "C:\\ProgramData\\Directory\\OneMoreDirectory\\mal.dll", true},
32+
{"HKEY_USERS\\*\\Environment\\windir", "HKEY_USERS\\S-1-5-21-2271034452-2606270099-984871569-1001\\Environment\\windir", true},
33+
{"C:\\Windows\\SoftwareDistribution\\*", "C:\\Windows\\SoftwareDistribution\\SLS\\7971F918-A847-4430-9279-4A52D1EFE18D\\sls.rar", true},
34+
{"HKEY_USERS\\S-1-5-21-*_CLASSES\\MS-SETTINGS\\CURVER", "HKEY_USERS\\S-1-5-21-2271034452-1207270099-244871569-1021_CLASSES\\MS-SETTINGS\\CURVER", true},
35+
{"ntdll.dll|KernelBase.dll|advapi32.dll|*", "ntdll.dll|KernelBase.dll|advapi32.dll|pe386.dll|com.dll|clr.dll|mmc.exe", true},
36+
}
37+
38+
for _, tt := range tests {
39+
t.Run(tt.p, func(t *testing.T) {
40+
assert.Equal(t, tt.match, Match(tt.p, tt.s))
41+
})
42+
}
2743
}

0 commit comments

Comments
 (0)