Skip to content

Commit 98e425f

Browse files
committed
perf(rule-engine): More efficient filter indexing
Move away from FNV hash-based filter indexing to two separate buckets - one for event types filters and one for category filters.
1 parent bc1de48 commit 98e425f

File tree

5 files changed

+139
-92
lines changed

5 files changed

+139
-92
lines changed

pkg/event/category.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package event
2020

2121
import (
22+
"github.com/bits-and-blooms/bitset"
2223
"github.com/rabbitstack/fibratus/pkg/util/hashers"
2324
)
2425

@@ -69,6 +70,56 @@ func (c Category) Hash() uint32 {
6970
return hashers.FnvUint32([]byte(c))
7071
}
7172

73+
// CategoryMasks allows setting and checking the category bit mask.
74+
type CategoryMasks struct {
75+
bs bitset.BitSet
76+
}
77+
78+
// Set sets the category bit in the bit mask.
79+
func (m *CategoryMasks) Set(c Category) {
80+
m.bs.Set(uint(c.Index()))
81+
}
82+
83+
// Test checks if the given category bit is set.
84+
func (m *CategoryMasks) Test(c Category) bool {
85+
return m.bs.Test(uint(c.Index()))
86+
}
87+
88+
// MaxCategoryIndex designates the maximum category index.
89+
const MaxCategoryIndex = 13
90+
91+
// Index returns a numerical category index.
92+
func (c Category) Index() uint8 {
93+
switch c {
94+
case Registry:
95+
return 1
96+
case File:
97+
return 2
98+
case Net:
99+
return 3
100+
case Process:
101+
return 4
102+
case Thread:
103+
return 5
104+
case Image:
105+
return 6
106+
case Handle:
107+
return 7
108+
case Driver:
109+
return 8
110+
case Mem:
111+
return 9
112+
case Object:
113+
return 10
114+
case Threadpool:
115+
return 11
116+
case Other:
117+
return 12
118+
default:
119+
return MaxCategoryIndex
120+
}
121+
}
122+
72123
// Categories returns all available categories.
73124
func Categories() []string {
74125
return []string{

pkg/event/category_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright 2021-present by Nedim Sabic Sabic
3+
* https://www.fibratus.io
4+
* All Rights Reserved.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package event
20+
21+
import (
22+
"github.com/stretchr/testify/assert"
23+
"testing"
24+
)
25+
26+
func TestCategoryMasks(t *testing.T) {
27+
var masks CategoryMasks
28+
masks.Set(File)
29+
masks.Set(Process)
30+
31+
assert.True(t, masks.Test(File))
32+
assert.True(t, masks.Test(Process))
33+
assert.False(t, masks.Test(Registry))
34+
}

pkg/filter/ql/literal.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ package ql
2121
import (
2222
"github.com/rabbitstack/fibratus/pkg/event"
2323
"github.com/rabbitstack/fibratus/pkg/filter/fields"
24-
"github.com/rabbitstack/fibratus/pkg/util/hashers"
2524
"golang.org/x/sys/windows"
2625
"net"
2726
"reflect"
@@ -280,12 +279,13 @@ type SequenceExpr struct {
280279
// Alias represents the sequence expression alias.
281280
Alias string
282281

283-
buckets map[uint32]bool
284-
types []event.Type
282+
emasks event.EventsetMasks
283+
cmasks event.CategoryMasks
284+
285+
types []event.Type
285286
}
286287

287288
func (e *SequenceExpr) init() {
288-
e.buckets = make(map[uint32]bool)
289289
e.types = make([]event.Type, 0)
290290
e.BoundFields = make([]*BoundFieldLiteral, 0)
291291
}
@@ -338,12 +338,15 @@ func (e *SequenceExpr) walk() {
338338

339339
// initialize event type/category buckets for every such field
340340
for name, values := range stringFields {
341-
if name == fields.EvtName || name == fields.EvtCategory {
342-
for _, v := range values {
343-
e.buckets[hashers.FnvUint32([]byte(v))] = true
344-
if etype := event.NameToType(v); etype.Exists() {
345-
e.types = append(e.types, etype)
341+
for _, v := range values {
342+
switch name {
343+
case fields.EvtName:
344+
for _, typ := range event.NameToTypes(v) {
345+
e.emasks.Set(typ)
346+
e.types = append(e.types, typ)
346347
}
348+
case fields.EvtCategory:
349+
e.cmasks.Set(event.Category(v))
347350
}
348351
}
349352
}
@@ -354,7 +357,7 @@ func (e *SequenceExpr) walk() {
354357
// to be evaluated when the incoming event type or category pertains to the one
355358
// defined in the field literal.
356359
func (e *SequenceExpr) IsEvaluable(evt *event.Event) bool {
357-
return e.buckets[evt.Type.Hash()] || e.buckets[evt.Category.Hash()]
360+
return e.emasks.Test(evt.Type.GUID(), evt.Type.HookID()) || e.cmasks.Test(evt.Category)
358361
}
359362

360363
// HasBoundFields determines if this sequence expression references any bound field.

pkg/rules/engine.go

Lines changed: 38 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727
"github.com/rabbitstack/fibratus/pkg/filter/fields"
2828
"github.com/rabbitstack/fibratus/pkg/ps"
2929
"github.com/rabbitstack/fibratus/pkg/rules/action"
30-
"github.com/rabbitstack/fibratus/pkg/util/hashers"
3130
log "github.com/sirupsen/logrus"
3231
"sync"
3332
"time"
@@ -53,7 +52,7 @@ var (
5352
// the collection of compiled filters that are derived
5453
// from the loaded ruleset.
5554
type Engine struct {
56-
filters compiledFilters
55+
filters *filterset
5756
config *config.Config
5857
psnap ps.Snapshotter
5958

@@ -65,82 +64,42 @@ type Engine struct {
6564

6665
compiler *compiler
6766

68-
hashCache *hashCache
69-
7067
matchFunc RuleMatchFunc
7168
}
7269

7370
type ruleMatch struct {
7471
ctx *config.ActionContext
7572
}
7673

77-
// hashCache caches the event type/category FNV hashes
78-
type hashCache struct {
79-
mu sync.RWMutex
80-
types map[event.Type]uint32
81-
cats map[event.Category]uint32
82-
lookupCategory bool
83-
}
84-
85-
func newHashCache() *hashCache {
86-
return &hashCache{types: make(map[event.Type]uint32), cats: make(map[event.Category]uint32)}
87-
}
88-
89-
func (c *hashCache) typeHash(e *event.Event) uint32 {
90-
c.mu.RLock()
91-
defer c.mu.RUnlock()
92-
return c.types[e.Type]
93-
}
94-
95-
func (c *hashCache) categoryHash(e *event.Event) uint32 {
96-
c.mu.RLock()
97-
defer c.mu.RUnlock()
98-
return c.cats[e.Category]
99-
}
100-
101-
func (c *hashCache) addTypeHash(e *event.Event) uint32 {
102-
c.mu.Lock()
103-
defer c.mu.Unlock()
104-
h := e.Type.Hash()
105-
c.types[e.Type] = h
106-
return h
107-
}
108-
109-
func (c *hashCache) addCategoryHash(e *event.Event) uint32 {
110-
c.mu.Lock()
111-
defer c.mu.Unlock()
112-
h := e.Category.Hash()
113-
c.cats[e.Category] = h
114-
return h
115-
}
116-
11774
type compiledFilter struct {
11875
filter filter.Filter
11976
config *config.FilterConfig
12077
ss *sequenceState
12178
}
12279

123-
type compiledFilters map[uint32][]*compiledFilter
80+
// filterset contains compiled filters indexed by event type and category.
81+
type filterset struct {
82+
types map[event.Type][]*compiledFilter
83+
categories map[uint8][]*compiledFilter
84+
}
12485

125-
// collect collects all compiled filters for a
126-
// particular event type or category. If no filters
127-
// are found, the event is not asserted against the
128-
// ruleset.
129-
func (filters compiledFilters) collect(hashCache *hashCache, e *event.Event) []*compiledFilter {
130-
h := hashCache.typeHash(e)
131-
if h == 0 {
132-
h = hashCache.addTypeHash(e)
86+
func newFilterset() *filterset {
87+
fs := &filterset{
88+
types: make(map[event.Type][]*compiledFilter),
89+
categories: make(map[uint8][]*compiledFilter),
13390
}
91+
return fs
92+
}
13493

135-
if !hashCache.lookupCategory {
136-
return filters[h]
137-
}
94+
func (f *filterset) empty() bool {
95+
return len(f.types) == 0 && len(f.categories) == 0
96+
}
13897

139-
c := hashCache.categoryHash(e)
140-
if c == 0 {
141-
c = hashCache.addCategoryHash(e)
98+
func (f *filterset) collect(e *event.Event) []*compiledFilter {
99+
if len(f.categories) == 0 {
100+
return f.types[e.Type]
142101
}
143-
return append(filters[h], filters[c]...)
102+
return append(f.types[e.Type], f.categories[e.Category.Index()]...)
144103
}
145104

146105
func newCompiledFilter(f filter.Filter, c *config.FilterConfig, ss *sequenceState) *compiledFilter {
@@ -172,14 +131,13 @@ func (f *compiledFilter) run(e *event.Event) bool {
172131
// NewEngine builds a fresh rules engine instance.
173132
func NewEngine(psnap ps.Snapshotter, config *config.Config) *Engine {
174133
e := &Engine{
175-
filters: make(map[uint32][]*compiledFilter),
134+
filters: newFilterset(),
176135
matches: make([]*ruleMatch, 0),
177136
sequences: make([]*sequenceState, 0),
178137
psnap: psnap,
179138
config: config,
180139
scavenger: time.NewTicker(sequenceGcInterval),
181140
compiler: newCompiler(psnap, config),
182-
hashCache: newHashCache(),
183141
}
184142

185143
go e.gcSequences()
@@ -217,6 +175,7 @@ func (e *Engine) Compile() (*config.RulesCompileResult, error) {
217175
// for more convenient tracking
218176
e.sequences = append(e.sequences, ss)
219177
}
178+
220179
if !fltr.isScoped() {
221180
log.Warnf("%q rule doesn't have "+
222181
"event type or event category condition! "+
@@ -227,18 +186,21 @@ func (e *Engine) Compile() (*config.RulesCompileResult, error) {
227186
c.Name)
228187
continue
229188
}
189+
230190
// traverse all event name or category fields and determine
231191
// the event type from the filter field name expression.
232-
// We end up with a map of rules indexed by event name
233-
// or event category hash
192+
// We end up with a map of rules indexed by event type
193+
// or event category
234194
for name, values := range f.GetStringFields() {
235195
for _, v := range values {
236-
if name == fields.EvtName || name == fields.EvtCategory {
237-
if name == fields.EvtCategory {
238-
e.hashCache.lookupCategory = true
196+
switch name {
197+
case fields.EvtName:
198+
for _, typ := range event.NameToTypes(v) {
199+
e.filters.types[typ] = append(e.filters.types[typ], fltr)
239200
}
240-
hash := hashers.FnvUint32([]byte(v))
241-
e.filters[hash] = append(e.filters[hash], fltr)
201+
case fields.EvtCategory:
202+
category := event.Category(v)
203+
e.filters.categories[category.Index()] = append(e.filters.categories[category.Index()], fltr)
242204
}
243205
}
244206
}
@@ -258,10 +220,10 @@ func (*Engine) CanEnqueue() bool { return true }
258220
// Filters can be simple direct-event matchers or sequence states that
259221
// track an ordered series of events over a short period of time.
260222
func (e *Engine) ProcessEvent(evt *event.Event) (bool, error) {
261-
if len(e.filters) == 0 {
223+
if e.filters.empty() {
262224
return true, nil
263225
}
264-
var matches bool
226+
265227
if evt.IsTerminateProcess() {
266228
// expire all sequences if the
267229
// process referenced in any
@@ -270,7 +232,10 @@ func (e *Engine) ProcessEvent(evt *event.Event) (bool, error) {
270232
seq.expire(evt)
271233
}
272234
}
273-
filters := e.filters.collect(e.hashCache, evt)
235+
236+
filters := e.filters.collect(evt)
237+
238+
var matches bool
274239
for _, f := range filters {
275240
match := f.run(evt)
276241
if !match {
@@ -293,6 +258,7 @@ func (e *Engine) ProcessEvent(evt *event.Event) (bool, error) {
293258
return true, nil
294259
}
295260
}
261+
296262
return matches, nil
297263
}
298264

pkg/rules/engine_test.go

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ func TestCompileIndexableFilters(t *testing.T) {
141141

142142
compileRules(t, e)
143143

144-
assert.Len(t, e.filters, 3)
144+
assert.Len(t, e.filters.types, 5)
145+
assert.Len(t, e.filters.categories, 1)
145146

146147
var tests = []struct {
147148
evt *event.Event
@@ -156,17 +157,9 @@ func TestCompileIndexableFilters(t *testing.T) {
156157

157158
for _, tt := range tests {
158159
t.Run(tt.evt.Type.String(), func(t *testing.T) {
159-
assert.Len(t, e.filters.collect(e.hashCache, tt.evt), tt.wants)
160+
assert.Len(t, e.filters.collect(tt.evt), tt.wants)
160161
})
161162
}
162-
163-
assert.Len(t, e.hashCache.types, 4)
164-
165-
evt := &event.Event{Type: event.RecvTCPv4}
166-
167-
h1, h2 := e.hashCache.typeHash(evt), e.hashCache.categoryHash(evt)
168-
assert.Equal(t, uint32(0xfa4dab59), h1)
169-
assert.Equal(t, uint32(0x811c9dc5), h2)
170163
}
171164

172165
func TestRunSimpleRules(t *testing.T) {

0 commit comments

Comments
 (0)