Skip to content

Commit 12e572c

Browse files
committed
perf(processors): Speed up image file characteristics parsing
Optimize image file characteristics parsing by keeping a cache of processed image files keyed by executable file path + image checksum.
1 parent 33d4a67 commit 12e572c

File tree

5 files changed

+118
-29
lines changed

5 files changed

+118
-29
lines changed

internal/etw/processors/fs_windows.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/rabbitstack/fibratus/pkg/sys"
3232
"github.com/rabbitstack/fibratus/pkg/util/va"
3333
"golang.org/x/sys/windows"
34+
"golang.org/x/time/rate"
3435
"sync"
3536
"time"
3637
)
@@ -43,6 +44,8 @@ var (
4344
fileObjectMisses = expvar.NewInt("fs.file.objects.misses")
4445
fileObjectHandleHits = expvar.NewInt("fs.file.object.handle.hits")
4546
fileReleaseCount = expvar.NewInt("fs.file.releases")
47+
48+
fsFileCharacteristicsRateLimits = expvar.NewInt("fs.file.characteristics.rate.limits")
4649
)
4750

4851
type fsProcessor struct {
@@ -65,6 +68,8 @@ type fsProcessor struct {
6568
purger *time.Ticker
6669

6770
quit chan struct{}
71+
// lim throttles the parsing of image characteristics
72+
lim *rate.Limiter
6873
}
6974

7075
// FileInfo stores file information obtained from event state.
@@ -91,6 +96,7 @@ func newFsProcessor(
9196
buckets: make(map[uint64][]*kevent.Kevent),
9297
purger: time.NewTicker(time.Second * 5),
9398
quit: make(chan struct{}, 1),
99+
lim: rate.NewLimiter(30, 40), // allow 30 parse ops per second or bursts of 40 ops
94100
}
95101

96102
go f.purge()
@@ -239,10 +245,19 @@ func (f *fsProcessor) processEvent(e *kevent.Kevent) (*kevent.Kevent, error) {
239245

240246
// parse PE data for created files and append parameters
241247
if ev.IsCreateDisposition() && ev.IsSuccess() {
242-
err := parseImageFileCharacteristics(ev)
248+
if !f.lim.Allow() {
249+
fsFileCharacteristicsRateLimits.Add(1)
250+
return ev, nil
251+
}
252+
path := ev.GetParamAsString(kparams.FilePath)
253+
c, err := parseImageFileCharacteristics(path)
243254
if err != nil {
244255
return ev, nil
245256
}
257+
e.AppendParam(kparams.FileIsDLL, kparams.Bool, c.isDLL)
258+
e.AppendParam(kparams.FileIsDriver, kparams.Bool, c.isDriver)
259+
e.AppendParam(kparams.FileIsExecutable, kparams.Bool, c.isExe)
260+
e.AppendParam(kparams.FileIsDotnet, kparams.Bool, c.isDotnet)
246261
}
247262

248263
return ev, nil

internal/etw/processors/image_windows.go

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,29 +19,71 @@
1919
package processors
2020

2121
import (
22+
"expvar"
2223
"github.com/rabbitstack/fibratus/pkg/kevent"
2324
"github.com/rabbitstack/fibratus/pkg/kevent/kparams"
2425
"github.com/rabbitstack/fibratus/pkg/ps"
26+
"sync"
27+
"time"
2528
)
2629

30+
var imageFileCharacteristicsCacheHits = expvar.NewInt("image.file.characteristics.cache.hits")
31+
32+
var modTTL = time.Minute * 10
33+
2734
type imageProcessor struct {
28-
psnap ps.Snapshotter
35+
psnap ps.Snapshotter
36+
mods map[string]*imageFileCharacteristics
37+
mu sync.Mutex
38+
purger *time.Ticker
39+
quit chan struct{}
2940
}
3041

3142
func newImageProcessor(psnap ps.Snapshotter) Processor {
32-
return &imageProcessor{psnap: psnap}
43+
m := &imageProcessor{
44+
psnap: psnap,
45+
mods: make(map[string]*imageFileCharacteristics),
46+
purger: time.NewTicker(time.Minute),
47+
quit: make(chan struct{}, 1),
48+
}
49+
50+
go m.purge()
51+
52+
return m
3353
}
3454

35-
func (imageProcessor) Name() ProcessorType { return Image }
55+
func (*imageProcessor) Name() ProcessorType { return Image }
3656

3757
func (m *imageProcessor) ProcessEvent(e *kevent.Kevent) (*kevent.Kevent, bool, error) {
3858
if e.IsLoadImage() {
39-
// parse PE image data
40-
err := parseImageFileCharacteristics(e)
41-
if err != nil {
42-
return e, false, m.psnap.AddModule(e)
59+
// is image characteristics data cached?
60+
path := e.GetParamAsString(kparams.ImagePath)
61+
key := path + e.GetParamAsString(kparams.ImageCheckSum)
62+
63+
m.mu.Lock()
64+
defer m.mu.Unlock()
65+
c, ok := m.mods[key]
66+
if !ok {
67+
// parse PE image data
68+
var err error
69+
c, err = parseImageFileCharacteristics(path)
70+
if err != nil {
71+
return e, false, m.psnap.AddModule(e)
72+
}
73+
c.keepalive()
74+
m.mods[key] = c
75+
} else {
76+
imageFileCharacteristicsCacheHits.Add(1)
77+
c.keepalive()
4378
}
79+
80+
// append event parameters
81+
e.AppendParam(kparams.FileIsDLL, kparams.Bool, c.isDLL)
82+
e.AppendParam(kparams.FileIsDriver, kparams.Bool, c.isDriver)
83+
e.AppendParam(kparams.FileIsExecutable, kparams.Bool, c.isExe)
84+
e.AppendParam(kparams.FileIsDotnet, kparams.Bool, c.isDotnet)
4485
}
86+
4587
if e.IsUnloadImage() {
4688
pid := e.Kparams.MustGetPid()
4789
addr := e.Kparams.TryGetAddress(kparams.ImageBase)
@@ -50,10 +92,30 @@ func (m *imageProcessor) ProcessEvent(e *kevent.Kevent) (*kevent.Kevent, bool, e
5092
}
5193
return e, false, m.psnap.RemoveModule(pid, addr)
5294
}
95+
5396
if e.IsLoadImage() || e.IsImageRundown() {
5497
return e, false, m.psnap.AddModule(e)
5598
}
5699
return e, true, nil
57100
}
58101

59-
func (imageProcessor) Close() {}
102+
func (m *imageProcessor) Close() {
103+
m.quit <- struct{}{}
104+
}
105+
106+
func (m *imageProcessor) purge() {
107+
for {
108+
select {
109+
case <-m.purger.C:
110+
m.mu.Lock()
111+
for key, mod := range m.mods {
112+
if time.Since(mod.accessed) > modTTL {
113+
delete(m.mods, key)
114+
}
115+
}
116+
m.mu.Unlock()
117+
case <-m.quit:
118+
return
119+
}
120+
}
121+
}

internal/etw/processors/processor.go

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ package processors
2121
import (
2222
libntfs "github.com/rabbitstack/fibratus/pkg/fs/ntfs"
2323
"github.com/rabbitstack/fibratus/pkg/kevent"
24-
"github.com/rabbitstack/fibratus/pkg/kevent/kparams"
2524
"github.com/rabbitstack/fibratus/pkg/pe"
2625
"os"
26+
"time"
2727
)
2828

2929
// ProcessorType is an alias for the event processor type
@@ -83,6 +83,18 @@ func (typ ProcessorType) String() string {
8383
}
8484
}
8585

86+
type imageFileCharacteristics struct {
87+
isExe bool
88+
isDLL bool
89+
isDriver bool
90+
isDotnet bool
91+
accessed time.Time
92+
}
93+
94+
func (c *imageFileCharacteristics) keepalive() {
95+
c.accessed = time.Now()
96+
}
97+
8698
// parseImageFileCharacteristics parses the PE structure for the given file
8799
// path. The preferred method for reading
88100
// the PE metadata is by directly accessing the file.
@@ -91,40 +103,42 @@ func (typ ProcessorType) String() string {
91103
// The returned characteristics are populated from the parsed PE
92104
// data. Most notably, they indicate whether the file is a DLL,
93105
// executable image, a Windows driver, or a .NET assembly.
94-
func parseImageFileCharacteristics(e *kevent.Kevent) error {
106+
func parseImageFileCharacteristics(path string) (*imageFileCharacteristics, error) {
95107
var pefile *pe.PE
96-
filename := e.GetParamAsString(kparams.FilePath)
97-
f, err := os.Open(filename)
108+
109+
f, err := os.Open(path)
98110
if err != nil {
99111
// read file data blob from raw device
100112
// if the regular file access fails
101113
ntfs := libntfs.NewFS()
102-
data, n, err := ntfs.Read(filename, 0, int64(os.Getpagesize()))
114+
data, n, err := ntfs.Read(path, 0, int64(os.Getpagesize()))
103115
defer ntfs.Close()
104116
if err != nil {
105-
return err
117+
return nil, err
106118
}
107119
if n > 0 {
108120
data = data[:n]
109121
}
110122
// parse PE file from byte slice
111-
pefile, err = pe.ParseBytes(data, pe.WithSections(), pe.WithSymbols())
123+
pefile, err = pe.ParseBytes(data, pe.WithSections(), pe.WithSymbols(), pe.WithCLR())
112124
if err != nil {
113-
return err
125+
return nil, err
114126
}
115127
} else {
116128
defer f.Close()
117129
// parse PE file from on-disk file
118-
pefile, err = pe.ParseFile(filename, pe.WithSections(), pe.WithSymbols())
130+
pefile, err = pe.ParseFile(path, pe.WithSections(), pe.WithSymbols(), pe.WithCLR())
119131
if err != nil {
120-
return err
132+
return nil, err
121133
}
122134
}
123135

124-
// append parameters
125-
e.AppendParam(kparams.FileIsDLL, kparams.Bool, pefile.IsDLL)
126-
e.AppendParam(kparams.FileIsDriver, kparams.Bool, pefile.IsDriver)
127-
e.AppendParam(kparams.FileIsExecutable, kparams.Bool, pefile.IsExecutable)
136+
c := &imageFileCharacteristics{
137+
isExe: pefile.IsExecutable,
138+
isDLL: pefile.IsDLL,
139+
isDriver: pefile.IsDriver,
140+
isDotnet: pefile.IsDotnet,
141+
}
128142

129-
return nil
143+
return c, nil
130144
}

pkg/filter/accessor_windows.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -908,11 +908,7 @@ func (i *imageAccessor) Get(f Field, kevt *kevent.Kevent) (kparams.Value, error)
908908
case fields.ImageIsExecutable:
909909
return kevt.Kparams.GetBool(kparams.FileIsExecutable)
910910
case fields.ImageIsDotnet:
911-
p, err := pe.ParseFile(kevt.GetParamAsString(kparams.ImagePath), pe.WithCLR())
912-
if err != nil {
913-
return nil, err
914-
}
915-
return p.IsDotnet, nil
911+
return kevt.Kparams.GetBool(kparams.FileIsDotnet)
916912
}
917913

918914
return nil, nil

pkg/kevent/kparams/fields_windows.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ const (
125125
FileIsDriver = "is_driver"
126126
// FileIsExecutable is the parameter that indicates if the file is an executable
127127
FileIsExecutable = "is_exec"
128+
// FileIsDotnet is the parameter that indicates if the file is a .NET assembly
129+
FileIsDotnet = "is_dotnet"
128130

129131
// FileViewBase is the parameter that represents the base address of the mapped section.
130132
FileViewBase = "view_base"

0 commit comments

Comments
 (0)