diff --git a/pkg/symbolize/exports.go b/pkg/symbolize/exports.go
new file mode 100644
index 000000000..bd56b680c
--- /dev/null
+++ b/pkg/symbolize/exports.go
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2021-present by Nedim Sabic Sabic
+ * https://www.fibratus.io
+ * All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package symbolize
+
+import (
+	"sync"
+
+	"github.com/rabbitstack/fibratus/pkg/util/va"
+)
+
+// ModuleExports contains exports for the specific module
+// indexed by RVA (Relative Virtual Address).
+type ModuleExports struct {
+	exps map[uint32]string
+}
+
+// SymbolFromRVA finds the closest export address before RVA.
+// The name of the export with the highest address that is
+// lower or equal to the RVA is returned. If the matching
+// export has an empty name, "?" is returned instead. An
+// empty string is returned when no export precedes the RVA.
+func (m *ModuleExports) SymbolFromRVA(rva va.Address) string {
+	// zero doubles as the "no export found" marker
+	var exp uint32
+	for f := range m.exps {
+		if uint64(f) <= rva.Uint64() && exp < f {
+			exp = f
+		}
+	}
+	if exp == 0 {
+		return ""
+	}
+	sym, ok := m.exps[exp]
+	if ok && sym == "" {
+		return "?"
+	}
+	return sym
+}
+
+// ExportsCache stores the cached module exports extracted
+// from the PE export directory.
+type ExportsCache struct {
+	sync.RWMutex
+	exports map[string]*ModuleExports
+}
+
+// NewExportsCache returns a fresh instance of the exports
+// cache and starts the prune goroutine.
+func NewExportsCache() *ExportsCache {
+	c := &ExportsCache{exports: make(map[string]*ModuleExports)}
+	go c.prune()
+	return c
+}
+
+// Exports returns the exports for the given module path. If
+// the exports can't be found in the cache, the module PE is
+// parsed and the exports cache updated. The boolean result
+// is false when the module PE can't be parsed.
+//
+// NOTE(review): the call sites replaced by this cache passed
+// pe.WithSections() and pe.WithExports() to parsePeFile. Confirm
+// the export directory is still populated without those options.
+func (e *ExportsCache) Exports(mod string) (*ModuleExports, bool) {
+	e.RLock()
+	exports, ok := e.exports[mod]
+	e.RUnlock()
+	if ok {
+		return exports, true
+	}
+	// parse the PE without holding the lock to
+	// avoid blocking other callers in the meantime
+	pe, err := parsePeFile(mod)
+	if err != nil {
+		return nil, false
+	}
+	e.Lock()
+	defer e.Unlock()
+	// another goroutine may have cached the module while
+	// the PE was being parsed. Keep the existing entry so
+	// all callers observe the same exports instance
+	if exports, ok = e.exports[mod]; ok {
+		return exports, true
+	}
+	exports = &ModuleExports{exps: pe.Exports}
+	e.exports[mod] = exports
+	return exports, true
+}
+
+// Clear removes all module exports from the cache.
+func (e *ExportsCache) Clear() {
+	e.Lock()
+	defer e.Unlock()
+	e.exports = make(map[string]*ModuleExports)
+}
+
+// prune is run in its own goroutine by NewExportsCache. It
+// is currently a no-op and returns immediately.
+func (e *ExportsCache) prune() {
+
+}
diff --git a/pkg/symbolize/exports_test.go b/pkg/symbolize/exports_test.go
new file mode 100644
index 000000000..0eb5f7352
--- /dev/null
+++ b/pkg/symbolize/exports_test.go
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2021-present by Nedim Sabic Sabic
+ * https://www.fibratus.io
+ * All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package symbolize
+
+import (
+	"testing"
+
+	"github.com/rabbitstack/fibratus/pkg/util/va"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestSymbolFromRVA checks the resolution of the closest preceding export.
+func TestSymbolFromRVA(t *testing.T) {
+	cases := []struct {
+		rva  va.Address
+		exps map[uint32]string
+		want string
+	}{
+		{va.Address(317949), map[uint32]string{
+			9824:   "SHCreateScopeItemFromShellItem",
+			23248:  "SHCreateScopeItemFromIDList",
+			165392: "DllGetClassObject",
+			186368: "SHCreateSearchIDListFromAutoList",
+			238048: "DllCanUnloadNow",
+			240112: "IsShellItemInSearchIndex",
+			240304: "IsMSSearchEnabled",
+			272336: "SHSaveBinaryAutoListToStream",
+			310672: "DllMain",
+			317920: "",
+			320864: "",
+			434000: "SHCreateAutoList",
+			434016: "SHCreateAutoListWithID",
+			555040: "CreateDefaultProviderResolver",
+			571136: "GetGatherAdmin",
+			572592: "SEARCH_RemoteLocationsCscStateCache_IsRemoteLocationInCsc"},
+			"?",
+		},
+		{va.Address(434011), map[uint32]string{
+			9824:   "SHCreateScopeItemFromShellItem",
+			23248:  "SHCreateScopeItemFromIDList",
+			165392: "DllGetClassObject",
+			186368: "SHCreateSearchIDListFromAutoList",
+			238048: "DllCanUnloadNow",
+			240112: "IsShellItemInSearchIndex",
+			240304: "IsMSSearchEnabled",
+			272336: "SHSaveBinaryAutoListToStream",
+			310672: "DllMain",
+			317920: "",
+			320864: "",
+			434000: "SHCreateAutoList",
+			434016: "SHCreateAutoListWithID",
+			555040: "CreateDefaultProviderResolver",
+			571136: "GetGatherAdmin",
+			572592: "SEARCH_RemoteLocationsCscStateCache_IsRemoteLocationInCsc"},
+			"SHCreateAutoList",
+		},
+		{va.Address(4532), map[uint32]string{
+			9824:   "SHCreateScopeItemFromShellItem",
+			23248:  "SHCreateScopeItemFromIDList",
+			165392: "DllGetClassObject",
+			186368: "SHCreateSearchIDListFromAutoList",
+			238048: "DllCanUnloadNow",
+			240112: "IsShellItemInSearchIndex",
+			240304: "IsMSSearchEnabled",
+			572592: "SEARCH_RemoteLocationsCscStateCache_IsRemoteLocationInCsc"},
+			"",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.want, func(t *testing.T) {
+			assert.Equal(t, tc.want, (&ModuleExports{exps: tc.exps}).SymbolFromRVA(tc.rva))
+		})
+	}
+}
diff --git a/pkg/symbolize/symbolizer.go b/pkg/symbolize/symbolizer.go
index e2146f29c..e72dab8a5 100644
--- a/pkg/symbolize/symbolizer.go
+++ b/pkg/symbolize/symbolizer.go
@@ -53,8 +53,6 @@ var (
 
 	// symCleanups counts the number of symbol cleanups
 	symCleanups = expvar.NewInt("symbolizer.symbol.cleanups")
-	// modCleanups counts the number of module cleanups
-	modCleanups = expvar.NewInt("symbolizer.module.cleanups")
 
 	// symCacheHits counts the number of cache hits in the symbols cache
 	symCacheHits = expvar.NewInt("symbolizer.cache.hits")
@@ -85,9 +83,8 @@ var parsePeFile = func(name string, option ...pe.Option) (*pe.PE, error) {
 // its handle and symbol resources are disposed
 var procTTL = 15 * time.Second
 
-// modTTL maximum time for the module to remain in
-// the state until all its exports are removed
-var modTTL = 8 * time.Minute
+// expsCache is the global cache of module exports
+var expsCache = NewExportsCache()
 
 type process struct {
 	pid uint32
@@ -102,22 +99,11 @@ func (p *process) keepalive() {
 }
 
 type module struct {
-	exports                    map[uint32]string
-	accessed                   time.Time
+	exports                    *ModuleExports
 	minExportRVA, maxExportRVA uint32
 	hasExports                 bool
 }
 
-type syminfo struct {
-	module        string
-	symbol        string
-	moduleAddress va.Address // base module address
-}
-
-func (m *module) keepalive() {
-	m.accessed = time.Now()
-}
-
 func (m *module) isUnexported(rva va.Address) bool {
 	if m.minExportRVA == 0 || m.maxExportRVA == 0 {
 		return false
@@ -125,13 +111,19 @@ func (m *module) isUnexported(rva va.Address) bool {
 	return rva.Uint64() < uint64(m.minExportRVA) || rva.Uint64() > uint64(m.maxExportRVA)
 }
 
+type syminfo struct {
+	module        string
+	symbol        string
+	moduleAddress va.Address // base module address
+}
+
 // Symbolizer is responsible for converting raw addresses
 // into symbol names and modules with the assistance of the
-// symbol resolver.
+// export directory or symbol resolver.
 type Symbolizer struct {
 	config *config.Config
 	procs  map[uint32]*process
-	mods   map[va.Address]*module
+	mods   map[uint32]map[va.Address]*module
 	mu     sync.Mutex
 
 	// symbols stores the mapping of stack
@@ -161,7 +153,7 @@ func NewSymbolizer(r Resolver, psnap ps.Snapshotter, config *config.Config, enqu
 	sym := &Symbolizer{
 		config:  config,
 		procs:   make(map[uint32]*process),
-		mods:    make(map[va.Address]*module),
+		mods:    make(map[uint32]map[va.Address]*module),
 		symbols: make(map[uint32]map[va.Address]syminfo),
 		cleaner: time.NewTicker(time.Second * 2),
 		purger:  time.NewTicker(time.Minute * 5),
@@ -208,11 +200,9 @@ func (s *Symbolizer) ProcessEvent(e *event.Event) (bool, error) {
 		pid := e.Params.MustGetPid()
 		s.mu.Lock()
 		defer s.mu.Unlock()
-		if _, ok := s.symbols[pid]; !ok {
-			return true, nil
-		}
 		symCachedSymbols.Add(-int64(len(s.symbols[pid])))
+		delete(s.mods, pid)
 		delete(s.symbols, pid)
 		proc, ok := s.procs[pid]
 		if !ok {
 			return true, nil
@@ -240,9 +230,7 @@ func (s *Symbolizer) ProcessEvent(e *event.Event) (bool, error) {
 		}
 	}
 
-	// remove module if it has been unmapped from
-	// all process VAS. If the new module is loaded
-	// populate its export directory entries
+	// remove module if it has been unmapped from the process VAS
 	err := s.syncModules(e)
 	if err != nil {
 		log.Error(err)
@@ -263,48 +251,35 @@ func (s *Symbolizer) ProcessEvent(e *event.Event) (bool, error) {
 }
 
 // syncModules reconciles the state of loaded modules.
-// When the module is unloaded from all processes in
-// the snapshost state, its exports map is pruned. If
-// the new module is loaded and not already present in
-// the map, we parse its export directory and insert
-// into the map.
+// When the module is unloaded from the process address
+// space, its information is pruned from the modules cache
+// along with the cached symbols whose addresses fall
+// within the unmapped module. Exports of newly seen modules
+// are obtained from the exports cache when producing frames.
 func (s *Symbolizer) syncModules(e *event.Event) error {
-	filename := e.GetParamAsString(params.ImagePath)
-	addr := e.Params.TryGetAddress(params.ImageBase)
-	s.mu.Lock()
-	defer s.mu.Unlock()
+	base := e.Params.TryGetAddress(params.ImageBase)
+	size := e.Params.TryGetUint64(params.ImageSize)
 
 	if e.IsUnloadImage() {
-		ok, _ := s.psnap.FindModule(addr)
-		if !ok {
+		s.mu.Lock()
+		defer s.mu.Unlock()
+		if _, ok := s.mods[e.PID][base]; ok {
 			symModulesCount.Add(-1)
-			delete(s.mods, addr)
-		}
-		// remove executable images
-		if strings.EqualFold(filepath.Ext(filename), ".exe") {
-			delete(s.mods, addr)
+			delete(s.mods[e.PID], base)
+			// prune symbol entries from the cache if the
+			// symbol address falls within the unmapped
+			// module. Ranging over a missing process
+			// entry is a no-op
+			for addr := range s.symbols[e.PID] {
+				if addr >= base && addr <= base.Inc(size) {
+					// drop the symbol itself, not the module base,
+					// and keep the cached symbols counter in sync
+					delete(s.symbols[e.PID], addr)
+					symCachedSymbols.Add(-1)
+				}
+			}
 		}
-		return nil
-	}
-
-	if s.mods[addr] != nil {
-		return nil
-	}
-	px, err := parsePeFile(filename, pe.WithSections(), pe.WithExports())
-	if err != nil {
-		return fmt.Errorf("unable to parse PE exports for module [%s]: %v", filename, err)
-	}
-
-	symModulesCount.Add(1)
-
-	m := &module{exports: px.Exports, accessed: time.Now(), hasExports: true}
-	exportRVAs := convert.MapKeysToSlice(m.exports)
-	if len(exportRVAs) > 0 {
-		m.minExportRVA, m.maxExportRVA = slices.Min(exportRVAs), slices.Max(exportRVAs)
-	} else {
-		m.hasExports = false
 	}
-	s.mods[addr] = m
 
 	return nil
 }
@@ -485,31 +460,34 @@ func (s *Symbolizer) produceFrame(addr va.Address, e *event.Event) callstack.Fra
 			}
 		}
 	}
+
 	if mod != nil {
 		frame.Module = mod.Name
 		frame.ModuleAddress = mod.BaseAddress
-		m, ok := s.mods[mod.BaseAddress]
+		m, ok := s.mods[pid][mod.BaseAddress]
 		peOK := true
 		if !ok {
-			// parse export directory to resolve symbols
-			m = &module{exports: make(map[uint32]string), accessed: time.Now(), hasExports: true}
-			px, err := parsePeFile(mod.Name, pe.WithSections(), pe.WithExports())
-			if err != nil {
-
peOK = false - m.hasExports = false - } else { - m.exports = px.Exports - m.hasExports = len(m.exports) > 0 - exportRVAs := convert.MapKeysToSlice(m.exports) + var exports *ModuleExports + exports, peOK = expsCache.Exports(mod.Name) + m = &module{ + hasExports: true, + exports: &ModuleExports{exps: make(map[uint32]string)}, + } + if exports != nil { + m.exports = exports + m.hasExports = len(m.exports.exps) > 0 + exportRVAs := convert.MapKeysToSlice(m.exports.exps) if m.hasExports { m.minExportRVA, m.maxExportRVA = slices.Min(exportRVAs), slices.Max(exportRVAs) } + } else { + m.hasExports = false } symModulesCount.Add(1) - s.mods[mod.BaseAddress] = m + s.cacheModule(e.PID, mod.BaseAddress, m) } rva := addr.Dec(mod.BaseAddress.Uint64()) - frame.Symbol = symbolFromRVA(rva, m.exports) + frame.Symbol = m.exports.SymbolFromRVA(rva) // permit unknown symbols for executable modules if frame.Symbol == "" && strings.EqualFold(filepath.Ext(mod.Name), ".exe") { frame.Symbol = "?" @@ -523,9 +501,8 @@ func (s *Symbolizer) produceFrame(addr va.Address, e *event.Event) callstack.Fra if frame.Symbol == "" && (m.isUnexported(rva) || (!m.hasExports && peOK)) { frame.Symbol = "?" 
} - // keep to module alive from purger - m.keepalive() } + if frame.Module != "" && frame.Symbol != "" { // store resolved symbol information in cache s.cacheSymbol(pid, addr, &frame) @@ -571,33 +548,6 @@ func (s *Symbolizer) produceFrame(addr va.Address, e *event.Event) callstack.Fra return frame } -func (s *Symbolizer) cacheSymbol(pid uint32, addr va.Address, frame *callstack.Frame) { - if sym, ok := s.symbols[pid]; ok { - if _, ok := sym[addr]; !ok { - symCachedSymbols.Add(1) - s.symbols[pid][addr] = syminfo{module: frame.Module, symbol: frame.Symbol, moduleAddress: frame.ModuleAddress} - } - } else { - symCachedSymbols.Add(1) - s.symbols[pid] = map[va.Address]syminfo{addr: {module: frame.Module, symbol: frame.Symbol, moduleAddress: frame.ModuleAddress}} - } -} - -// resolveSymbolFromExportDirectory parses the module PE -// export directory and attempts to locate the closest -// symbol before the relative virtual callstack address. -func (s *Symbolizer) resolveSymbolFromExportDirectory(addr va.Address, mod *pstypes.Module) string { - if mod == nil { - return "" - } - px, err := parsePeFile(mod.Name, pe.WithSections(), pe.WithExports()) - if err != nil { - return "" - } - rva := addr.Dec(mod.BaseAddress.Uint64()) - return symbolFromRVA(rva, px.Exports) -} - // symbolizeAddress resolves the given address to a symbol. If the symbol // for this address was resolved previously, we fetch it from the cache. // On the contrary, the symbol is first consulted in the export directory. @@ -610,9 +560,15 @@ func (s *Symbolizer) symbolizeAddress(pid uint32, addr va.Address, mod *pstypes. 
symbol, ok := s.symbols[pid][addr] if !ok && mod != nil { // resolve symbol from the export directory - symbol.symbol = s.resolveSymbolFromExportDirectory(addr, mod) + exports, ok := expsCache.Exports(mod.Name) + if !ok { + goto fallback + } + rva := addr.Dec(mod.BaseAddress.Uint64()) + symbol.symbol = exports.SymbolFromRVA(rva) } +fallback: // try to get the symbol via Debug Help API if symbol.symbol == "" { proc, ok := s.procs[pid] @@ -658,37 +614,31 @@ func (s *Symbolizer) symbolizeAddress(pid uint32, addr va.Address, mod *pstypes. return symbol.symbol } -// symbolFromRVA finds the closest export address before RVA. -func symbolFromRVA(rva va.Address, exports map[uint32]string) string { - var exp uint32 - for f := range exports { - if uint64(f) <= rva.Uint64() { - if exp < f { - exp = f - } +func (s *Symbolizer) cacheModule(pid uint32, addr va.Address, m *module) { + if mod, ok := s.mods[pid]; ok { + if _, ok := mod[addr]; !ok { + s.mods[pid][addr] = m } + } else { + s.mods[pid] = map[va.Address]*module{addr: m} } - if exp != 0 { - sym, ok := exports[exp] - if ok && sym == "" { - return "?" 
+} + +func (s *Symbolizer) cacheSymbol(pid uint32, addr va.Address, frame *callstack.Frame) { + if sym, ok := s.symbols[pid]; ok { + if _, ok := sym[addr]; !ok { + symCachedSymbols.Add(1) + s.symbols[pid][addr] = syminfo{module: frame.Module, symbol: frame.Symbol, moduleAddress: frame.ModuleAddress} } - return sym + } else { + symCachedSymbols.Add(1) + s.symbols[pid] = map[va.Address]syminfo{addr: {module: frame.Module, symbol: frame.Symbol, moduleAddress: frame.ModuleAddress}} } - return "" } func (s *Symbolizer) cleanSym() { s.mu.Lock() defer s.mu.Unlock() - for addr, m := range s.mods { - if time.Since(m.accessed) > modTTL { - modCleanups.Add(1) - symModulesCount.Add(-1) - log.Debugf("removing module exports for addr [%s]", addr) - delete(s.mods, addr) - } - } for _, proc := range s.procs { if time.Since(proc.accessed) > procTTL { symCleanups.Add(1) diff --git a/pkg/symbolize/symbolizer_test.go b/pkg/symbolize/symbolizer_test.go index 163d8b4e9..1efdf5eae 100644 --- a/pkg/symbolize/symbolizer_test.go +++ b/pkg/symbolize/symbolizer_test.go @@ -21,7 +21,6 @@ package symbolize import ( "math/rand" "os" - "path/filepath" "testing" "time" @@ -192,53 +191,33 @@ func TestProcessCallstackPeExports(t *testing.T) { assert.True(t, e.Callstack.ContainsUnbacked()) // check internal state - assert.Len(t, s.mods, 3) + assert.Len(t, s.mods, 1) + assert.Len(t, s.mods[e.PID], 3) // should have populated the symbols cache assert.Len(t, s.symbols, 1) assert.Equal(t, syminfo{module: "unbacked", symbol: "?"}, s.symbols[e.PID][0x2638e59e0a5]) - // image load event should add module exports - // and when the image is unloaded and there are - // no processes with the image section mapped - // inside their VAS, we can remove the module - e2 := &event.Event{ - Type: event.LoadImage, - Tid: 2484, - PID: uint32(12328), - CPU: 1, - Seq: 2, - Name: "LoadImage", - Timestamp: time.Now(), - Category: event.Image, - Params: event.Params{ - params.ImageBase: {Name: params.ImageBase, Type: 
params.Address, Value: uint64(0x12345f)}, - params.FilePath: {Name: params.FilePath, Type: params.UnicodeString, Value: "C:\\Windows\\System32\\bcrypt32.dll"}, - }, - PS: proc, - } - _, err = s.ProcessEvent(e2) - require.NoError(t, err) - assert.Len(t, s.mods, 4) - e3 := &event.Event{ Type: event.UnloadImage, Tid: 2484, - PID: uint32(12328), + PID: uint32(os.Getpid()), CPU: 1, Seq: 2, Name: "UnloadImage", Timestamp: time.Now(), Category: event.Image, Params: event.Params{ - params.ImageBase: {Name: params.ImageBase, Type: params.Address, Value: uint64(0x12345f)}, - params.FilePath: {Name: params.FilePath, Type: params.UnicodeString, Value: filepath.Join(os.Getenv("SystemRoot"), "System32", "bcrypt32.dll")}, + params.ImageBase: {Name: params.ImageBase, Type: params.Address, Value: uint64(0x7ffb5d8e11c4)}, + params.FilePath: {Name: params.FilePath, Type: params.UnicodeString, Value: `C:\Windows\System32\user32.dll`}, }, PS: proc, } + + // dll is unloaded, the number of modules should decrement _, err = s.ProcessEvent(e3) require.NoError(t, err) - assert.Len(t, s.mods, 3) + assert.Len(t, s.mods[e.PID], 2) } func TestProcessCallstack(t *testing.T) { @@ -325,6 +304,7 @@ func TestProcessCallstack(t *testing.T) { } func TestSymbolizeEventParamAddress(t *testing.T) { + expsCache.Clear() r := new(MockResolver) c := &config.Config{} @@ -481,67 +461,3 @@ func TestProcessCallstackProcsTTL(t *testing.T) { r.AssertNumberOfCalls(t, "Cleanup", 1) assert.Equal(t, 0, s.procsSize()) } - -func TestSymbolFromRVA(t *testing.T) { - var tests = []struct { - rva va.Address - exports map[uint32]string - expectedSymbol string - }{ - {va.Address(317949), map[uint32]string{ - 9824: "SHCreateScopeItemFromShellItem", - 23248: "SHCreateScopeItemFromIDList", - 165392: "DllGetClassObject", - 186368: "SHCreateSearchIDListFromAutoList", - 238048: "DllCanUnloadNow", - 240112: "IsShellItemInSearchIndex", - 240304: "IsMSSearchEnabled", - 272336: "SHSaveBinaryAutoListToStream", - 310672: "DllMain", - 
317920: "", - 320864: "", - 434000: "SHCreateAutoList", - 434016: "SHCreateAutoListWithID", - 555040: "CreateDefaultProviderResolver", - 571136: "GetGatherAdmin", - 572592: "SEARCH_RemoteLocationsCscStateCache_IsRemoteLocationInCsc"}, - "?", - }, - {va.Address(434011), map[uint32]string{ - 9824: "SHCreateScopeItemFromShellItem", - 23248: "SHCreateScopeItemFromIDList", - 165392: "DllGetClassObject", - 186368: "SHCreateSearchIDListFromAutoList", - 238048: "DllCanUnloadNow", - 240112: "IsShellItemInSearchIndex", - 240304: "IsMSSearchEnabled", - 272336: "SHSaveBinaryAutoListToStream", - 310672: "DllMain", - 317920: "", - 320864: "", - 434000: "SHCreateAutoList", - 434016: "SHCreateAutoListWithID", - 555040: "CreateDefaultProviderResolver", - 571136: "GetGatherAdmin", - 572592: "SEARCH_RemoteLocationsCscStateCache_IsRemoteLocationInCsc"}, - "SHCreateAutoList", - }, - {va.Address(4532), map[uint32]string{ - 9824: "SHCreateScopeItemFromShellItem", - 23248: "SHCreateScopeItemFromIDList", - 165392: "DllGetClassObject", - 186368: "SHCreateSearchIDListFromAutoList", - 238048: "DllCanUnloadNow", - 240112: "IsShellItemInSearchIndex", - 240304: "IsMSSearchEnabled", - 572592: "SEARCH_RemoteLocationsCscStateCache_IsRemoteLocationInCsc"}, - "", - }, - } - - for _, tt := range tests { - t.Run(tt.expectedSymbol, func(t *testing.T) { - assert.Equal(t, tt.expectedSymbol, symbolFromRVA(tt.rva, tt.exports)) - }) - } -}