Skip to content

Commit ebd8894

Browse files
apangin and krk authored
async-profiler#1125: Use dlopen instead of dl_iterate_phdr for parsing libraries (async-profiler#1220)
Co-authored-by: Kerem Kat <keremkat@gmail.com>
1 parent 2dba71f commit ebd8894

File tree

4 files changed: 102 additions and 59 deletions

src/hooks.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,10 @@ void Hooks::patchLibraries() {
182182

183183
while (_patched_libs < native_lib_count) {
184184
CodeCache* cc = (*native_libs)[_patched_libs++];
185-
cc->patchImport(im_dlopen, (void*)dlopen_hook);
185+
if (!cc->contains((const void*)Hooks::init)) {
186+
// Let libasyncProfiler always use original dlopen
187+
cc->patchImport(im_dlopen, (void*)dlopen_hook);
188+
}
186189
cc->patchImport(im_pthread_create, (void*)pthread_create_hook);
187190
cc->patchImport(im_pthread_exit, (void*)pthread_exit_hook);
188191
}

src/symbols.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ class Symbols {
1414
private:
1515
static Mutex _parse_lock;
1616
static bool _have_kernel_symbols;
17+
static bool _libs_limit_reported;
1718

1819
public:
1920
static void parseKernelSymbols(CodeCache* cc);

src/symbols_linux.cpp

Lines changed: 90 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,15 @@
55

66
#ifdef __linux__
77

8-
#include <set>
8+
#include <unordered_map>
9+
#include <unordered_set>
910
#include <stdio.h>
1011
#include <stdlib.h>
1112
#include <string.h>
1213
#include <sys/types.h>
1314
#include <sys/stat.h>
1415
#include <sys/mman.h>
16+
#include <dlfcn.h>
1517
#include <elf.h>
1618
#include <errno.h>
1719
#include <unistd.h>
@@ -119,6 +121,13 @@ class MemoryMapDesc {
119121
}
120122
};
121123

124+
struct SharedLibrary {
125+
char* file;
126+
const char* map_start;
127+
const char* map_end;
128+
const char* image_base;
129+
};
130+
122131

123132
#ifdef __LP64__
124133
const unsigned char ELFCLASS_SUPPORTED = ELFCLASS64;
@@ -642,8 +651,8 @@ void ElfParser::addRelocationSymbols(ElfSection* reltab, const char* plt) {
642651

643652
Mutex Symbols::_parse_lock;
644653
bool Symbols::_have_kernel_symbols = false;
645-
static std::set<const void*> _parsed_libraries;
646-
static std::set<u64> _parsed_inodes;
654+
bool Symbols::_libs_limit_reported = false;
655+
static std::unordered_set<u64> _parsed_inodes;
647656

648657
void Symbols::parseKernelSymbols(CodeCache* cc) {
649658
int fd;
@@ -690,91 +699,67 @@ void Symbols::parseKernelSymbols(CodeCache* cc) {
690699
fclose(f);
691700
}
692701

693-
static int parseLibrariesCallback(struct dl_phdr_info* info, size_t size, void* data) {
702+
static void collectSharedLibraries(std::unordered_map<u64, SharedLibrary>& libs, int max_count) {
694703
FILE* f = fopen("/proc/self/maps", "r");
695704
if (f == NULL) {
696-
return 1;
705+
return;
697706
}
698707

699-
CodeCacheArray* array = (CodeCacheArray*)data;
700-
CodeCache* cc = NULL;
701708
const char* image_base = NULL;
702709
u64 last_inode = 0;
703-
u64 cc_inode = 0;
704710
char* str = NULL;
705711
size_t str_size = 0;
706712
ssize_t len;
707713

708-
while ((len = getline(&str, &str_size, f)) > 0) {
714+
while (max_count > 0 && (len = getline(&str, &str_size, f)) > 0) {
709715
str[len - 1] = 0;
710716

711717
MemoryMapDesc map(str);
712718
if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) {
713719
continue;
714720
}
715721

716-
const char* map_start = map.addr();
717-
unsigned long map_offs = map.offs();
718722
u64 inode = u64(map.dev()) << 32 | map.inode();
719-
720-
if (map_offs == 0 && inode != last_inode) {
721-
image_base = map_start;
722-
last_inode = inode;
723+
if (_parsed_inodes.find(inode) != _parsed_inodes.end()) {
724+
continue; // shared object is already parsed
723725
}
724-
725-
if (!map.isExecutable() || !_parsed_libraries.insert(map_start).second) {
726-
// Not an executable segment or it has been already parsed
727-
continue;
726+
if (inode == 0 && strcmp(map.file(), "[vdso]") != 0) {
727+
continue; // all shared libraries have inode, except vDSO
728728
}
729729

730+
const char* map_start = map.addr();
730731
const char* map_end = map.end();
731-
if (inode != 0 && !_parsed_inodes.insert(inode).second) {
732-
// Do not parse the same executable twice
733-
if (inode == cc_inode) {
734-
cc->updateBounds(map_start, map_end);
735-
}
736-
continue;
737-
}
738-
739-
int count = array->count();
740-
if (count >= MAX_NATIVE_LIBS) {
741-
break;
732+
if (inode != last_inode && map.offs() == 0) {
733+
image_base = map_start;
734+
last_inode = inode;
742735
}
743736

744-
cc = new CodeCache(map.file(), count, false, map_start, map_end);
745-
cc_inode = inode;
746-
747-
if (strchr(map.file(), ':') != NULL) {
748-
// Do not try to parse pseudofiles like anon_inode:name, /memfd:name
749-
} else if (inode != 0) {
750-
if (inode == last_inode) {
751-
// If last_inode is set, image_base is known to be valid and readable
752-
ElfParser::parseFile(cc, image_base, map.file(), true);
753-
// Parse program headers after the file to ensure debug symbols are parsed first
754-
ElfParser::parseProgramHeaders(cc, image_base, map_end, OS::isMusl());
737+
if (map.isExecutable()) {
738+
SharedLibrary& lib = libs[inode];
739+
if (lib.file == nullptr) {
740+
lib.file = strdup(map.file());
741+
lib.map_start = map_start;
742+
lib.map_end = map_end;
743+
lib.image_base = inode == last_inode ? image_base : NULL;
744+
max_count--;
755745
} else {
756-
// Unlikely case when image_base has not been found.
757-
// Be careful: executable file is not always ELF, e.g. classes.jsa
758-
ElfParser::parseFile(cc, map_start, map.file(), true);
746+
// The same library may have multiple executable segments mapped
747+
lib.map_end = map_end;
759748
}
760-
} else if (strcmp(map.file(), "[vdso]") == 0) {
761-
ElfParser::parseProgramHeaders(cc, map_start, map_end, true);
762749
}
763-
764-
cc->sort();
765-
applyPatch(cc);
766-
array->add(cc);
767750
}
768751

769752
free(str);
770753
fclose(f);
771-
772-
return 1;
773754
}
774755

775756
void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) {
776757
MutexLocker ml(_parse_lock);
777758

759+
if (array->count() >= MAX_NATIVE_LIBS) {
760+
return;
761+
}
762+
778763
if (kernel_symbols && !haveKernelSymbols()) {
779764
CodeCache* cc = new CodeCache("[kernel]");
780765
parseKernelSymbols(cc);
@@ -787,10 +772,59 @@ void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) {
787772
}
788773
}
789774

790-
// In glibc, dl_iterate_phdr() holds dl_load_write_lock, therefore preventing
791-
// concurrent loading and unloading of shared libraries.
792-
// Without it, we may access memory of a library that is being unloaded.
793-
dl_iterate_phdr(parseLibrariesCallback, array);
775+
std::unordered_map<u64, SharedLibrary> libs;
776+
collectSharedLibraries(libs, MAX_NATIVE_LIBS - array->count());
777+
778+
for (auto& it : libs) {
779+
u64 inode = it.first;
780+
_parsed_inodes.insert(inode);
781+
782+
SharedLibrary& lib = it.second;
783+
CodeCache* cc = new CodeCache(lib.file, array->count(), false, lib.map_start, lib.map_end);
784+
785+
// Strip " (deleted)" suffix so that removed library can be reopened
786+
size_t len = strlen(lib.file);
787+
if (len > 10 && strcmp(lib.file + len - 10, " (deleted)") == 0) {
788+
lib.file[len - 10] = 0;
789+
}
790+
791+
if (strchr(lib.file, ':') != NULL) {
792+
// Do not try to parse pseudofiles like anon_inode:name, /memfd:name
793+
} else if (strcmp(lib.file, "[vdso]") == 0) {
794+
ElfParser::parseProgramHeaders(cc, lib.map_start, lib.map_end, true);
795+
} else if (lib.image_base == NULL) {
796+
// Unlikely case when image base has not been found: not safe to access program headers.
797+
// Be careful: executable file is not always ELF, e.g. classes.jsa
798+
ElfParser::parseFile(cc, lib.map_start, lib.file, true);
799+
} else {
800+
// Parse debug symbols first
801+
ElfParser::parseFile(cc, lib.image_base, lib.file, true);
802+
803+
dlerror(); // reset any error from previous dl function calls
804+
805+
// Protect library from unloading while parsing in-memory ELF program headers.
806+
// Also, dlopen() ensures the library is fully loaded.
807+
// Main executable and ld-linux interpreter cannot be dlopen'ed, but dlerror() returns NULL for them.
808+
void* handle = dlopen(lib.file, RTLD_LAZY | RTLD_NOLOAD);
809+
if (handle != NULL || dlerror() == NULL) {
810+
ElfParser::parseProgramHeaders(cc, lib.image_base, lib.map_end, OS::isMusl());
811+
if (handle != NULL) {
812+
dlclose(handle);
813+
}
814+
}
815+
}
816+
817+
free(lib.file);
818+
819+
cc->sort();
820+
applyPatch(cc);
821+
array->add(cc);
822+
}
823+
824+
if (array->count() >= MAX_NATIVE_LIBS && !_libs_limit_reported) {
825+
Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS);
826+
_libs_limit_reported = true;
827+
}
794828
}
795829

796830
#endif // __linux__

src/symbols_macos.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
#ifdef __APPLE__
77

8-
#include <set>
8+
#include <unordered_set>
99
#include <dlfcn.h>
1010
#include <string.h>
1111
#include <mach-o/dyld.h>
@@ -126,7 +126,8 @@ class MachOParser {
126126

127127
Mutex Symbols::_parse_lock;
128128
bool Symbols::_have_kernel_symbols = false;
129-
static std::set<const void*> _parsed_libraries;
129+
bool Symbols::_libs_limit_reported = false;
130+
static std::unordered_set<const void*> _parsed_libraries;
130131

131132
void Symbols::parseKernelSymbols(CodeCache* cc) {
132133
}
@@ -143,6 +144,10 @@ void Symbols::parseLibraries(CodeCacheArray* array, bool kernel_symbols) {
143144

144145
int count = array->count();
145146
if (count >= MAX_NATIVE_LIBS) {
147+
if (!_libs_limit_reported) {
148+
Log::warn("Number of parsed libraries reached the limit of %d", MAX_NATIVE_LIBS);
149+
_libs_limit_reported = true;
150+
}
146151
break;
147152
}
148153

0 commit comments

Comments
 (0)