Skip to content

Commit 4923c7c

Browse files
kpdevTamarChristinaArm
authored and committed
Use segment's off and vaddr fields instead of IsSharedObject heuristic
This patch unifies the handling of ET_EXEC and ET_DYN ELF files by clarifying the difference between the "offset" of a function's code in the ELF file, its "virtual address" as recorded in the file (according to the ELF specification, this is the "symbol value" written to the st_value field of a symbol table entry in executable and shared object ELF files), and the actual address of the function in the address space of a process after dynamic relocations have taken place. Please note that the file offset and the virtual address are usually the same in ET_DYN files (such as shared objects and position-independent executables), but this is not required, and the assumption is sometimes violated in real-life scenarios. Reviewed By: tnfchris Differential Revision: https://reviews.llvm.org/D144852 ~~ Huawei RRI, OS Lab Change-Id: I2af9fd5428ef766d5540d3ee68d6400631b78cd3
1 parent b408072 commit 4923c7c

20 files changed

+2555
-63
lines changed

lnt/testing/profile/cPerf.cpp

Lines changed: 52 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -178,30 +178,6 @@ void Assert(bool Expr, const char *ExprStr, const char *File, int Line) {
178178
throw std::logic_error(Str);
179179
}
180180

181-
// Returns true if the ELF file given by filename
182-
// is a shared object (DYN).
183-
bool IsSharedObject(const std::string &Fname) {
184-
// We replicate the first part of an ELF header here
185-
// so as not to rely on <elf.h>.
186-
struct PartialElfHeader {
187-
unsigned char e_ident[16];
188-
uint16_t e_type;
189-
};
190-
const int ET_DYN = 3;
191-
192-
FILE *stream = fopen(Fname.c_str(), "r");
193-
if (stream == NULL)
194-
return false;
195-
196-
PartialElfHeader H;
197-
auto NumRead = fread(&H, 1, sizeof(H), stream);
198-
assert(NumRead == sizeof(H));
199-
200-
fclose(stream);
201-
202-
return H.e_type == ET_DYN;
203-
}
204-
205181
//===----------------------------------------------------------------------===//
206182
// Perf structures. Taken from https://lwn.net/Articles/644919/
207183
//===----------------------------------------------------------------------===//
@@ -360,9 +336,20 @@ static const char* sw_event_names[PERF_COUNT_SW_MAX] = {
360336
//===----------------------------------------------------------------------===//
361337

362338
struct Map {
363-
uint64_t Start, End, Adjust;
364-
bool isSO;
339+
Map(uint64_t Start, uint64_t End, const char *Filename)
340+
: Start(Start), End(End), Filename(Filename) {}
341+
342+
uint64_t Start, End;
365343
const char *Filename;
344+
345+
// Mapping-related adjustments. Here FileOffset(func) is the offset of func
346+
// in the ELF file, VAddr(func) is the virtual address associated with this
347+
// symbol (in case of executable and shared object ELF files, st_value field
348+
// of a symbol table's entry is symbol's virtual address) and &func is the
349+
// actual memory address after relocations took place in the address space of
350+
// the process being profiled.
351+
uint64_t FileToPCOffset; // FileOffset(func) + FileToPCOffset == &func
352+
uint64_t VAddrToFileOffset; // VAddr(func) + VAddrToFileOffset == FileOffset(func)
366353
};
367354

368355
struct EventDesc {
@@ -389,7 +376,7 @@ class SymTabOutput : public std::vector<Symbol> {
389376
SymTabOutput(std::string Objdump, std::string BinaryCacheRoot)
390377
: Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot) {}
391378

392-
uint64_t fetchExecSegment(Map *M) {
379+
void fetchExecSegment(Map *M, uint64_t *FileOffset, uint64_t *VAddr) {
393380
std::string Cmd = Objdump + " -p -C " +
394381
BinaryCacheRoot + std::string(M->Filename) +
395382
#ifdef _WIN32
@@ -401,7 +388,7 @@ class SymTabOutput : public std::vector<Symbol> {
401388

402389
char *Line = nullptr, *PrevLine = nullptr;
403390
size_t LineLen = 0;
404-
uint64_t offset = 0;
391+
*FileOffset = *VAddr = 0;
405392
while (true) {
406393
if (PrevLine)
407394
free (PrevLine);
@@ -411,17 +398,22 @@ class SymTabOutput : public std::vector<Symbol> {
411398
if (Len == -1)
412399
break;
413400

414-
char* pos;
415-
if ((pos = strstr (Line, "flags r-x")) == NULL
416-
&& (pos = strstr (Line, "flags rwx")) == NULL)
401+
if (!strstr(Line, "flags r-x") && !strstr(Line, "flags rwx"))
417402
continue;
418403

419404
/* Format is weird.. but we did find the section so punt. */
420-
if ((pos = strstr (PrevLine, "vaddr ")) == NULL)
405+
const char *OFFSET_LABEL = "off ";
406+
const char *VADDR_LABEL = "vaddr ";
407+
char *pos_offset = strstr(PrevLine, OFFSET_LABEL);
408+
char *pos_vaddr = strstr(PrevLine, VADDR_LABEL);
409+
if (!pos_offset || !pos_vaddr)
421410
break;
422411

423-
pos += 6;
424-
offset = strtoull (pos, NULL, 16);
412+
pos_offset += strlen(OFFSET_LABEL);
413+
pos_vaddr += strlen(VADDR_LABEL);
414+
*FileOffset = strtoull(pos_offset, NULL, 16);
415+
*VAddr = strtoull(pos_vaddr, NULL, 16);
416+
425417
break;
426418
}
427419
if (Line)
@@ -435,7 +427,6 @@ class SymTabOutput : public std::vector<Symbol> {
435427
fclose(Stream);
436428
wait(NULL);
437429
#endif
438-
return offset;
439430
}
440431

441432
void fetchSymbols(Map *M) {
@@ -528,16 +519,14 @@ class SymTabOutput : public std::vector<Symbol> {
528519

529520
void reset(Map *M) {
530521
clear();
522+
523+
// Take possible difference between "offset" and "virtual address" of
524+
// the executable segment into account.
525+
uint64_t FileOffset, VAddr;
526+
fetchExecSegment(M, &FileOffset, &VAddr);
527+
M->VAddrToFileOffset = FileOffset - VAddr;
528+
531529
// Fetch both dynamic and static symbols, sort and unique them.
532-
/* If we're a relocatable object then take the actual start of the text
533-
segment into account. */
534-
if (M->isSO)
535-
{
536-
uint64_t segmentStart = fetchExecSegment (M);
537-
/* Adjust the symbol to a value relative to the start of the load address
538-
to match up with registerNewMapping. */
539-
M->Adjust -= segmentStart;
540-
}
541530
fetchSymbols(M);
542531

543532
std::sort(begin(), end());
@@ -671,8 +660,7 @@ class PerfReader {
671660
void emitSymbol(
672661
Symbol &Sym, Map &M,
673662
std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event,
674-
std::map<const char *, uint64_t> &SymEvents,
675-
uint64_t Adjust);
663+
std::map<const char *, uint64_t> &SymEvents);
676664
PyObject *complete();
677665

678666
private:
@@ -852,13 +840,11 @@ static uint64_t getTimeFromSampleId(unsigned char *EndOfStruct,
852840
void PerfReader::registerNewMapping(unsigned char *Buf, const char *Filename) {
853841
perf_event_mmap_common *E = (perf_event_mmap_common *)Buf;
854842
auto MapID = Maps.size();
855-
// EXEC ELF objects aren't relocated. DYN ones are,
856-
// so if it's a DYN object adjust by subtracting the
857-
// map base.
858-
bool IsSO = IsSharedObject(BinaryCacheRoot + std::string(Filename));
843+
859844
uint64_t End = E->start + E->extent;
860-
uint64_t Adjust = IsSO ? E->start - E->pgoff : 0;
861-
Maps.push_back({E->start, End, Adjust, IsSO, Filename});
845+
Map NewMapping(E->start, End, Filename);
846+
NewMapping.FileToPCOffset = E->start - E->pgoff;
847+
Maps.push_back(NewMapping);
862848

863849
unsigned char *EndOfEvent = Buf + E->header.size;
864850
// FIXME: The first EventID is used for every event.
@@ -1026,24 +1012,25 @@ void PerfReader::emitMaps() {
10261012
if (AllUnderThreshold)
10271013
continue;
10281014

1015+
Map &M = Maps[MapID];
10291016
SymTabOutput Syms(Objdump, BinaryCacheRoot);
1030-
Syms.reset(&Maps[MapID]);
1017+
Syms.reset(&M);
10311018

1032-
uint64_t Adjust = Maps[MapID].Adjust;
1019+
uint64_t VAddrToPCOffset = M.VAddrToFileOffset + M.FileToPCOffset;
10331020

10341021
// Accumulate the event totals for each symbol
10351022
auto Sym = Syms.begin();
10361023
auto Event = MapEvents.begin();
10371024
std::map<uint64_t, std::map<const char*, uint64_t>> SymToEventTotals;
10381025
while (Event != MapEvents.end() && Sym != Syms.end()) {
10391026
// Skip events until we find one after the start of Sym
1040-
auto PC = Event->first - Adjust;
1041-
if (PC < Sym->Start) {
1027+
auto VAddr = Event->first - VAddrToPCOffset;
1028+
if (VAddr < Sym->Start) {
10421029
++Event;
10431030
continue;
10441031
}
10451032
// Skip symbols until the event is before the end of Sym
1046-
if (PC >= Sym->End) {
1033+
if (VAddr >= Sym->End) {
10471034
++Sym;
10481035
continue;
10491036
}
@@ -1063,26 +1050,28 @@ void PerfReader::emitMaps() {
10631050
}
10641051
}
10651052
if (Keep)
1066-
emitSymbol(Sym, Maps[MapID], MapEvents.lower_bound(Sym.Start),
1067-
SymToEventTotals[Sym.Start], Adjust);
1053+
emitSymbol(Sym, M, MapEvents.lower_bound(Sym.Start + VAddrToPCOffset),
1054+
SymToEventTotals[Sym.Start]);
10681055
}
10691056
}
10701057
}
10711058

10721059
void PerfReader::emitSymbol(
10731060
Symbol &Sym, Map &M,
10741061
std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event,
1075-
std::map<const char *, uint64_t> &SymEvents,
1076-
uint64_t Adjust) {
1062+
std::map<const char *, uint64_t> &SymEvents) {
1063+
uint64_t VAddrToPCOffset = M.VAddrToFileOffset + M.FileToPCOffset;
10771064
ObjdumpOutput Dump(Objdump, BinaryCacheRoot);
10781065
Dump.reset(&M, Sym.Start, Sym.End);
10791066

10801067
emitFunctionStart(Sym.Name);
1068+
assert(Sym.Start <= Event->first - VAddrToPCOffset &&
1069+
Event->first - VAddrToPCOffset < Sym.End);
10811070
for (uint64_t I = Dump.next(); I < Sym.End; I = Dump.next()) {
1082-
auto PC = Event->first - Adjust;
1071+
auto VAddr = Event->first - VAddrToPCOffset;
10831072

10841073
auto Text = Dump.getText();
1085-
if (PC == I) {
1074+
if (VAddr == I) {
10861075
emitLine(I, &Event->second, Text);
10871076
++Event;
10881077
} else {
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include <stdlib.h>
2+
3+
volatile unsigned n = 0;
4+
5+
__attribute__((noinline))
6+
__attribute__((section(".text.correct")))
7+
__attribute__((aligned(0x1000)))
8+
void correct(long count) {
9+
for (long i = 0; i < count; ++i) {
10+
n += 1;
11+
}
12+
}
13+
14+
int main(int argc, const char *argv[]) {
15+
correct(atol(argv[1]));
16+
return 0;
17+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
SECTIONS {
2+
.text (. + 0x1000) : {
3+
*(.text)
4+
*(.text.correct)
5+
}
6+
} INSERT BEFORE .init;
7+
/* .init is the first section placed into the executable segment in a binary
8+
* produced by clang at the time of writing
9+
*/
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/bin/bash
2+
3+
# While it is quite common for ET_DYN ELF files to have virtual addresses equal
4+
# to file offsets, these are different entities. For example, the code segment
5+
# is sometimes shifted by one page or so.
6+
#
7+
# This script prepares an executable file with code contained in a section
8+
# that has VirtAddr == FileOffset + 0x1000.
9+
#
10+
# In addition, this script also creates two regular executables:
11+
# a position-independent executable and a static one to check the handling of
12+
# the more traditional layout of ELF segments for ET_DYN and ET_EXEC binaries.
13+
#
14+
# A few simple checks are performed to make sure the heuristics used to create
15+
# the required segment layouts still work.
16+
17+
cd "$(dirname $0)"
18+
19+
save_objdump_output() {
20+
local path_to_elf="$1"
21+
local addr_correct="$2"
22+
23+
local basename="$(basename "$path_to_elf")"
24+
25+
llvm-objdump "$path_to_elf" -t > "../${basename}.objdump.out"
26+
llvm-objdump "$path_to_elf" -p > "../${basename}.objdump.p.out"
27+
llvm-objdump "$path_to_elf" -j .text --disassemble-symbols=correct > "../${basename}.objdump.${addr_correct}.out"
28+
}
29+
30+
record_perf_data() {
31+
local path_to_elf="$1"
32+
local basename="$(basename "$path_to_elf")"
33+
local path_to_perf_data="../${basename}.perf_data"
34+
local num_of_iterations=100000000
35+
36+
rm -f "$path_to_perf_data"
37+
perf record -e cpu-clock -o "$path_to_perf_data" "$path_to_elf" $num_of_iterations
38+
39+
# It is probably not a good idea to put very large *.perf_data files to git
40+
size_in_bytes=$(stat --format='%s' "$path_to_perf_data")
41+
if [ $size_in_bytes -gt 50000 ]; then
42+
echo "perf produced too large output file ${path_to_perf_data}, try decreasing"
43+
echo "the number of iterations or passing -F option to 'perf record'."
44+
exit 1
45+
fi
46+
}
47+
48+
save_test_case() {
49+
local path_to_elf="$1"
50+
local addr_correct="$2"
51+
52+
record_perf_data "$path_to_elf"
53+
save_objdump_output "$path_to_elf" $addr_correct
54+
}
55+
56+
check_file() {
57+
local file="$1"
58+
local line="$2"
59+
60+
# Use pcregrep to simplify handling of newlines (it is possible to embed \n
61+
# into the regex and not have them being matched by a dot)
62+
if ! pcregrep -M "$line" "$file"; then
63+
echo "Unexpected test case generated: file '$file' should contain '$line'"
64+
exit 1
65+
fi
66+
}
67+
68+
clang -Os -o /tmp/segments-shifted segments.c -pie -Wl,-T,segments.lds
69+
clang -Os -o /tmp/segments-dyn segments.c -pie
70+
clang -Os -o /tmp/segments-exec segments.c -static
71+
72+
save_test_case /tmp/segments-shifted 0x2000
73+
check_file ../segments-shifted.objdump.out "00002000 .* correct"
74+
# The expected objdump -p output is something like this (note off != vaddr):
75+
# LOAD off 0x0000000000000618 vaddr 0x0000000000001618 paddr 0x0000000000001618 align 2**12
76+
# filesz 0x0000000000002a3d memsz 0x0000000000002a3d flags r-x
77+
check_file ../segments-shifted.objdump.p.out "LOAD off 0x(0+)0000(...) vaddr 0x\g{1}0001\g{2} paddr.*\n.*flags r-x"
78+
79+
# Feel free to update the value of "correct" symbol in the static case if it is changed
80+
save_test_case /tmp/segments-exec 0x403000
81+
check_file ../segments-exec.objdump.out "00403000 .* correct"
82+
check_file ../segments-exec.objdump.p.out "LOAD off 0x(0+)0001000 vaddr 0x(0+)0401000 paddr.*\n.*flags r-x"
83+
84+
save_test_case /tmp/segments-dyn 0x3000
85+
check_file ../segments-dyn.objdump.out "00003000 .* correct"
86+
check_file ../segments-dyn.objdump.p.out "LOAD off 0x(0+)0001000 vaddr 0x(0+)0001000 paddr.*\n.*flags r-x"

tests/testing/Inputs/fake-objdump.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,7 @@
1313
sys.stdout.write(open(fname).read())
1414
sys.exit(0)
1515

16+
if arg.startswith('-p'):
17+
exit_with_fake_output('p.out')
18+
1619
sys.exit(1)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Fake "objdump -p" output.
2+
3+
The original test case was added when ET_EXEC and ET_DYN ELF binaries were
4+
handled differently (assuming ET_EXEC by default - if IsSharedObject() function
5+
cannot find the file).
6+
7+
This test input was added to fix the existing tests after the removal of the
8+
heuristic relying on virtual addresses being equal to file offsets for ET_DYN
9+
case and to final addresses in the process' address space for ET_EXEC case,
10+
respectively.
11+
12+
The "off" and "vaddr" fields are set to some reasonable values based on the
13+
mmap2 records from *.perf_data file.
14+
15+
LOAD off 0x00000000 vaddr 0x00400000 paddr ...
16+
... ... ... ... flags r-x
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Fake "objdump -p" output.
2+
3+
The original test case was added when ET_EXEC and ET_DYN ELF binaries were
4+
handled differently (assuming ET_EXEC by default - if IsSharedObject() function
5+
cannot find the file).
6+
7+
This test input was added to fix the existing tests after the removal of the
8+
heuristic relying on virtual addresses being equal to file offsets for ET_DYN
9+
case and to final addresses in the process' address space for ET_EXEC case,
10+
respectively.
11+
12+
The "off" and "vaddr" fields are set to some reasonable values based on the
13+
mmap2 records from *.perf_data file.
14+
15+
LOAD off 0x00000000 vaddr 0x00400000 paddr ...
16+
... ... ... ... flags r-x
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
/tmp/segments-dyn: file format elf64-x86-64
3+
4+
Disassembly of section .text:
5+
6+
0000000000003000 <correct>:
7+
3000: 48 85 ff testq %rdi, %rdi
8+
3003: 7e 0b jle 0x3010 <correct+0x10>
9+
3005: ff 05 11 30 00 00 incl 12305(%rip) # 0x601c <n>
10+
300b: 48 ff cf decq %rdi
11+
300e: 75 f5 jne 0x3005 <correct+0x5>
12+
3010: c3 retq

0 commit comments

Comments
 (0)