Skip to content

Commit 6209b07

Browse files
wlei-llvmtstellar
authored and committed
[CSSPGO][llvm-profgen] Compress recursive cycles in calling context
This change compresses the context string by removing cycles caused by recursive functions during CS profile generation. Removing recursion cycles is a way to normalize the calling context, which is better for sample aggregation and also makes context promotion deterministic. For the implementation, we recognize adjacent repeated frames as cycles and deduplicate them through multiple rounds of iteration. For example, consider an input context string stack: [“a”, “a”, “b”, “c”, “a”, “b”, “c”, “b”, “c”, “d”] The first iteration removes all adjacent repeated frames of size 1: [“a”, “b”, “c”, “a”, “b”, “c”, “b”, “c”, “d”] The second iteration removes all adjacent repeated frames of size 2: [“a”, “b”, “c”, “a”, “b”, “c”, “d”] The third iteration removes all adjacent repeated frames of size 3: [“a”, “b”, “c”, “d”] So in the end, we get the compressed output: [“a”, “b”, “c”, “d”] Compression is called in two places: once for the sample's context key right after unwinding, and once for the eventual context string id in the ProfileGenerator. Added a switch `compress-recursion` to limit the size of the duplicated frame sequences that are compressed; the default -1 means no size limit. Added unit tests and a regression test for this. Differential Revision: https://reviews.llvm.org/D93556
1 parent 78b35e2 commit 6209b07

16 files changed

+498
-71
lines changed
Binary file not shown.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
PERF_RECORD_MMAP2 3019402/3019402: [0x400000(0x1000) @ 0 00:1d 265650677 1451231]: r-xp recursion-compression-noprobe.perfbin
2+
3+
4007e1
4+
0x4007d6/0x4007e1/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x4007c7/0x4007c0/P/-/-/0 0x400795/0x4007b0/P/-/-/0 0x40079c/0x400790/P/-/-/0 0x400801/0x400770/P/-/-/0 0x400698/0x400801/P/-/-/0 0x400673/0x400696/P/-/-/0
Binary file not shown.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
PERF_RECORD_MMAP2 3367317/3367317: [0x201000(0x1000) @ 0 00:1d 238458915 1121070]: r-xp recursion-compression-pseudoprobe.perfbin
2+
3+
2017db
4+
2017ba
5+
2017e5
6+
2017ba
7+
2017e5
8+
2017d9
9+
2017ba
10+
2017b0
11+
2017b0
12+
2017b0
13+
2017b0
14+
2017b0
15+
2017b0
16+
2017b0
17+
2017b0
18+
2017e5
19+
2017d9
20+
201847
21+
7fcb072a67c3
22+
5541f689495641d7
23+
0x2017cd/0x2017db/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017e0/0x2017a0/P/-/-/0 0x2017cd/0x2017db/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017e0/0x2017a0/P/-/-/0 0x2017cd/0x2017db/P/-/-/0 0x2017d4/0x2017c0/P/-/-/0 0x2017b5/0x2017c0/P/-/-/0 0x2017a7/0x2017b2/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0 0x2017ab/0x2017a0/P/-/-/0
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; First, test with recursion compression disabled (--compress-recursion=0)
2+
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0
3+
; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
4+
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t
5+
; RUN: FileCheck %s --input-file %t
6+
7+
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0
8+
; CHECK-UNCOMPRESS: 1: 1
9+
; CHECK-UNCOMPRESS: 2: 13 fb:11
10+
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0
11+
; CHECK-UNCOMPRESS: 1: 11
12+
; CHECK-UNCOMPRESS: 2: 1 fa:1
13+
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0
14+
; CHECK-UNCOMPRESS: 1: 1
15+
; CHECK-UNCOMPRESS: 2: 2 fb:1
16+
; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0
17+
; CHECK-UNCOMPRESS: 2: 1
18+
; CHECK-UNCOMPRESS: 3: 2 fa:1
19+
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0
20+
; CHECK-UNCOMPRESS: 4: 1
21+
; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0
22+
; CHECK-UNCOMPRESS: 2: 1 fa:1
23+
24+
; CHECK: [main:1 @ foo:3 @ fa]:14:0
25+
; CHECK: 1: 1
26+
; CHECK: 2: 13 fb:11
27+
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0
28+
; CHECK: 1: 11
29+
; CHECK: 2: 1 fa:1
30+
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:4:0
31+
; CHECK: 1: 1
32+
; CHECK: 2: 2 fb:1
33+
; CHECK: 4: 1
34+
; CHECK: [main:1 @ foo]:3:0
35+
; CHECK: 2: 1
36+
; CHECK: 3: 2 fa:1
37+
; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:0:0
38+
39+
40+
; original code:
41+
; clang -O3 -g test.c -o a.out
42+
#include <stdio.h>
43+
44+
int fb(int n) {
45+
if(n > 10) return fb(n / 2);
46+
return fa(n - 1);
47+
}
48+
49+
int fa(int n) {
50+
if(n < 2) return n;
51+
if(n % 2) return fb(n - 1);
52+
return fa(n - 1);
53+
}
54+
55+
void foo() {
56+
int s, i = 0;
57+
while (i++ < 10000)
58+
s += fa(i);
59+
printf("sum is %d\n", s);
60+
}
61+
62+
int main() {
63+
foo();
64+
return 0;
65+
}
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
; First, test with recursion compression disabled (--compress-recursion=0)
2+
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0
3+
; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
4+
; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
5+
; RUN: FileCheck %s --input-file %t
6+
7+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1
8+
; CHECK-UNCOMPRESS: 1: 1
9+
; CHECK-UNCOMPRESS: 3: 1
10+
; CHECK-UNCOMPRESS: 4: 1
11+
; CHECK-UNCOMPRESS: 7: 1 fb:1
12+
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
13+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
14+
; CHECK-UNCOMPRESS: 1: 1
15+
; CHECK-UNCOMPRESS: 3: 1
16+
; CHECK-UNCOMPRESS: 4: 1
17+
; CHECK-UNCOMPRESS: 7: 1 fb:1
18+
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
19+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
20+
; CHECK-UNCOMPRESS: 1: 1
21+
; CHECK-UNCOMPRESS: 3: 1
22+
; CHECK-UNCOMPRESS: 5: 1
23+
; CHECK-UNCOMPRESS: 8: 1 fa:1
24+
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
25+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
26+
; CHECK-UNCOMPRESS: 1: 1
27+
; CHECK-UNCOMPRESS: 3: 1
28+
; CHECK-UNCOMPRESS: 6: 1 fa:1
29+
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
30+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
31+
; CHECK-UNCOMPRESS: 1: 1
32+
; CHECK-UNCOMPRESS: 3: 1
33+
; CHECK-UNCOMPRESS: 6: 1 fa:1
34+
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
35+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
36+
; CHECK-UNCOMPRESS: 1: 1
37+
; CHECK-UNCOMPRESS: 3: 1
38+
; CHECK-UNCOMPRESS: 6: 1 fa:1
39+
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
40+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
41+
; CHECK-UNCOMPRESS: 1: 1
42+
; CHECK-UNCOMPRESS: 2: 1
43+
; CHECK-UNCOMPRESS: 5: 1 fb:1
44+
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
45+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
46+
; CHECK-UNCOMPRESS: 1: 1
47+
; CHECK-UNCOMPRESS: 2: 1
48+
; CHECK-UNCOMPRESS: 5: 1 fb:1
49+
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
50+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
51+
; CHECK-UNCOMPRESS: 1: 1
52+
; CHECK-UNCOMPRESS: 2: 1
53+
; CHECK-UNCOMPRESS: 5: 1 fb:1
54+
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
55+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1
56+
; CHECK-UNCOMPRESS: 1: 1
57+
; CHECK-UNCOMPRESS: 3: 1
58+
; CHECK-UNCOMPRESS: !CFGChecksum: 120515930909
59+
; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0
60+
; CHECK-UNCOMPRESS: 5: 1 fb:1
61+
; CHECK-UNCOMPRESS: !CFGChecksum: 72617220756
62+
63+
64+
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
65+
; CHECK: 1: 4
66+
; CHECK: 2: 3
67+
; CHECK: 3: 1
68+
; CHECK: 5: 4 fb:4
69+
; CHECK: 6: 1 fa:1
70+
; CHECK: !CFGChecksum: 72617220756
71+
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2
72+
; CHECK: 1: 2
73+
; CHECK: 3: 2
74+
; CHECK: 4: 1
75+
; CHECK: 7: 1 fb:1
76+
; CHECK: !CFGChecksum: 120515930909
77+
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
78+
; CHECK: 1: 1
79+
; CHECK: 3: 1
80+
; CHECK: 4: 1
81+
; CHECK: 7: 1 fb:1
82+
; CHECK: !CFGChecksum: 120515930909
83+
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa]:4:1
84+
; CHECK: 1: 1
85+
; CHECK: 3: 1
86+
; CHECK: 5: 1
87+
; CHECK: 8: 1 fa:1
88+
; CHECK: !CFGChecksum: 120515930909
89+
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
90+
; CHECK: 1: 1
91+
; CHECK: 3: 1
92+
; CHECK: 6: 1 fa:1
93+
; CHECK: !CFGChecksum: 72617220756
94+
; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
95+
; CHECK: 1: 1
96+
; CHECK: 3: 1
97+
; CHECK: 6: 1 fa:1
98+
; CHECK: !CFGChecksum: 72617220756
99+
100+
101+
; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Range Counter:
102+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5
103+
; CHECK-UNWINDER: (7a0, 7a7): 1
104+
; CHECK-UNWINDER: (7a0, 7ab): 3
105+
; CHECK-UNWINDER: (7b2, 7b5): 1
106+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6
107+
; CHECK-UNWINDER: (7c0, 7d4): 1
108+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8
109+
; CHECK-UNWINDER: (7c0, 7cd): 1
110+
; CHECK-UNWINDER: (7db, 7e0): 1
111+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7
112+
; CHECK-UNWINDER: (7a0, 7a7): 1
113+
; CHECK-UNWINDER: (7b2, 7b5): 1
114+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6
115+
; CHECK-UNWINDER: (7c0, 7cd): 2
116+
; CHECK-UNWINDER: (7db, 7e0): 1
117+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7
118+
; CHECK-UNWINDER: (7a0, 7a7): 1
119+
; CHECK-UNWINDER: (7b2, 7b5): 1
120+
121+
; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Branch Counter:
122+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5
123+
; CHECK-UNWINDER: (7a7, 7b2): 1
124+
; CHECK-UNWINDER: (7ab, 7a0): 4
125+
; CHECK-UNWINDER: (7b5, 7c0): 1
126+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6
127+
; CHECK-UNWINDER: (7d4, 7c0): 1
128+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8
129+
; CHECK-UNWINDER: (7cd, 7db): 1
130+
; CHECK-UNWINDER: (7e0, 7a0): 1
131+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7
132+
; CHECK-UNWINDER: (7a7, 7b2): 1
133+
; CHECK-UNWINDER: (7b5, 7c0): 1
134+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6
135+
; CHECK-UNWINDER: (7cd, 7db): 2
136+
; CHECK-UNWINDER: (7e0, 7a0): 1
137+
; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7
138+
; CHECK-UNWINDER: (7a7, 7b2): 1
139+
; CHECK-UNWINDER: (7b5, 7c0): 1
140+
141+
142+
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
143+
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
144+
; -g test.c -o a.out
145+
146+
#include <stdio.h>
147+
148+
int fb(int n) {
149+
if(n > 10) return fb(n / 2);
150+
return fa(n - 1);
151+
}
152+
153+
int fa(int n) {
154+
if(n < 2) return n;
155+
if(n % 2) return fb(n - 1);
156+
return fa(n - 1);
157+
}
158+
159+
void foo() {
160+
int s, i = 0;
161+
while (i++ < 10000)
162+
s += fa(i);
163+
printf("sum is %d\n", s);
164+
}
165+
166+
int main() {
167+
foo();
168+
return 0;
169+
}

llvm/tools/llvm-profgen/PerfReader.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88
#include "PerfReader.h"
9+
#include "ProfileGenerator.h"
910

1011
static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
1112
cl::init(false), cl::ZeroOrMore,
@@ -124,6 +125,8 @@ VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary,
124125
ProbeBasedKey->Probes.emplace_back(CallProbe);
125126
}
126127
}
128+
CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
129+
ProbeBasedKey->Probes);
127130
ProbeBasedKey->genHashCode();
128131
Hashable<ContextKey> ContextId(ProbeBasedKey);
129132
auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter());

0 commit comments

Comments
 (0)