|
| 1 | +; REQUIRES: x86_64-linux |
| 2 | +; REQUIRES: asserts |
| 3 | +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/non-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s |
| 4 | + |
| 5 | +; The source code that the sample profile was collected from (before the edits marked "code change" in the second listing): |
| 6 | + |
| 7 | +; volatile int x = 1; |
| 8 | +; __attribute__((noinline)) int bar(int p) { |
| 9 | +; return p; |
| 10 | +; } |
| 11 | + |
| 12 | +; __attribute__((always_inline)) int foo(int i, int p) { |
| 13 | +; if (i % 10) return bar(p); |
| 14 | +; else return bar(p + 1); |
| 15 | +; } |
| 16 | + |
| 17 | +; int main() { |
| 18 | +; for (int i = 0; i < 1000 * 1000; i++) { |
| 19 | +; x += foo(i, x); |
| 20 | +; x += bar(x); |
| 21 | +; x += foo(i, x); |
| 22 | +; x += bar(x); |
| 23 | +; } |
| 24 | +; } |
| 25 | + |
| 26 | +; The source code for the current build: |
| 27 | + |
| 28 | +; volatile int x = 1; |
| 29 | +; __attribute__((noinline)) int bar(int p) { |
| 30 | +; return p; |
| 31 | +; } |
| 32 | + |
| 33 | +; __attribute__((always_inline)) int foo(int i, int p) { |
| 34 | +; if (i % 10) return bar(p); |
| 35 | +; else return bar(p + 1); |
| 36 | +; } |
| 37 | + |
| 38 | +; int main() { |
| 39 | +; if (x == 0) // code change |
| 40 | +; return 0; // code change |
| 41 | +; for (int i = 0; i < 1000 * 1000; i++) { |
| 42 | +; x += foo(i, x); |
| 43 | +; x += bar(x); |
| 44 | +; if (i < 0) // code change |
| 45 | +; return 0; // code change |
| 46 | +; x += foo(i, x); |
| 47 | +; x += bar(x); |
| 48 | +; } |
| 49 | +; } |
| 50 | + |
| 51 | +; CHECK: Run stale profile matching for bar |
| 52 | + |
| 53 | +; CHECK: Run stale profile matching for foo |
| 54 | +; CHECK: Callsite with callee:bar is matched from 1.1 to 1.1 |
| 55 | +; CHECK: Callsite with callee:bar is matched from 2 to 2 |
| 56 | + |
| 57 | +; CHECK: Run stale profile matching for main |
| 58 | +; CHECK: Callsite with callee:foo is matched from 4 to 2 |
| 59 | +; CHECK: Callsite with callee:bar is matched from 5 to 3 |
| 60 | +; CHECK: Callsite with callee:foo is matched from 8 to 4 |
| 61 | +; CHECK: Callsite with callee:bar is matched from 9 to 5 |
| 62 | + |
| 63 | +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" |
| 64 | +target triple = "x86_64-unknown-linux-gnu" |
| 65 | + |
| 66 | +@x = dso_local global i32 1, align 4 ; the global `x` from the source listings, initialised to 1; every access in @main is a volatile load or store |
| 67 | + |
| 68 | +; Function Attrs: noinline nounwind uwtable -- @bar returns its argument unchanged; it is the callee at both call sites in @foo and at two call sites in @main. |
| 69 | +define dso_local i32 @bar(i32 noundef %p) #0 !dbg !9 { ; !9 is the "bar" subprogram (line 2) |
| 70 | +entry: |
| 71 | + ret i32 %p, !dbg !13 ; return %p as-is (debug location: line 3) |
| 72 | +} |
| 73 | + |
| 74 | +; Function Attrs: alwaysinline nounwind uwtable -- @foo returns bar(%p) when %i is not a multiple of 10, otherwise bar(%p + 1). |
| 75 | +define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #1 !dbg !14 { ; !14 is the "foo" subprogram (line 6) |
| 76 | +entry: |
| 77 | + %rem = srem i32 %i, 10, !dbg !15 ; %i % 10 |
| 78 | + %tobool = icmp ne i32 %rem, 0, !dbg !15 ; true when the remainder is non-zero |
| 79 | + br i1 %tobool, label %if.then, label %if.else, !dbg !16 |
| 80 | + |
| 81 | +if.then: ; preds = %entry -- remainder is non-zero |
| 82 | + %call = call i32 @bar(i32 noundef %p), !dbg !17 ; call site at line 7 in the discriminator-2 scope !18, i.e. line offset 1 from foo's start line 6; presumably the location the expected output prints as 1.1 -- confirm |
| 83 | + br label %return, !dbg !19 |
| 84 | + |
| 85 | +if.else: ; preds = %entry -- remainder is zero |
| 86 | + %add = add nsw i32 %p, 1, !dbg !20 ; %p + 1 |
| 87 | + %call1 = call i32 @bar(i32 noundef %add), !dbg !21 ; call site at line 8, i.e. line offset 2 from foo's start line 6 |
| 88 | + br label %return, !dbg !22 |
| 89 | + |
| 90 | +return: ; preds = %if.else, %if.then |
| 91 | + %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !23 ; result of whichever bar call executed |
| 92 | + ret i32 %retval.0, !dbg !24 |
| 93 | +} |
| 94 | + |
| 94 | +; Function Attrs: nounwind uwtable -- @main exits early when x == 0, otherwise loops while i < 1000000 calling @foo, @bar, @foo, @bar and adding each result to the volatile @x; every path returns 0. |
| 95 | +define dso_local i32 @main() #2 !dbg !25 { ; !25 is the "main" subprogram (line 11); the call-site offsets noted below are relative to that line |
| 96 | +entry: |
| 97 | + %0 = load volatile i32, ptr @x, align 4, !dbg !26, !tbaa !27 |
| 98 | + %cmp = icmp eq i32 %0, 0, !dbg !31 ; x == 0 (line 12, marked "code change" in the source listing) |
| 99 | + br i1 %cmp, label %if.then, label %if.end, !dbg !26 |
| 100 | + |
| 101 | +if.then: ; preds = %entry -- x == 0, skip the loop entirely |
| 102 | + br label %for.end, !dbg !32 |
| 103 | + |
| 104 | +if.end: ; preds = %entry |
| 105 | + br label %for.cond, !dbg !33 |
| 106 | + |
| 107 | +for.cond: ; preds = %if.end6, %if.end -- loop header |
| 108 | + %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !34 ; loop counter i: 0 on entry, %inc from the back edge |
| 109 | + %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !35 ; i < 1000 * 1000 |
| 110 | + br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !37 |
| 111 | + |
| 112 | +for.cond.cleanup: ; preds = %for.cond -- loop finished |
| 113 | + br label %cleanup, !dbg !38 |
| 114 | + |
| 115 | +for.body: ; preds = %for.cond |
| 116 | + %1 = load volatile i32, ptr @x, align 4, !dbg !40, !tbaa !27 |
| 117 | + %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !41 ; first foo call: line 15, offset 4; the expected output maps it to profile location 2 |
| 118 | + %2 = load volatile i32, ptr @x, align 4, !dbg !42, !tbaa !27 |
| 119 | + %add = add nsw i32 %2, %call, !dbg !42 |
| 120 | + store volatile i32 %add, ptr @x, align 4, !dbg !42, !tbaa !27 ; x += foo(i, x) |
| 121 | + %3 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !27 |
| 122 | + %call2 = call i32 @bar(i32 noundef %3), !dbg !44 ; first bar call: line 16, offset 5; the expected output maps it to profile location 3 |
| 123 | + %4 = load volatile i32, ptr @x, align 4, !dbg !45, !tbaa !27 |
| 124 | + %add3 = add nsw i32 %4, %call2, !dbg !45 |
| 125 | + store volatile i32 %add3, ptr @x, align 4, !dbg !45, !tbaa !27 ; x += bar(x) |
| 126 | + br i1 false, label %if.then5, label %if.end6, !dbg !46 ; the condition is the constant false, so %if.then5 is never taken; presumably the folded `if (i < 0)` from line 17 of the source listing -- confirm |
| 127 | + |
| 128 | +if.then5: ; preds = %for.body -- early-return path (line 18) |
| 129 | + br label %cleanup, !dbg !47 |
| 130 | + |
| 131 | +if.end6: ; preds = %for.body |
| 132 | + %5 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !27 |
| 133 | + %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !49 ; second foo call: line 19, offset 8; the expected output maps it to profile location 4 |
| 134 | + %6 = load volatile i32, ptr @x, align 4, !dbg !50, !tbaa !27 |
| 135 | + %add8 = add nsw i32 %6, %call7, !dbg !50 |
| 136 | + store volatile i32 %add8, ptr @x, align 4, !dbg !50, !tbaa !27 ; x += foo(i, x) |
| 137 | + %7 = load volatile i32, ptr @x, align 4, !dbg !51, !tbaa !27 |
| 138 | + %call9 = call i32 @bar(i32 noundef %7), !dbg !52 ; second bar call: line 20, offset 9; the expected output maps it to profile location 5 |
| 139 | + %8 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !27 |
| 140 | + %add10 = add nsw i32 %8, %call9, !dbg !53 |
| 141 | + store volatile i32 %add10, ptr @x, align 4, !dbg !53, !tbaa !27 ; x += bar(x) |
| 142 | + %inc = add nsw i32 %i.0, 1, !dbg !54 ; i++ |
| 143 | + br label %for.cond, !dbg !56, !llvm.loop !57 ; back edge to the loop header |
| 144 | + |
| 145 | +cleanup: ; preds = %if.then5, %for.cond.cleanup |
| 146 | + br label %for.end |
| 147 | + |
| 148 | +for.end: ; preds = %cleanup, %if.then -- single exit block |
| 149 | + ret i32 0, !dbg !61 |
| 150 | +} |
| 152 | + |
| 153 | +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) |
| 154 | +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 ; NOTE(review): no function visible in this file calls this intrinsic |
| 155 | + |
| 156 | +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) |
| 157 | +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3 ; NOTE(review): no function visible in this file calls this intrinsic |
| 158 | + |
| 159 | +attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } |
| 160 | +attributes #1 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } |
| 161 | +attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } |
| 162 | +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } |
| 163 | + |
| 164 | +!llvm.dbg.cu = !{!0} |
| 165 | +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} |
| 166 | +!llvm.ident = !{!8} |
| 167 | + |
| 168 | +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0git", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) |
| 169 | +!1 = !DIFile(filename: "test.c", directory: "path") |
| 170 | +!2 = !{i32 7, !"Dwarf Version", i32 5} |
| 171 | +!3 = !{i32 2, !"Debug Info Version", i32 3} |
| 172 | +!4 = !{i32 1, !"wchar_size", i32 4} |
| 173 | +!5 = !{i32 8, !"PIC Level", i32 2} |
| 174 | +!6 = !{i32 7, !"PIE Level", i32 2} |
| 175 | +!7 = !{i32 7, !"uwtable", i32 2} |
| 176 | +!8 = !{!"clang version 19.0.0git"} |
| 177 | +!9 = distinct !DISubprogram(name: "bar", scope: !10, file: !10, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) |
| 178 | +!10 = !DIFile(filename: "test.c", directory: "path") |
| 179 | +!11 = !DISubroutineType(types: !12) |
| 180 | +!12 = !{} |
| 181 | +!13 = !DILocation(line: 3, column: 3, scope: !9) |
| 182 | +!14 = distinct !DISubprogram(name: "foo", scope: !10, file: !10, line: 6, type: !11, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) |
| 183 | +!15 = !DILocation(line: 7, column: 9, scope: !14) |
| 184 | +!16 = !DILocation(line: 7, column: 7, scope: !14) |
| 185 | +!17 = !DILocation(line: 7, column: 23, scope: !18) |
| 186 | +!18 = !DILexicalBlockFile(scope: !14, file: !10, discriminator: 2) |
| 187 | +!19 = !DILocation(line: 7, column: 15, scope: !18) |
| 188 | +!20 = !DILocation(line: 8, column: 21, scope: !14) |
| 189 | +!21 = !DILocation(line: 8, column: 15, scope: !14) |
| 190 | +!22 = !DILocation(line: 8, column: 8, scope: !14) |
| 191 | +!23 = !DILocation(line: 0, scope: !14) |
| 192 | +!24 = !DILocation(line: 9, column: 1, scope: !14) |
| 193 | +!25 = distinct !DISubprogram(name: "main", scope: !10, file: !10, line: 11, type: !11, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) |
| 194 | +!26 = !DILocation(line: 12, column: 7, scope: !25) |
| 195 | +!27 = !{!28, !28, i64 0} |
| 196 | +!28 = !{!"int", !29, i64 0} |
| 197 | +!29 = !{!"omnipotent char", !30, i64 0} |
| 198 | +!30 = !{!"Simple C/C++ TBAA"} |
| 199 | +!31 = !DILocation(line: 12, column: 9, scope: !25) |
| 200 | +!32 = !DILocation(line: 13, column: 5, scope: !25) |
| 201 | +!33 = !DILocation(line: 14, column: 8, scope: !25) |
| 202 | +!34 = !DILocation(line: 14, scope: !25) |
| 203 | +!35 = !DILocation(line: 14, column: 21, scope: !36) |
| 204 | +!36 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 2) |
| 205 | +!37 = !DILocation(line: 14, column: 3, scope: !36) |
| 206 | +!38 = !DILocation(line: 14, column: 3, scope: !39) |
| 207 | +!39 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 4) |
| 208 | +!40 = !DILocation(line: 15, column: 18, scope: !25) |
| 209 | +!41 = !DILocation(line: 15, column: 11, scope: !25) |
| 210 | +!42 = !DILocation(line: 15, column: 8, scope: !25) |
| 211 | +!43 = !DILocation(line: 16, column: 15, scope: !25) |
| 212 | +!44 = !DILocation(line: 16, column: 11, scope: !25) |
| 213 | +!45 = !DILocation(line: 16, column: 8, scope: !25) |
| 214 | +!46 = !DILocation(line: 17, column: 10, scope: !25) |
| 215 | +!47 = !DILocation(line: 18, column: 8, scope: !25) |
| 216 | +!48 = !DILocation(line: 19, column: 18, scope: !25) |
| 217 | +!49 = !DILocation(line: 19, column: 11, scope: !25) |
| 218 | +!50 = !DILocation(line: 19, column: 8, scope: !25) |
| 219 | +!51 = !DILocation(line: 20, column: 15, scope: !25) |
| 220 | +!52 = !DILocation(line: 20, column: 11, scope: !25) |
| 221 | +!53 = !DILocation(line: 20, column: 8, scope: !25) |
| 222 | +!54 = !DILocation(line: 14, column: 37, scope: !55) |
| 223 | +!55 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 6) |
| 224 | +!56 = !DILocation(line: 14, column: 3, scope: !55) |
| 225 | +!57 = distinct !{!57, !58, !59, !60} |
| 226 | +!58 = !DILocation(line: 14, column: 3, scope: !25) |
| 227 | +!59 = !DILocation(line: 21, column: 3, scope: !25) |
| 228 | +!60 = !{!"llvm.loop.mustprogress"} |
| 229 | +!61 = !DILocation(line: 22, column: 1, scope: !25) |
0 commit comments