1414; secondly, and then hot_func. Specifically, tests that
1515; - If a constant is accessed by hot functions, all constant pools for this
1616; constant (e.g., from an unprofiled function, or cold function) should have
17- ; `.hot` suffix.
17+ ; `.hot` suffix. For instance, double 0.68 is seen by both @cold_func and
18+ ; @hot_func, so both constant pool entries emitted for it (labels LCPI0_0 and
19+ ; LCPI2_0) have the `.hot` suffix.
1820; - Similarly, if a constant is accessed by both a cold function and an un-profiled
1921; function, constant pools for this constant should not have the `.unlikely` suffix.
2022
21- ; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
22- ; CHECK: .LCPI0_0:
23- ; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
24- ; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8
25- ; CHECK: .LCPI0_1:
26- ; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005
27- ; CHECK: .section .rodata.cst8,"aM",@progbits,8
28- ; CHECK: .LCPI0_2:
29- ; CHECK: .byte 0 // 0x0
30- ; CHECK: .byte 4 // 0x4
31- ; CHECK: .byte 8 // 0x8
32- ; CHECK: .byte 12 // 0xc
33- ; CHECK: .byte 255 // 0xff
34- ; CHECK: .byte 255 // 0xff
35- ; CHECK: .byte 255 // 0xff
36- ; CHECK: .byte 255 // 0xff
37-
38- ; CHECK: .section .rodata.cst8,"aM",@progbits,8
39- ; CHECK: .LCPI1_0:
40- ; CHECK: .byte 0 // 0x0
41- ; CHECK: .byte 4 // 0x4
42- ; CHECK: .byte 8 // 0x8
43- ; CHECK: .byte 12 // 0xc
44- ; CHECK: .byte 255 // 0xff
45- ; CHECK: .byte 255 // 0xff
46- ; CHECK: .byte 255 // 0xff
47- ; CHECK: .byte 255 // 0xff
48- ; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16
49- ; CHECK: .LCPI1_1:
50- ; CHECK: .word 442 // 0x1ba
51- ; CHECK: .word 100 // 0x64
52- ; CHECK: .word 0 // 0x0
53- ; CHECK: .word 0 // 0x0
54-
23+ ;; Constant pools for function @cold_func.
24+ ; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
25+ ; CHECK-NEXT: .p2align
26+ ; CHECK-NEXT: .LCPI0_0:
27+ ; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
28+ ; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8
29+ ; CHECK-NEXT: .p2align
30+ ; CHECK-NEXT: .LCPI0_1:
31+ ; CHECK-NEXT: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005
32+ ; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8
33+ ; CHECK-NEXT: .p2align
34+ ; CHECK-NEXT: .LCPI0_2:
35+ ; CHECK-NEXT: .byte 0 // 0x0
36+ ; CHECK-NEXT: .byte 4 // 0x4
37+ ; CHECK-NEXT: .byte 8 // 0x8
38+ ; CHECK-NEXT: .byte 12 // 0xc
39+ ; CHECK-NEXT: .byte 255 // 0xff
40+ ; CHECK-NEXT: .byte 255 // 0xff
41+ ; CHECK-NEXT: .byte 255 // 0xff
42+ ; CHECK-NEXT: .byte 255 // 0xff
43+
44+ ;; Constant pools for function @unprofiled_func
45+ ; CHECK: .section .rodata.cst8,"aM",@progbits,8
46+ ; CHECK-NEXT: .p2align
47+ ; CHECK-NEXT: .LCPI1_0:
48+ ; CHECK-NEXT: .byte 0 // 0x0
49+ ; CHECK-NEXT: .byte 4 // 0x4
50+ ; CHECK-NEXT: .byte 8 // 0x8
51+ ; CHECK-NEXT: .byte 12 // 0xc
52+ ; CHECK-NEXT: .byte 255 // 0xff
53+ ; CHECK-NEXT: .byte 255 // 0xff
54+ ; CHECK-NEXT: .byte 255 // 0xff
55+ ; CHECK-NEXT: .byte 255 // 0xff
56+ ; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16
57+ ; CHECK-NEXT: .p2align
58+ ; CHECK-NEXT: .LCPI1_1:
59+ ; CHECK-NEXT: .word 2 // 0x2
60+ ; CHECK-NEXT: .word 3 // 0x3
61+ ; CHECK-NEXT: .word 5 // 0x5
62+ ; CHECK-NEXT: .word 7 // 0x7
63+ ; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16
64+ ; CHECK-NEXT: .p2align
65+ ; CHECK-NEXT: .LCPI1_2:
66+ ; CHECK-NEXT: .word 442 // 0x1ba
67+ ; CHECK-NEXT: .word 100 // 0x64
68+ ; CHECK-NEXT: .word 0 // 0x0
69+ ; CHECK-NEXT: .word 0 // 0x0
70+
71+ ;; Constant pools for function @hot_func
5572; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
56- ; CHECK: .LCPI2_0:
57- ; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
58- ; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16
59- ; CHECK: .LCPI2_1:
60- ; CHECK: .word 442 // 0x1ba
61- ; CHECK: .word 100 // 0x64
62- ; CHECK: .word 0 // 0x0
63- ; CHECK: .word 0 // 0x0
64-
65- ; CHECK: .section .rodata.cst32,"aM",@progbits,32
66- ; CHECK: .globl val
73+ ; CHECK-NEXT: .p2align
74+ ; CHECK-NEXT: .LCPI2_0:
75+ ; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
76+ ; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16
77+ ; CHECK-NEXT: .p2align
78+ ; CHECK-NEXT: .LCPI2_1:
79+ ; CHECK-NEXT: .word 0 // 0x0
80+ ; CHECK-NEXT: .word 100 // 0x64
81+ ; CHECK-NEXT: .word 0 // 0x0
82+ ; CHECK-NEXT: .word 442 // 0x1ba
83+ ; CHECK-NEXT: .LCPI2_2:
84+ ; CHECK-NEXT: .word 442 // 0x1ba
85+ ; CHECK-NEXT: .word 100 // 0x64
86+ ; CHECK-NEXT: .word 0 // 0x0
87+ ; CHECK-NEXT: .word 0 // 0x0
88+
89+ ;; For global variable @val
90+ ;; The section name remains `.rodata.cst32` without a hotness prefix because
91+ ;; the variable has external linkage and is not analyzed. The compiler needs symbolized
92+ ;; data access profiles to annotate such global variables' hotness.
93+ ; CHECK: .section .rodata.cst32,"aM",@progbits,32
94+ ; CHECK-NEXT: .globl val
6795
6896define i32 @cold_func (double %x , <16 x i8 > %a , <16 x i8 > %b ) !prof !16 {
6997 %2 = tail call i32 (...) @func_taking_arbitrary_param (double 6.800000e-01 )
@@ -83,14 +111,16 @@ define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
83111 %t1 = call <8 x i8 > @llvm.aarch64.neon.tbl2.v8i8 (<16 x i8 > %a , <16 x i8 > %b , <8 x i8 > <i8 0 , i8 4 , i8 8 , i8 12 , i8 -1 , i8 -1 , i8 -1 , i8 -1 >)
84112 %t2 = bitcast <8 x i8 > %t1 to <4 x i16 >
85113 %t3 = zext <4 x i16 > %t2 to <4 x i32 >
86- %cmp = icmp ule <4 x i32 > <i32 442 , i32 100 , i32 0 , i32 0 >, %t3
114+ %t4 = add <4 x i32 > %t3 , <i32 2 , i32 3 , i32 5 , i32 7 >
115+ %cmp = icmp ule <4 x i32 > <i32 442 , i32 100 , i32 0 , i32 0 >, %t4
87116 ret <4 x i1 > %cmp
88117}
89118
90119define <4 x i1 > @hot_func (i32 %0 , <4 x i32 > %a ) !prof !17 {
91120 %2 = tail call i32 (...) @func_taking_arbitrary_param (double 6.800000e-01 )
92- %b = icmp ule <4 x i32 > %a , <i32 442 , i32 100 , i32 0 , i32 0 >
93- ret <4 x i1 > %b
121+ %b = add <4 x i32 > <i32 0 , i32 100 , i32 0 , i32 442 >, %a
122+ %c = icmp ule <4 x i32 > %b , <i32 442 , i32 100 , i32 0 , i32 0 >
123+ ret <4 x i1 > %c
94124}
95125
96126@val = unnamed_addr constant i256 1
@@ -107,14 +137,15 @@ define i32 @main(i32 %0, ptr %1) !prof !16 {
107137
1081387 : ; preds = %7, %2
109139 %8 = phi i32 [ 0 , %2 ], [ %10 , %7 ]
110- %9 = call i32 @rand ()
140+ %seed_val = load i256 , ptr @val
141+ %9 = call i32 @seed (i256 %seed_val )
111142 call void @hot_func (i32 %9 )
112143 %10 = add i32 %8 , 1
113144 %11 = icmp eq i32 %10 , 100000
114145 br i1 %11 , label %5 , label %7 , !prof !18
115146}
116147
117- declare i32 @rand ( )
148+ declare i32 @seed ( i256 )
118149declare double @double_func ()
119150declare <4 x i32 > @vector_func ()
120151declare <16 x i8 > @vector_func_16i8 ()
0 commit comments