Skip to content

Commit 627fe1b

Browse files
committed
Start the partition from s40
1 parent 27d6094 commit 627fe1b

File tree

54 files changed

+7102
-7178
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

54 files changed

+7102
-7178
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -91,11 +91,15 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
9191
>;
9292

9393
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
94-
(add (sequence "SGPR%u", 30, 37),
95-
(sequence "SGPR%u", 46, 53),
96-
(sequence "SGPR%u", 62, 69),
97-
(sequence "SGPR%u", 78, 85),
98-
(sequence "SGPR%u", 94, 105))
94+
// Ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame pointer),
95+
// and s34 (base pointer) are callee-saved. The striped layout starts from s40,
96+
// with a stripe width of 8. The last stripe is 10 wide instead of 8, to avoid
97+
// ending with a 2-wide stripe.
98+
(add (sequence "SGPR%u", 30, 39),
99+
(sequence "SGPR%u", 48, 55),
100+
(sequence "SGPR%u", 64, 71),
101+
(sequence "SGPR%u", 80, 87),
102+
(sequence "SGPR%u", 96, 105))
99103
>;
100104

101105
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -124,8 +124,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
124124
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
125125
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
126126
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
127-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
128-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
127+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
128+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
129129
; CHECK-NEXT: s_addk_i32 s32, 0x800
130130
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
131131
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -152,7 +152,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
152152
; CHECK-NEXT: s_mov_b32 s53, s12
153153
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
154154
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
155-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
155+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
156156
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
157157
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
158158
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -162,7 +162,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
162162
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
163163
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
164164
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
165-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
165+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
166166
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
167167
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
168168
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -184,8 +184,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
184184
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
185185
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
186186
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
187-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
188-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
187+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
188+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
189189
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
190190
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
191191
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -264,8 +264,8 @@ define double @test_powr_fast_f64(double %x, double %y) {
264264
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
265265
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
266266
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
267-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
268-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
267+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
268+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
269269
; CHECK-NEXT: s_addk_i32 s32, 0x800
270270
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
271271
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -291,7 +291,7 @@ define double @test_powr_fast_f64(double %x, double %y) {
291291
; CHECK-NEXT: s_mov_b32 s53, s12
292292
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
293293
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
294-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
294+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
295295
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
296296
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
297297
; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1]
@@ -300,7 +300,7 @@ define double @test_powr_fast_f64(double %x, double %y) {
300300
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
301301
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
302302
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
303-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
303+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
304304
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
305305
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
306306
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -319,8 +319,8 @@ define double @test_powr_fast_f64(double %x, double %y) {
319319
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
320320
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
321321
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
322-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
323-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
322+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
323+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
324324
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
325325
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
326326
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -406,8 +406,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
406406
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
407407
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
408408
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
409-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
410-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
409+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
410+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
411411
; CHECK-NEXT: s_addk_i32 s32, 0x800
412412
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
413413
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -434,7 +434,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
434434
; CHECK-NEXT: s_mov_b32 s53, s12
435435
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
436436
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
437-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
437+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
438438
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
439439
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
440440
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -444,7 +444,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
444444
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
445445
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
446446
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
447-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
447+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
448448
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
449449
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
450450
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -466,8 +466,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
466466
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
467467
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
468468
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
469-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
470-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
469+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
470+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
471471
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
472472
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
473473
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -548,8 +548,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
548548
; CHECK-NEXT: v_writelane_b32 v42, s35, 3
549549
; CHECK-NEXT: v_writelane_b32 v42, s36, 4
550550
; CHECK-NEXT: v_writelane_b32 v42, s37, 5
551-
; CHECK-NEXT: v_writelane_b32 v42, s46, 6
552-
; CHECK-NEXT: v_writelane_b32 v42, s47, 7
551+
; CHECK-NEXT: v_writelane_b32 v42, s38, 6
552+
; CHECK-NEXT: v_writelane_b32 v42, s39, 7
553553
; CHECK-NEXT: s_addk_i32 s32, 0x400
554554
; CHECK-NEXT: v_writelane_b32 v42, s48, 8
555555
; CHECK-NEXT: v_writelane_b32 v42, s49, 9
@@ -573,7 +573,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
573573
; CHECK-NEXT: s_mov_b32 s53, s12
574574
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
575575
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
576-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
576+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
577577
; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2
578578
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
579579
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -584,7 +584,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
584584
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
585585
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
586586
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
587-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
587+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
588588
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
589589
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
590590
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -602,8 +602,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
602602
; CHECK-NEXT: v_readlane_b32 s50, v42, 10
603603
; CHECK-NEXT: v_readlane_b32 s49, v42, 9
604604
; CHECK-NEXT: v_readlane_b32 s48, v42, 8
605-
; CHECK-NEXT: v_readlane_b32 s47, v42, 7
606-
; CHECK-NEXT: v_readlane_b32 s46, v42, 6
605+
; CHECK-NEXT: v_readlane_b32 s39, v42, 7
606+
; CHECK-NEXT: v_readlane_b32 s38, v42, 6
607607
; CHECK-NEXT: v_readlane_b32 s37, v42, 5
608608
; CHECK-NEXT: v_readlane_b32 s36, v42, 4
609609
; CHECK-NEXT: v_readlane_b32 s35, v42, 3
@@ -689,8 +689,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
689689
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
690690
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
691691
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
692-
; CHECK-NEXT: v_writelane_b32 v43, s46, 6
693-
; CHECK-NEXT: v_writelane_b32 v43, s47, 7
692+
; CHECK-NEXT: v_writelane_b32 v43, s38, 6
693+
; CHECK-NEXT: v_writelane_b32 v43, s39, 7
694694
; CHECK-NEXT: s_addk_i32 s32, 0x800
695695
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
696696
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -716,7 +716,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
716716
; CHECK-NEXT: s_mov_b32 s53, s12
717717
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
718718
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
719-
; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
719+
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
720720
; CHECK-NEXT: v_or_b32_e32 v42, 1, v2
721721
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
722722
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -727,7 +727,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
727727
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
728728
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
729729
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
730-
; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
730+
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
731731
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
732732
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
733733
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -748,8 +748,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
748748
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
749749
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
750750
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
751-
; CHECK-NEXT: v_readlane_b32 s47, v43, 7
752-
; CHECK-NEXT: v_readlane_b32 s46, v43, 6
751+
; CHECK-NEXT: v_readlane_b32 s39, v43, 7
752+
; CHECK-NEXT: v_readlane_b32 s38, v43, 6
753753
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
754754
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
755755
; CHECK-NEXT: v_readlane_b32 s35, v43, 3

0 commit comments

Comments
 (0)