Skip to content

Commit 41140ce

Browse files
committed
AMDGPU: Use vgpr to implement divergent i32->i64 anyext
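Restrict the existing SReg_64 REG_SEQUENCE pattern to uniform anyext and add a VReg_64 REG_SEQUENCE pattern for the divergent case, for consistency with the zext case.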
1 parent b4c658e commit 41140ce

File tree

4 files changed

+19
-27
lines changed

4 files changed

+19
-27
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2951,10 +2951,15 @@ def : GCNPat <
29512951
>;
29522952

29532953
def : GCNPat <
2954-
(i64 (anyext i32:$src)),
2954+
(i64 (UniformUnaryFrag<anyext> i32:$src)),
29552955
(REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
29562956
>;
29572957

2958+
def : GCNPat <
2959+
(i64 (anyext i32:$src)),
2960+
(REG_SEQUENCE VReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
2961+
>;
2962+
29582963
class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
29592964
(i64 (ext i1:$src)),
29602965
(REG_SEQUENCE VReg_64,

llvm/test/CodeGen/AMDGPU/rem_i128.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,9 +1161,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
11611161
; GFX9-O0-NEXT: v_add3_u32 v8, v0, v2, v8
11621162
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
11631163
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
1164-
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5
11651164
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
1166-
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0
1165+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5
11671166
; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9]
11681167
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v9
11691168
; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 killed $vgpr17_vgpr18 killed $exec
@@ -1190,9 +1189,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
11901189
; GFX9-O0-NEXT: v_add3_u32 v14, v9, v14, v15
11911190
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
11921191
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
1193-
; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5
11941192
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1195-
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
1193+
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
11961194
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
11971195
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15
11981196
; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec
@@ -1221,9 +1219,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
12211219
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
12221220
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
12231221
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
1224-
; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
12251222
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1226-
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
1223+
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
12271224
; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[14:15]
12281225
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16
12291226
; GFX9-O0-NEXT: v_or_b32_e64 v9, v9, v14
@@ -1240,9 +1237,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
12401237
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
12411238
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
12421239
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
1243-
; GFX9-O0-NEXT: v_mov_b32_e32 v9, s5
12441240
; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
1245-
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
1241+
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
12461242
; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
12471243
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
12481244
; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9
@@ -1280,9 +1276,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
12801276
; GFX9-O0-NEXT: v_mov_b32_e32 v23, v24
12811277
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
12821278
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
1283-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5
12841279
; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
1285-
; GFX9-O0-NEXT: v_mov_b32_e32 v24, v1
1280+
; GFX9-O0-NEXT: v_mov_b32_e32 v24, s5
12861281
; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s4, v[23:24]
12871282
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v24
12881283
; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
@@ -2447,9 +2442,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
24472442
; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v10
24482443
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
24492444
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
2450-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5
24512445
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2452-
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
2446+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s5
24532447
; GFX9-O0-NEXT: v_lshlrev_b64 v[17:18], s4, v[2:3]
24542448
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v18
24552449
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4
@@ -2476,9 +2470,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
24762470
; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v11
24772471
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
24782472
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
2479-
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
24802473
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
2481-
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
2474+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s5
24822475
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3]
24832476
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3
24842477
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec
@@ -2507,9 +2500,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
25072500
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
25082501
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
25092502
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
2510-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, s5
25112503
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
2512-
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12
2504+
; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
25132505
; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16]
25142506
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16
25152507
; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12
@@ -2526,9 +2518,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
25262518
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
25272519
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
25282520
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
2529-
; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
25302521
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
2531-
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17
2522+
; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
25322523
; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16]
25332524
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16
25342525
; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v17
@@ -2566,9 +2557,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
25662557
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20
25672558
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
25682559
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
2569-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, s5
25702560
; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
2571-
; GFX9-O0-NEXT: v_mov_b32_e32 v20, v12
2561+
; GFX9-O0-NEXT: v_mov_b32_e32 v20, s5
25722562
; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20]
25732563
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20
25742564
; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12

llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,9 +475,8 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline {
475475
; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2
476476
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
477477
; GFX9-O0-NEXT: ; implicit-def: $sgpr36
478-
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s35
479478
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
480-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
479+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35
481480
; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s34, v[0:1]
482481
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
483482
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec

llvm/test/CodeGen/AMDGPU/wwm-reserved.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -479,9 +479,8 @@ define i64 @called_i64(i64 %a) noinline {
479479
; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2
480480
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
481481
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
482-
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
483482
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
484-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
483+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5
485484
; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1]
486485
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
487486
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
@@ -1310,9 +1309,8 @@ define i64 @strict_wwm_called_i64(i64 %a) noinline {
13101309
; GFX9-O0-NEXT: v_add3_u32 v0, v0, v1, v2
13111310
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
13121311
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
1313-
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
13141312
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
1315-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
1313+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s5
13161314
; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[0:1]
13171315
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
13181316
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec

0 commit comments

Comments
 (0)