Skip to content

Commit 7dffd73

Browse files
committed
Using S_MOV_B64_IMM_PSEUDO instead of dealing with legality concerns.
1 parent b085d4b commit 7dffd73

File tree

5 files changed

+539
-79
lines changed

5 files changed

+539
-79
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5273,16 +5273,12 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
52735273
static uint32_t getIdentityValueFor32BitWaveReduction(unsigned Opc) {
52745274
switch (Opc) {
52755275
case AMDGPU::S_MIN_U32:
5276-
case AMDGPU::V_CMP_LT_U64_e64: // umin.u64
52775276
return std::numeric_limits<uint32_t>::max();
52785277
case AMDGPU::S_MIN_I32:
5279-
case AMDGPU::V_CMP_LT_I64_e64: // min.i64
52805278
return std::numeric_limits<int32_t>::max();
52815279
case AMDGPU::S_MAX_U32:
5282-
case AMDGPU::V_CMP_GT_U64_e64: // umax.u64
52835280
return std::numeric_limits<uint32_t>::min();
52845281
case AMDGPU::S_MAX_I32:
5285-
case AMDGPU::V_CMP_GT_I64_e64: // max.i64
52865282
return std::numeric_limits<int32_t>::min();
52875283
case AMDGPU::S_ADD_I32:
52885284
case AMDGPU::S_SUB_I32:
@@ -5335,7 +5331,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53355331
bool isSGPR = TRI->isSGPRClass(MRI.getRegClass(SrcReg));
53365332
Register DstReg = MI.getOperand(0).getReg();
53375333
MachineBasicBlock *RetBB = nullptr;
5338-
bool is32BitOpc = TRI->getRegSizeInBits(*MRI.getRegClass(DstReg)) == 32;
53395334
if (isSGPR) {
53405335
switch (Opc) {
53415336
case AMDGPU::S_MIN_U32:
@@ -5349,9 +5344,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53495344
RetBB = &BB;
53505345
break;
53515346
}
5352-
case AMDGPU::V_CMP_LT_U64_e64: // umin
5353-
case AMDGPU::V_CMP_LT_I64_e64: // min
5354-
case AMDGPU::V_CMP_GT_U64_e64: // umax
5347+
case AMDGPU::V_CMP_LT_U64_e64: // umin
5348+
case AMDGPU::V_CMP_LT_I64_e64: // min
5349+
case AMDGPU::V_CMP_GT_U64_e64: // umax
53555350
case AMDGPU::V_CMP_GT_I64_e64: { // max
53565351
// Idempotent operations.
53575352
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg);

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,6 +1270,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
12701270
; GFX8DAGISEL: ; %bb.0: ; %entry
12711271
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12721272
<<<<<<< HEAD
1273+
<<<<<<< HEAD
12731274
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
12741275
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
12751276
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -1278,6 +1279,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
12781279
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
12791280
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
12801281
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1282+
=======
1283+
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
1284+
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
1285+
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1286+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
12811287
; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
12821288
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
12831289
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1301,6 +1307,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13011307
; GFX8GISEL: ; %bb.0: ; %entry
13021308
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13031309
<<<<<<< HEAD
1310+
<<<<<<< HEAD
13041311
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
13051312
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
13061313
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -1309,6 +1316,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13091316
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
13101317
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
13111318
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1319+
=======
1320+
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
1321+
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
1322+
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
1323+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
13121324
; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13131325
; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13141326
; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1332,6 +1344,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13321344
; GFX9DAGISEL: ; %bb.0: ; %entry
13331345
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13341346
<<<<<<< HEAD
1347+
<<<<<<< HEAD
13351348
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
13361349
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
13371350
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -1340,6 +1353,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13401353
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
13411354
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
13421355
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1356+
=======
1357+
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
1358+
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
1359+
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1360+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
13431361
; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13441362
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13451363
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1363,6 +1381,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13631381
; GFX9GISEL: ; %bb.0: ; %entry
13641382
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13651383
<<<<<<< HEAD
1384+
<<<<<<< HEAD
13661385
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
13671386
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
13681387
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -1371,6 +1390,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13711390
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
13721391
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
13731392
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1393+
=======
1394+
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
1395+
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
1396+
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
1397+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
13741398
; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13751399
; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13761400
; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1394,6 +1418,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13941418
; GFX1064DAGISEL: ; %bb.0: ; %entry
13951419
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13961420
<<<<<<< HEAD
1421+
<<<<<<< HEAD
13971422
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
13981423
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
13991424
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -1402,6 +1427,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14021427
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
14031428
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
14041429
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1430+
=======
1431+
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
1432+
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
1433+
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1434+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
14051435
; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14061436
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
14071437
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1424,6 +1454,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14241454
; GFX1064GISEL: ; %bb.0: ; %entry
14251455
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14261456
<<<<<<< HEAD
1457+
<<<<<<< HEAD
14271458
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
14281459
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
14291460
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -1432,6 +1463,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14321463
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
14331464
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
14341465
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1466+
=======
1467+
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
1468+
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
1469+
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
1470+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
14351471
; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14361472
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
14371473
; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1454,6 +1490,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14541490
; GFX1032DAGISEL: ; %bb.0: ; %entry
14551491
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14561492
<<<<<<< HEAD
1493+
<<<<<<< HEAD
14571494
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
14581495
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
14591496
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
@@ -1462,6 +1499,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14621499
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
14631500
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
14641501
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1502+
=======
1503+
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
1504+
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
1505+
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
1506+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
14651507
; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14661508
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
14671509
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1484,6 +1526,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14841526
; GFX1032GISEL: ; %bb.0: ; %entry
14851527
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14861528
<<<<<<< HEAD
1529+
<<<<<<< HEAD
14871530
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
14881531
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
14891532
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
@@ -1492,6 +1535,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14921535
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
14931536
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
14941537
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1538+
=======
1539+
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
1540+
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
1541+
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
1542+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
14951543
; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14961544
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
14971545
; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1514,6 +1562,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15141562
; GFX1164DAGISEL: ; %bb.0: ; %entry
15151563
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15161564
<<<<<<< HEAD
1565+
<<<<<<< HEAD
15171566
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
15181567
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
15191568
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -1522,18 +1571,29 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15221571
=======
15231572
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
15241573
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
1574+
=======
1575+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
15251576
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
1577+
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
1578+
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
15261579
; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1580+
<<<<<<< HEAD
15271581
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1582+
=======
1583+
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
1584+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
15281585
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
15291586
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0
15301587
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1
15311588
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s8
15321589
; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v3, s8
15331590
<<<<<<< HEAD
1591+
<<<<<<< HEAD
15341592
=======
15351593
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15361594
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1595+
=======
1596+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
15371597
; GFX1164DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5]
15381598
; GFX1164DAGISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3]
15391599
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8
@@ -1550,6 +1610,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15501610
; GFX1164GISEL: ; %bb.0: ; %entry
15511611
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15521612
<<<<<<< HEAD
1613+
<<<<<<< HEAD
15531614
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
15541615
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
15551616
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
@@ -1558,18 +1619,29 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15581619
=======
15591620
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
15601621
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
1622+
=======
1623+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
15611624
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
1625+
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
1626+
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
15621627
; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1628+
<<<<<<< HEAD
15631629
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1630+
=======
1631+
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
1632+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
15641633
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
15651634
; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0
15661635
; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1
15671636
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s8
15681637
; GFX1164GISEL-NEXT: v_readlane_b32 s5, v3, s8
15691638
<<<<<<< HEAD
1639+
<<<<<<< HEAD
15701640
=======
15711641
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15721642
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1643+
=======
1644+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
15731645
; GFX1164GISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5]
15741646
; GFX1164GISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3]
15751647
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8
@@ -1586,6 +1658,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15861658
; GFX1132DAGISEL: ; %bb.0: ; %entry
15871659
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15881660
<<<<<<< HEAD
1661+
<<<<<<< HEAD
15891662
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
15901663
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
15911664
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
@@ -1594,17 +1667,28 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15941667
=======
15951668
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
15961669
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
1670+
=======
1671+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
15971672
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
1673+
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
1674+
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
15981675
; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1676+
<<<<<<< HEAD
15991677
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1678+
=======
1679+
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1680+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
16001681
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
16011682
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
16021683
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
16031684
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3
16041685
<<<<<<< HEAD
1686+
<<<<<<< HEAD
16051687
=======
16061688
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
16071689
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1690+
=======
1691+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
16081692
; GFX1132DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5]
16091693
; GFX1132DAGISEL-NEXT: s_and_b32 s6, vcc_lo, s2
16101694
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
@@ -1620,6 +1704,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
16201704
; GFX1132GISEL: ; %bb.0: ; %entry
16211705
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16221706
<<<<<<< HEAD
1707+
<<<<<<< HEAD
16231708
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
16241709
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
16251710
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
@@ -1628,17 +1713,28 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
16281713
=======
16291714
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
16301715
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
1716+
=======
1717+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
16311718
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
1719+
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
1720+
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
16321721
; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1722+
<<<<<<< HEAD
16331723
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1724+
=======
1725+
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1726+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
16341727
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
16351728
; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
16361729
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
16371730
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3
16381731
<<<<<<< HEAD
1732+
<<<<<<< HEAD
16391733
=======
16401734
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
16411735
>>>>>>> 381cb9fada25 ([AMDGPU] Extending wave reduction intrinsics for `i64` types - 1)
1736+
=======
1737+
>>>>>>> 4d2b4133488e (Using `S_MOV_B64_IMM_PSEUDO` instead of dealing with legality concerns.)
16421738
; GFX1132GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5]
16431739
; GFX1132GISEL-NEXT: s_and_b32 s6, vcc_lo, s2
16441740
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3

0 commit comments

Comments
 (0)