Skip to content

Commit 4d2b413

Browse files
committed
Use S_MOV_B64_IMM_PSEUDO to materialize the 64-bit identity value, instead of dealing with immediate-legality concerns by hand (two S_MOV_B32 plus a REG_SEQUENCE).
1 parent be74aa0 commit 4d2b413

File tree

5 files changed

+356
-189
lines changed

5 files changed

+356
-189
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5270,19 +5270,15 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
52705270
return LoopBB;
52715271
}
52725272

5273-
static uint32_t getIdentityValueForWaveReduction(unsigned Opc) {
5273+
static uint32_t getIdentityValueFor32BitWaveReduction(unsigned Opc) {
52745274
switch (Opc) {
52755275
case AMDGPU::S_MIN_U32:
5276-
case AMDGPU::V_CMP_LT_U64_e64: // umin.u64
52775276
return std::numeric_limits<uint32_t>::max();
52785277
case AMDGPU::S_MIN_I32:
5279-
case AMDGPU::V_CMP_LT_I64_e64: // min.i64
52805278
return std::numeric_limits<int32_t>::max();
52815279
case AMDGPU::S_MAX_U32:
5282-
case AMDGPU::V_CMP_GT_U64_e64: // umax.u64
52835280
return std::numeric_limits<uint32_t>::min();
52845281
case AMDGPU::S_MAX_I32:
5285-
case AMDGPU::V_CMP_GT_I64_e64: // max.i64
52865282
return std::numeric_limits<int32_t>::min();
52875283
case AMDGPU::S_ADD_I32:
52885284
case AMDGPU::S_SUB_I32:
@@ -5292,7 +5288,24 @@ static uint32_t getIdentityValueForWaveReduction(unsigned Opc) {
52925288
case AMDGPU::S_AND_B32:
52935289
return std::numeric_limits<uint32_t>::max();
52945290
default:
5295-
llvm_unreachable("Unexpected opcode in getIdentityValueForWaveReduction");
5291+
llvm_unreachable(
5292+
"Unexpected opcode in getIdentityValueFor32BitWaveReduction");
5293+
}
5294+
}
5295+
5296+
static uint64_t getIdentityValueFor64BitWaveReduction(unsigned Opc) {
5297+
switch (Opc) {
5298+
case AMDGPU::V_CMP_LT_U64_e64: // umin.u64
5299+
return std::numeric_limits<uint64_t>::max();
5300+
case AMDGPU::V_CMP_LT_I64_e64: // min.i64
5301+
return std::numeric_limits<int64_t>::max();
5302+
case AMDGPU::V_CMP_GT_U64_e64: // umax.u64
5303+
return std::numeric_limits<uint64_t>::min();
5304+
case AMDGPU::V_CMP_GT_I64_e64: // max.i64
5305+
return std::numeric_limits<int64_t>::min();
5306+
default:
5307+
llvm_unreachable(
5308+
"Unexpected opcode in getIdentityValueFor64BitWaveReduction");
52965309
}
52975310
}
52985311

@@ -5310,7 +5323,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53105323
bool isSGPR = TRI->isSGPRClass(MRI.getRegClass(SrcReg));
53115324
Register DstReg = MI.getOperand(0).getReg();
53125325
MachineBasicBlock *RetBB = nullptr;
5313-
bool is32BitOpc = TRI->getRegSizeInBits(*MRI.getRegClass(DstReg)) == 32;
53145326
if (isSGPR) {
53155327
switch (Opc) {
53165328
case AMDGPU::S_MIN_U32:
@@ -5324,9 +5336,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53245336
RetBB = &BB;
53255337
break;
53265338
}
5327-
case AMDGPU::V_CMP_LT_U64_e64: // umin
5328-
case AMDGPU::V_CMP_LT_I64_e64: // min
5329-
case AMDGPU::V_CMP_GT_U64_e64: // umax
5339+
case AMDGPU::V_CMP_LT_U64_e64: // umin
5340+
case AMDGPU::V_CMP_LT_I64_e64: // min
5341+
case AMDGPU::V_CMP_GT_U64_e64: // umax
53305342
case AMDGPU::V_CMP_GT_I64_e64: { // max
53315343
// Idempotent operations.
53325344
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg);
@@ -5405,6 +5417,11 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
54055417
// so that we will get the next active lane for next iteration.
54065418
MachineBasicBlock::iterator I = BB.end();
54075419
Register SrcReg = MI.getOperand(1).getReg();
5420+
bool is32BitOpc = (Opc == AMDGPU::S_MIN_U32 || Opc == AMDGPU::S_MIN_I32 ||
5421+
Opc == AMDGPU::S_MAX_U32 || Opc == AMDGPU::S_MAX_I32 ||
5422+
Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32 ||
5423+
Opc == AMDGPU::S_AND_B32 || Opc == AMDGPU::S_OR_B32 ||
5424+
Opc == AMDGPU::S_XOR_B32);
54085425

54095426
// Create Control flow for loop
54105427
// Split MI's Machine Basic block into For loop
@@ -5427,33 +5444,15 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
54275444

54285445
// Create initial values of induction variable from Exec, Accumulator and
54295446
// insert branch instr to newly created ComputeBlock
5430-
uint32_t IdentityValue = getIdentityValueForWaveReduction(Opc);
54315447
BuildMI(BB, I, DL, TII->get(MovOpcForExec), LoopIterator).addReg(ExecReg);
54325448
if (is32BitOpc) {
5449+
uint32_t IdentityValue = getIdentityValueFor32BitWaveReduction(Opc);
54335450
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), IdentityValReg)
54345451
.addImm(IdentityValue);
54355452
} else {
5436-
Register Identitylo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5437-
Register Identityhi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5438-
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identityhi)
5453+
uint64_t IdentityValue = getIdentityValueFor64BitWaveReduction(Opc);
5454+
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), IdentityValReg)
54395455
.addImm(IdentityValue);
5440-
switch (Opc) {
5441-
case AMDGPU::V_CMP_LT_U64_e64:
5442-
case AMDGPU::V_CMP_LT_I64_e64:
5443-
IdentityValue = int32_t(-1); // u|min
5444-
break;
5445-
case AMDGPU::V_CMP_GT_U64_e64:
5446-
case AMDGPU::V_CMP_GT_I64_e64:
5447-
IdentityValue = int32_t(0); // u|max
5448-
break;
5449-
}
5450-
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identitylo)
5451-
.addImm(IdentityValue);
5452-
BuildMI(BB, I, DL, TII->get(TargetOpcode::REG_SEQUENCE), IdentityValReg)
5453-
.addReg(Identitylo)
5454-
.addImm(AMDGPU::sub0)
5455-
.addReg(Identityhi)
5456-
.addImm(AMDGPU::sub1);
54575456
}
54585457
// clang-format off
54595458
BuildMI(BB, I, DL, TII->get(AMDGPU::S_BRANCH))

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,9 +1269,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
12691269
; GFX8DAGISEL-LABEL: divergent_value_i64:
12701270
; GFX8DAGISEL: ; %bb.0: ; %entry
12711271
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1272-
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1273-
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
12741272
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
1273+
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
1274+
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
12751275
; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
12761276
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
12771277
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1294,9 +1294,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
12941294
; GFX8GISEL-LABEL: divergent_value_i64:
12951295
; GFX8GISEL: ; %bb.0: ; %entry
12961296
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297-
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
1298-
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
12991297
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
1298+
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
1299+
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
13001300
; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13011301
; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13021302
; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1319,9 +1319,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13191319
; GFX9DAGISEL-LABEL: divergent_value_i64:
13201320
; GFX9DAGISEL: ; %bb.0: ; %entry
13211321
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322-
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1323-
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
13241322
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
1323+
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
1324+
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
13251325
; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13261326
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13271327
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1344,9 +1344,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13441344
; GFX9GISEL-LABEL: divergent_value_i64:
13451345
; GFX9GISEL: ; %bb.0: ; %entry
13461346
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347-
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
1348-
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
13491347
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
1348+
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
1349+
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
13501350
; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13511351
; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13521352
; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1369,9 +1369,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13691369
; GFX1064DAGISEL-LABEL: divergent_value_i64:
13701370
; GFX1064DAGISEL: ; %bb.0: ; %entry
13711371
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372-
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1373-
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
13741372
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
1373+
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
1374+
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
13751375
; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13761376
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13771377
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1393,9 +1393,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13931393
; GFX1064GISEL-LABEL: divergent_value_i64:
13941394
; GFX1064GISEL: ; %bb.0: ; %entry
13951395
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396-
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
1397-
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
13981396
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
1397+
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
1398+
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
13991399
; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14001400
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
14011401
; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1417,9 +1417,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14171417
; GFX1032DAGISEL-LABEL: divergent_value_i64:
14181418
; GFX1032DAGISEL: ; %bb.0: ; %entry
14191419
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420-
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
1421-
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
14221420
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
1421+
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
1422+
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
14231423
; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14241424
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
14251425
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1441,9 +1441,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14411441
; GFX1032GISEL-LABEL: divergent_value_i64:
14421442
; GFX1032GISEL: ; %bb.0: ; %entry
14431443
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1444-
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
1445-
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
14461444
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
1445+
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
1446+
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
14471447
; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14481448
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
14491449
; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1465,16 +1465,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14651465
; GFX1164DAGISEL-LABEL: divergent_value_i64:
14661466
; GFX1164DAGISEL: ; %bb.0: ; %entry
14671467
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1468-
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
1469-
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
14701468
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
1469+
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
1470+
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
14711471
; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1472+
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
14721473
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
14731474
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0
14741475
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1
14751476
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s8
14761477
; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v3, s8
1477-
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
14781478
; GFX1164DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5]
14791479
; GFX1164DAGISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3]
14801480
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8
@@ -1490,16 +1490,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14901490
; GFX1164GISEL-LABEL: divergent_value_i64:
14911491
; GFX1164GISEL: ; %bb.0: ; %entry
14921492
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493-
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
1494-
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
14951493
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
1494+
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
1495+
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
14961496
; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1497+
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
14971498
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
14981499
; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0
14991500
; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1
15001501
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s8
15011502
; GFX1164GISEL-NEXT: v_readlane_b32 s5, v3, s8
1502-
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15031503
; GFX1164GISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5]
15041504
; GFX1164GISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3]
15051505
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8
@@ -1515,15 +1515,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15151515
; GFX1132DAGISEL-LABEL: divergent_value_i64:
15161516
; GFX1132DAGISEL: ; %bb.0: ; %entry
15171517
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1518-
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
1519-
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
15201518
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
1519+
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
1520+
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
15211521
; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1522+
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
15221523
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
15231524
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
15241525
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
15251526
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3
1526-
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15271527
; GFX1132DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5]
15281528
; GFX1132DAGISEL-NEXT: s_and_b32 s6, vcc_lo, s2
15291529
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
@@ -1538,15 +1538,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15381538
; GFX1132GISEL-LABEL: divergent_value_i64:
15391539
; GFX1132GISEL: ; %bb.0: ; %entry
15401540
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1541-
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
1542-
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
15431541
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
1542+
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
1543+
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
15441544
; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1545+
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
15451546
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
15461547
; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
15471548
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
15481549
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3
1549-
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15501550
; GFX1132GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5]
15511551
; GFX1132GISEL-NEXT: s_and_b32 s6, vcc_lo, s2
15521552
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3

0 commit comments

Comments
 (0)