Skip to content

Commit cae4732

Browse files
committed
Use S_MOV_B64_IMM_PSEUDO to materialize the 64-bit identity value instead of dealing with legality concerns.
1 parent 9362371 commit cae4732

File tree

5 files changed

+356
-189
lines changed

5 files changed

+356
-189
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5192,19 +5192,15 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
51925192
return LoopBB;
51935193
}
51945194

5195-
static uint32_t getIdentityValueForWaveReduction(unsigned Opc) {
5195+
static uint32_t getIdentityValueFor32BitWaveReduction(unsigned Opc) {
51965196
switch (Opc) {
51975197
case AMDGPU::S_MIN_U32:
5198-
case AMDGPU::V_CMP_LT_U64_e64: // umin.u64
51995198
return std::numeric_limits<uint32_t>::max();
52005199
case AMDGPU::S_MIN_I32:
5201-
case AMDGPU::V_CMP_LT_I64_e64: // min.i64
52025200
return std::numeric_limits<int32_t>::max();
52035201
case AMDGPU::S_MAX_U32:
5204-
case AMDGPU::V_CMP_GT_U64_e64: // umax.u64
52055202
return std::numeric_limits<uint32_t>::min();
52065203
case AMDGPU::S_MAX_I32:
5207-
case AMDGPU::V_CMP_GT_I64_e64: // max.i64
52085204
return std::numeric_limits<int32_t>::min();
52095205
case AMDGPU::S_ADD_I32:
52105206
case AMDGPU::S_SUB_I32:
@@ -5214,7 +5210,24 @@ static uint32_t getIdentityValueForWaveReduction(unsigned Opc) {
52145210
case AMDGPU::S_AND_B32:
52155211
return std::numeric_limits<uint32_t>::max();
52165212
default:
5217-
llvm_unreachable("Unexpected opcode in getIdentityValueForWaveReduction");
5213+
llvm_unreachable(
5214+
"Unexpected opcode in getIdentityValueFor32BitWaveReduction");
5215+
}
5216+
}
5217+
5218+
/// Returns the 64-bit identity (neutral starting) value for the wave
/// reduction selected by \p Opc, i.e. the value v for which op(v, x) == x.
///
/// Only the four 64-bit compare opcodes that stand for umin/min/umax/max are
/// handled; any other opcode reaches llvm_unreachable. The signed limits are
/// returned through the uint64_t return type, so they are implicitly
/// converted to unsigned with their two's-complement bit pattern preserved.
static uint64_t getIdentityValueFor64BitWaveReduction(unsigned Opc) {
5219+
switch (Opc) {
5220+
case AMDGPU::V_CMP_LT_U64_e64: // umin.u64
5221+
return std::numeric_limits<uint64_t>::max(); // all bits set
5222+
case AMDGPU::V_CMP_LT_I64_e64: // min.i64
5223+
return std::numeric_limits<int64_t>::max(); // 0x7FFFFFFFFFFFFFFF
5224+
case AMDGPU::V_CMP_GT_U64_e64: // umax.u64
5225+
return std::numeric_limits<uint64_t>::min(); // 0
5226+
case AMDGPU::V_CMP_GT_I64_e64: // max.i64
5227+
return std::numeric_limits<int64_t>::min(); // 0x8000000000000000 as uint64_t
5228+
default:
5229+
llvm_unreachable(
5230+
"Unexpected opcode in getIdentityValueFor64BitWaveReduction");
52185231
}
52195232
}
52205233

@@ -5232,7 +5245,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
52325245
bool isSGPR = TRI->isSGPRClass(MRI.getRegClass(SrcReg));
52335246
Register DstReg = MI.getOperand(0).getReg();
52345247
MachineBasicBlock *RetBB = nullptr;
5235-
bool is32BitOpc = TRI->getRegSizeInBits(*MRI.getRegClass(DstReg)) == 32;
52365248
if (isSGPR) {
52375249
switch (Opc) {
52385250
case AMDGPU::S_MIN_U32:
@@ -5246,9 +5258,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
52465258
RetBB = &BB;
52475259
break;
52485260
}
5249-
case AMDGPU::V_CMP_LT_U64_e64: // umin
5250-
case AMDGPU::V_CMP_LT_I64_e64: // min
5251-
case AMDGPU::V_CMP_GT_U64_e64: // umax
5261+
case AMDGPU::V_CMP_LT_U64_e64: // umin
5262+
case AMDGPU::V_CMP_LT_I64_e64: // min
5263+
case AMDGPU::V_CMP_GT_U64_e64: // umax
52525264
case AMDGPU::V_CMP_GT_I64_e64: { // max
52535265
// Idempotent operations.
52545266
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg);
@@ -5327,6 +5339,11 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53275339
// so that we will get the next active lane for next iteration.
53285340
MachineBasicBlock::iterator I = BB.end();
53295341
Register SrcReg = MI.getOperand(1).getReg();
5342+
bool is32BitOpc = (Opc == AMDGPU::S_MIN_U32 || Opc == AMDGPU::S_MIN_I32 ||
5343+
Opc == AMDGPU::S_MAX_U32 || Opc == AMDGPU::S_MAX_I32 ||
5344+
Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32 ||
5345+
Opc == AMDGPU::S_AND_B32 || Opc == AMDGPU::S_OR_B32 ||
5346+
Opc == AMDGPU::S_XOR_B32);
53305347

53315348
// Create Control flow for loop
53325349
// Split MI's Machine Basic block into For loop
@@ -5349,33 +5366,15 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53495366

53505367
// Create initial values of induction variable from Exec, Accumulator and
53515368
// insert branch instr to newly created ComputeBlock
5352-
uint32_t IdentityValue = getIdentityValueForWaveReduction(Opc);
53535369
BuildMI(BB, I, DL, TII->get(MovOpcForExec), LoopIterator).addReg(ExecReg);
53545370
if (is32BitOpc) {
5371+
uint32_t IdentityValue = getIdentityValueFor32BitWaveReduction(Opc);
53555372
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), IdentityValReg)
53565373
.addImm(IdentityValue);
53575374
} else {
5358-
Register Identitylo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5359-
Register Identityhi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5360-
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identityhi)
5375+
uint64_t IdentityValue = getIdentityValueFor64BitWaveReduction(Opc);
5376+
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), IdentityValReg)
53615377
.addImm(IdentityValue);
5362-
switch (Opc) {
5363-
case AMDGPU::V_CMP_LT_U64_e64:
5364-
case AMDGPU::V_CMP_LT_I64_e64:
5365-
IdentityValue = int32_t(-1); // u|min
5366-
break;
5367-
case AMDGPU::V_CMP_GT_U64_e64:
5368-
case AMDGPU::V_CMP_GT_I64_e64:
5369-
IdentityValue = int32_t(0); // u|max
5370-
break;
5371-
}
5372-
BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), Identitylo)
5373-
.addImm(IdentityValue);
5374-
BuildMI(BB, I, DL, TII->get(TargetOpcode::REG_SEQUENCE), IdentityValReg)
5375-
.addReg(Identitylo)
5376-
.addImm(AMDGPU::sub0)
5377-
.addReg(Identityhi)
5378-
.addImm(AMDGPU::sub1);
53795378
}
53805379
// clang-format off
53815380
BuildMI(BB, I, DL, TII->get(AMDGPU::S_BRANCH))

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,9 +1269,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
12691269
; GFX8DAGISEL-LABEL: divergent_value_i64:
12701270
; GFX8DAGISEL: ; %bb.0: ; %entry
12711271
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1272-
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1273-
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
12741272
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
1273+
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
1274+
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
12751275
; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
12761276
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
12771277
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1294,9 +1294,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
12941294
; GFX8GISEL-LABEL: divergent_value_i64:
12951295
; GFX8GISEL: ; %bb.0: ; %entry
12961296
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1297-
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
1298-
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
12991297
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
1298+
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
1299+
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
13001300
; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13011301
; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13021302
; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1319,9 +1319,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13191319
; GFX9DAGISEL-LABEL: divergent_value_i64:
13201320
; GFX9DAGISEL: ; %bb.0: ; %entry
13211321
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322-
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1323-
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
13241322
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
1323+
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
1324+
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
13251325
; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13261326
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13271327
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1344,9 +1344,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13441344
; GFX9GISEL-LABEL: divergent_value_i64:
13451345
; GFX9GISEL: ; %bb.0: ; %entry
13461346
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347-
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
1348-
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
13491347
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
1348+
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
1349+
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
13501350
; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13511351
; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13521352
; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1369,9 +1369,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13691369
; GFX1064DAGISEL-LABEL: divergent_value_i64:
13701370
; GFX1064DAGISEL: ; %bb.0: ; %entry
13711371
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372-
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1373-
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
13741372
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
1373+
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
1374+
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
13751375
; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
13761376
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
13771377
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1393,9 +1393,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
13931393
; GFX1064GISEL-LABEL: divergent_value_i64:
13941394
; GFX1064GISEL: ; %bb.0: ; %entry
13951395
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1396-
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
1397-
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
13981396
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
1397+
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
1398+
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
13991399
; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14001400
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
14011401
; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1417,9 +1417,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14171417
; GFX1032DAGISEL-LABEL: divergent_value_i64:
14181418
; GFX1032DAGISEL: ; %bb.0: ; %entry
14191419
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420-
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
1421-
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
14221420
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
1421+
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
1422+
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
14231423
; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14241424
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
14251425
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1441,9 +1441,9 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14411441
; GFX1032GISEL-LABEL: divergent_value_i64:
14421442
; GFX1032GISEL: ; %bb.0: ; %entry
14431443
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1444-
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
1445-
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
14461444
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
1445+
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
1446+
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
14471447
; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
14481448
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
14491449
; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4
@@ -1465,16 +1465,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14651465
; GFX1164DAGISEL-LABEL: divergent_value_i64:
14661466
; GFX1164DAGISEL: ; %bb.0: ; %entry
14671467
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1468-
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
1469-
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
14701468
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
1469+
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
1470+
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
14711471
; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1472+
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
14721473
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
14731474
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0
14741475
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1
14751476
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s8
14761477
; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v3, s8
1477-
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
14781478
; GFX1164DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5]
14791479
; GFX1164DAGISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3]
14801480
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8
@@ -1490,16 +1490,16 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
14901490
; GFX1164GISEL-LABEL: divergent_value_i64:
14911491
; GFX1164GISEL: ; %bb.0: ; %entry
14921492
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493-
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
1494-
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
14951493
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
1494+
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
1495+
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
14961496
; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1497+
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
14971498
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
14981499
; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0
14991500
; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1
15001501
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s8
15011502
; GFX1164GISEL-NEXT: v_readlane_b32 s5, v3, s8
1502-
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15031503
; GFX1164GISEL-NEXT: v_cmp_gt_i64_e32 vcc, s[4:5], v[4:5]
15041504
; GFX1164GISEL-NEXT: s_and_b64 s[6:7], vcc, s[2:3]
15051505
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8
@@ -1515,15 +1515,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15151515
; GFX1132DAGISEL-LABEL: divergent_value_i64:
15161516
; GFX1132DAGISEL: ; %bb.0: ; %entry
15171517
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1518-
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
1519-
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
15201518
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
1519+
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
1520+
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
15211521
; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1522+
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
15221523
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
15231524
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
15241525
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
15251526
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3
1526-
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15271527
; GFX1132DAGISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5]
15281528
; GFX1132DAGISEL-NEXT: s_and_b32 s6, vcc_lo, s2
15291529
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
@@ -1538,15 +1538,15 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
15381538
; GFX1132GISEL-LABEL: divergent_value_i64:
15391539
; GFX1132GISEL: ; %bb.0: ; %entry
15401540
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1541-
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
1542-
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
15431541
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
1542+
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
1543+
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
15441544
; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
1545+
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
15451546
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
15461547
; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
15471548
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
15481549
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3
1549-
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
15501550
; GFX1132GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, s[4:5], v[4:5]
15511551
; GFX1132GISEL-NEXT: s_and_b32 s6, vcc_lo, s2
15521552
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3

0 commit comments

Comments
 (0)