Skip to content

Commit 11df2dc

Browse files
committed
AMDGPU: Fix DPP combiner using isOperandLegal on incomplete inst
It is not safe to use isOperandLegal on an instruction that does not have a complete set of operands. Unforunately the APIs are not set up in a convenient way to speculatively check if an instruction will be legal in a hypothetical instruction. Build all the operands and then verify they are legal after. This is clumsy, we should have a more direct check for will these operands give a legal instruction. This seems to fix a missed optimization in the gfx11 test. The fold was firing for gfx1150, but not gfx1100. Both should support vop3 literals so I'm not sure why it wasn't working before.
1 parent 9dc68dd commit 11df2dc

File tree

2 files changed

+27
-27
lines changed

2 files changed

+27
-27
lines changed

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
250250
++NumOperands;
251251
}
252252
if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
253-
if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
253+
if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) {
254254
DPPInst.add(*SDst);
255255
++NumOperands;
256256
}
@@ -296,11 +296,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
296296
auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
297297
assert(Src0);
298298
int Src0Idx = NumOperands;
299-
if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
300-
LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
301-
Fail = true;
302-
break;
303-
}
299+
304300
DPPInst.add(*Src0);
305301
DPPInst->getOperand(NumOperands).setIsKill(false);
306302
++NumOperands;
@@ -319,21 +315,17 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
319315
}
320316
auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
321317
if (Src1) {
322-
int OpNum = NumOperands;
318+
assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1) &&
319+
"dpp version of instruction missing src1");
323320
// If subtarget does not support SGPRs for src1 operand then the
324321
// requirements are the same as for src0. We check src0 instead because
325322
// pseudos are shared between subtargets and allow SGPR for src1 on all.
326323
if (!ST->hasDPPSrc1SGPR()) {
327324
assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
328325
getOperandSize(*DPPInst, NumOperands, *MRI) &&
329326
"Src0 and Src1 operands should have the same size");
330-
OpNum = Src0Idx;
331-
}
332-
if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
333-
LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
334-
Fail = true;
335-
break;
336327
}
328+
337329
DPPInst.add(*Src1);
338330
++NumOperands;
339331
}
@@ -349,9 +341,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
349341
}
350342
auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
351343
if (Src2) {
352-
if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
353-
!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
354-
LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
344+
if (!AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src2)) {
345+
LLVM_DEBUG(dbgs() << " failed: dpp does not have src2\n");
355346
Fail = true;
356347
break;
357348
}
@@ -431,6 +422,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
431422
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
432423
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
433424
DPPInst.addImm(CombBCZ ? 1 : 0);
425+
426+
for (AMDGPU::OpName Op :
427+
{AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
428+
int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, Op);
429+
if (OpIdx == -1)
430+
break;
431+
432+
if (!TII->isOperandLegal(*DPPInst, OpIdx)) {
433+
LLVM_DEBUG(dbgs() << " failed: src operand is illegal\n");
434+
Fail = true;
435+
break;
436+
}
437+
}
434438
} while (false);
435439

436440
if (Fail) {

llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
44

55
---
66

77
# GCN-LABEL: name: vop3
88
# GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
99
# GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
1010
# GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
11-
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
12-
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
11+
# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
1312
name: vop3
1413
tracksRegLiveness: true
1514
body: |
@@ -39,12 +38,9 @@ body: |
3938

4039
# GCN-LABEL: name: vop3_sgpr_src1
4140
# GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
42-
# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
43-
# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
44-
# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
45-
# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
46-
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
47-
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
41+
# GCN: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
42+
# GCN: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
43+
# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
4844
# GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
4945
name: vop3_sgpr_src1
5046
tracksRegLiveness: true

0 commit comments

Comments
 (0)