Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5653,7 +5653,7 @@ bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
unsigned SplitSize = 32;
if (IID == Intrinsic::amdgcn_update_dpp && (Size % 64 == 0) &&
ST.hasDPALU_DPP() &&
AMDGPU::isLegalDPALU_DPPControl(MI.getOperand(4).getImm()))
AMDGPU::isLegalDPALU_DPPControl(ST, MI.getOperand(4).getImm()))
SplitSize = 64;

if (Size == SplitSize) {
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5052,11 +5052,13 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
if (DppCtrlIdx >= 0) {
unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();

if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
AMDGPU::isDPALU_DPP(MII.get(Opc))) {
// DP ALU DPP is supported for row_newbcast only on GFX9*
if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) {
// DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
// only on GFX12.
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
Error(S, "DP ALU dpp only supports row_newbcast");
Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
: "DP ALU dpp only supports row_newbcast");
return false;
}
}
Expand Down
35 changes: 30 additions & 5 deletions llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,11 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::byte_sel)) {
DPPInst.addImm(ByteSelOpr->getImm());
}
if (MachineOperand *BitOp3 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::bitop3)) {
assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::bitop3));
DPPInst.add(*BitOp3);
}
}
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
Expand Down Expand Up @@ -544,11 +549,17 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
return false;
}

if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
assert(DppCtrl && DppCtrl->isImm());
if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
assert(DppCtrl && DppCtrl->isImm());
unsigned DppCtrlVal = DppCtrl->getImm();
if ((MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp)) {
if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP)) {
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move is unsupported\n");
// Split it.
return false;
}
if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal)) {
LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported"
" control value\n");
// Let it split, then control may become legal.
Expand Down Expand Up @@ -704,6 +715,20 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
break;
}

if (!ST->hasFeature(AMDGPU::FeatureDPALU_DPP) &&
AMDGPU::isDPALU_DPP32BitOpc(OrigOp)) {
LLVM_DEBUG(dbgs() << " " << OrigMI
<< " failed: DPP ALU DPP is not supported\n");
break;
}

if (!AMDGPU::isLegalDPALU_DPPControl(*ST, DppCtrlVal) &&
AMDGPU::isDPALU_DPP(TII->get(OrigOp), *ST)) {
LLVM_DEBUG(dbgs() << " " << OrigMI
<< " failed: not valid 64-bit DPP control value\n");
break;
}

LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
if (Use == Src0) {
if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -976,8 +976,10 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
unsigned Imm = MI->getOperand(OpNo).getImm();
const MCInstrDesc &Desc = MII.get(MI->getOpcode());

if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
O << " /* DP ALU dpp only supports row_newbcast */";
if (!AMDGPU::isLegalDPALU_DPPControl(STI, Imm) &&
AMDGPU::isDPALU_DPP(Desc, STI)) {
O << " /* DP ALU dpp only supports "
<< (isGFX12(STI) ? "row_share" : "row_newbcast") << " */";
return;
}
if (Imm <= DppCtrl::QUAD_PERM_LAST) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6621,7 +6621,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
unsigned SplitSize = 32;
if (IID == Intrinsic::amdgcn_update_dpp && (ValSize % 64 == 0) &&
ST->hasDPALU_DPP() &&
AMDGPU::isLegalDPALU_DPPControl(N->getConstantOperandVal(3)))
AMDGPU::isLegalDPALU_DPPControl(*ST, N->getConstantOperandVal(3)))
SplitSize = 64;

auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2616,9 +2616,9 @@ std::pair<MachineInstr*, MachineInstr*>
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

if (ST.hasMovB64() &&
if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
AMDGPU::isLegalDPALU_DPPControl(
getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
ST, getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
return std::pair(&MI, nullptr);
}
Expand Down Expand Up @@ -5433,7 +5433,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}

if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
!AMDGPU::isLegalDPALU_DPPControl(DC) && AMDGPU::isDPALU_DPP(Desc)) {
!AMDGPU::isLegalDPALU_DPPControl(ST, DC) &&
AMDGPU::isDPALU_DPP(Desc, ST)) {
ErrInfo = "Invalid dpp_ctrl value: "
"DP ALU dpp only support row_newbcast";
return false;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,7 @@ class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
!eq(VT, v2f16) : VCSrc_v2f16,
!eq(VT, v2bf16) : VCSrc_v2bf16,
!eq(VT, f32) : VCSrc_f32,
!eq(VT, f64) : VCSrc_f64,
!eq(VT, v2i32) : VCSrc_v2b32,
1 : VCSrc_b32);
}
Expand Down
28 changes: 27 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3309,7 +3309,33 @@ bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
return false;
}

bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
bool isDPALU_DPP32BitOpc(unsigned Opc) {
switch (Opc) {
case AMDGPU::V_MUL_LO_U32_e64:
case AMDGPU::V_MUL_LO_U32_e64_dpp:
case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
case AMDGPU::V_MUL_HI_U32_e64:
case AMDGPU::V_MUL_HI_U32_e64_dpp:
case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
case AMDGPU::V_MUL_HI_I32_e64:
case AMDGPU::V_MUL_HI_I32_e64_dpp:
case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
case AMDGPU::V_MAD_U32_e64:
case AMDGPU::V_MAD_U32_e64_dpp:
case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
return true;
default:
return false;
}
}

bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST) {
if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
return false;

if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);

return hasAny64BitVGPROperands(OpDesc);
}

Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1750,15 +1750,22 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

LLVM_READNONE
inline bool isLegalDPALU_DPPControl(unsigned DC) {
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
if (isGFX12(ST))
return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
if (isGFX90A(ST))
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
return false;
}

/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);

/// \returns true if an instruction is a DP ALU DPP without any 64-bit operands.
bool isDPALU_DPP32BitOpc(unsigned Opc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc);
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCSubtargetInfo &ST);

/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -2084,6 +2084,9 @@ multiclass VOP3_Realtriple_gfx11_gfx12<bits<10> op> :
multiclass VOP3_Real_Base_gfx11_gfx12<bits<10> op> :
VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Gen, op>;

multiclass VOP3_Real_Base_gfx11_gfx12_not_gfx1250<bits<10> op> :
VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Not12_50Gen, op>;

multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
string asmName> :
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
Expand Down Expand Up @@ -2211,9 +2214,9 @@ defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>;
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12<0x32c>;
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12<0x32d>;
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12<0x32e>;
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32c>;
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32d>;
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32e>;
defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32f>;
defm V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
Expand Down Expand Up @@ -2242,6 +2245,10 @@ let AssemblerPredicate = isGFX11Plus in {
}

// These instructions differ from GFX12 variant by supporting DPP:
defm V_MUL_LO_U32 : VOP3Only_Realtriple_gfx1250<0x32c>;
defm V_MUL_HI_U32 : VOP3Only_Realtriple_gfx1250<0x32d>;
defm V_MUL_HI_I32 : VOP3Only_Realtriple_gfx1250<0x32e>;

defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>;
defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>;
defm V_PERM_PK16_B8_U4 : VOP3Only_Real_Base_gfx1250<0x243>;
Expand Down
42 changes: 31 additions & 11 deletions llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,GFX942
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s -check-prefixes=GCN,DPP64,GFX90A,DPP64-GFX9 -DCTL=row_newbcast
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,DPP64-GFX9,GFX942 -DCTL=row_newbcast
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10 -DCTL=row_share
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11 -DCTL=row_share
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX1250 -DCTL=row_share

; GCN-LABEL: {{^}}dpp64_ceil:
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
; DPP64: v_ceil_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP64: v_ceil_f64_dpp [[V]], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
Expand All @@ -21,8 +22,8 @@ define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) {

; GCN-LABEL: {{^}}dpp64_rcp:
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
; DPP64: v_rcp_f64_dpp [[V]], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP64-GFX9: v_rcp_f64_dpp [[V]], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp64_rcp(ptr addrspace(1) %arg, i64 %in1) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
Expand Down Expand Up @@ -52,9 +53,9 @@ define amdgpu_kernel void @dpp64_rcp_unsupported_ctl(ptr addrspace(1) %arg, i64

; GCN-LABEL: {{^}}dpp64_div:
; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]],
; DPPMOV64: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_newbcast:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GFX10PLUS-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPPMOV64: v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GFX90A-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GCN: v_div_scale_f64
; GCN: v_rcp_f64_e32
define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
Expand All @@ -69,6 +70,25 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
ret void
}

; On GFX9 it fails to combine because v_mul_lo_u32 has no e32 or dpp form.
; GCN-LABEL: {{^}}dpp_mul_row_share:
; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
; DPP64-GFX9: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]]
; DPP64-GFX9: v_mov_b32_dpp [[V2]], [[V2]] {{row_share|row_newbcast}}:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; DPP64-GFX9: v_mul_lo_u32 [[V]], [[V2]], [[V]]{{$}}
; GFX1250: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]]
; GFX1250: v_mov_b32_dpp [[V2]], [[V2]] {{row_share|row_newbcast}}:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
; GFX1250: v_mul_lo_u32 [[V]], [[V2]], [[V]]{{$}}
define amdgpu_kernel void @dpp_mul_row_share(ptr addrspace(1) %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id
%load = load i32, ptr addrspace(1) %gep
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 336, i32 15, i32 15, i1 1)
%mul = mul i32 %tmp0, %load
store i32 %mul, ptr addrspace(1) %gep
ret void
}

; GCN-LABEL: {{^}}dpp64_loop:
; GCN: v_mov_b32_dpp
; DPP64: v_mov_b32_dpp
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/AMDGPU/dpp_combine_gfx1250.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s -check-prefix=GFX1250

---
name: v_bitop3_dpp
tracksRegLiveness: true
body: |
bb.0:
; GFX1250-LABEL: name: v_bitop3_dpp
; GFX1250: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX1250-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX1250-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec
; GFX1250-NEXT: [[V_BITOP3_B32_e64_dpp:%[0-9]+]]:vgpr_32 = V_BITOP3_B32_e64_dpp [[DEF]], [[V_MOV_B32_e32_]], 1, [[V_MOV_B32_dpp]], 128, 0, 15, 15, 1, implicit $exec
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_dpp %0, %0, 0, 15, 15, 0, implicit $exec
%2:vgpr_32 = V_MOV_B32_dpp %0, %0, 0, 0, 0, 0, implicit $exec
%3:vgpr_32 = V_BITOP3_B32_e64 %1, 1, %2, 128, implicit $exec
...