Skip to content

Commit b9ecee9

Browse files
authored
[AMDGPU] Fix DPP combining into V_BITOP3_B32 (#153083)
1 parent fb70f51 commit b9ecee9

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -421,6 +421,11 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
421 421
AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::byte_sel)) {
422 422
DPPInst.addImm(ByteSelOpr->getImm());
423 423
}
424+
if (MachineOperand *BitOp3 =
425+
TII->getNamedOperand(OrigMI, AMDGPU::OpName::bitop3)) {
426+
assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::bitop3));
427+
DPPInst.add(*BitOp3);
428+
}
424 429
}
425 430
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
426 431
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s -check-prefix=GFX1250
3+
4+
---
5+
name: v_bitop3_dpp
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
; GFX1250-LABEL: name: v_bitop3_dpp
10+
; GFX1250: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
11+
; GFX1250-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
12+
; GFX1250-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec
13+
; GFX1250-NEXT: [[V_BITOP3_B32_e64_dpp:%[0-9]+]]:vgpr_32 = V_BITOP3_B32_e64_dpp [[DEF]], [[V_MOV_B32_e32_]], 1, [[V_MOV_B32_dpp]], 128, 0, 15, 15, 1, implicit $exec
14+
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
15+
%1:vgpr_32 = V_MOV_B32_dpp %0, %0, 0, 15, 15, 0, implicit $exec
16+
%2:vgpr_32 = V_MOV_B32_dpp %0, %0, 0, 0, 0, 0, implicit $exec
17+
%3:vgpr_32 = V_BITOP3_B32_e64 %1, 1, %2, 128, implicit $exec
18+
...

0 commit comments

Comments
 (0)