Skip to content

Commit fd77066

Browse files
committed
[EXTERNAL] Fix v_mov_b16_t16 index in folding pass
1 parent d531704 commit fd77066

File tree

4 files changed

+56
-1
lines changed

4 files changed

+56
-1
lines changed

external/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
931931
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
932932
SubDef && TII.isFoldableCopy(*SubDef);
933933
SubDef = MRI.getVRegDef(Sub->getReg())) {
934-
MachineOperand &SrcOp = SubDef->getOperand(1);
934+
unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
935+
MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
936+
935937
if (SrcOp.isImm())
936938
return &SrcOp;
937939
if (!SrcOp.isReg() || SrcOp.getReg().isPhysical())

external/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3479,6 +3479,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
34793479
}
34803480
}
34813481

3482+
/// Returns the operand index of the source operand of the foldable copy \p MI.
///
/// The two V_MOV_B16_t16 encodings report index 2; every other opcode listed
/// below reports index 1. Any opcode that is not listed reaches
/// llvm_unreachable, so callers are expected to check isFoldableCopy(MI)
/// before calling this.
// NOTE(review): the t16 moves presumably carry a source-modifiers operand at
// index 1, which would push the moved value to index 2 -- confirm against the
// instruction definitions.
unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
3483+
switch (MI.getOpcode()) {
3484+
case AMDGPU::V_MOV_B16_t16_e32:
3485+
case AMDGPU::V_MOV_B16_t16_e64:
3486+
return 2;
3487+
case AMDGPU::V_MOV_B32_e32:
3488+
case AMDGPU::V_MOV_B32_e64:
3489+
case AMDGPU::V_MOV_B64_PSEUDO:
3490+
case AMDGPU::V_MOV_B64_e32:
3491+
case AMDGPU::V_MOV_B64_e64:
3492+
case AMDGPU::S_MOV_B32:
3493+
case AMDGPU::S_MOV_B64:
3494+
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3495+
case AMDGPU::COPY:
3496+
case AMDGPU::WWM_COPY:
3497+
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3498+
case AMDGPU::V_ACCVGPR_READ_B32_e64:
3499+
case AMDGPU::V_ACCVGPR_MOV_B32:
3500+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3501+
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3502+
return 1;
3503+
default:
3504+
// Deliberately no fallback index: an unlisted opcode is a caller bug.
llvm_unreachable("MI is not a foldable copy");
3505+
}
3506+
}
3507+
34823508
static constexpr AMDGPU::OpName ModifierOpNames[] = {
34833509
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
34843510
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,

external/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
433433
const MachineInstr &MIb) const override;
434434

435435
static bool isFoldableCopy(const MachineInstr &MI);
436+
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
436437

437438
void removeModOperands(MachineInstr &MI) const;
438439

external/llvm-project/llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ body: |
5959
%4:vgpr_16 = COPY %3:sgpr_lo16
6060
%5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec
6161
S_ENDPGM 0, implicit %5
62+
...
6263

6364
---
6465
name: fold_16bit_madmix_clamp
@@ -197,3 +198,28 @@ body: |
197198
$vgpr0 = COPY %4
198199
S_ENDPGM 0, implicit $vgpr0
199200
...
201+
202+
# Two V_MOV_B16_t16_e64 definitions of the immediate -1 are joined into a
# 32-bit value by REG_SEQUENCE (lo16 / hi16) and that value feeds both sources
# of V_MAX_F32_e64. The CHECK lines expect both V_MAX_F32_e64 sources to be
# rewritten to the immediate -1 while the moves and the REG_SEQUENCE remain.
---
203+
name: fold_imm16_across_reg_sequence
204+
tracksRegLiveness: true
205+
registers:
206+
body: |
207+
bb.0:
208+
liveins: $vgpr0, $vgpr1, $vgpr2
209+
; CHECK-LABEL: name: fold_imm16_across_reg_sequence
210+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
211+
; CHECK-NEXT: {{ $}}
212+
; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
213+
; CHECK-NEXT: [[V_MOV_B16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
214+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B16_t16_e64_]], %subreg.lo16, [[V_MOV_B16_t16_e64_1]], %subreg.hi16
215+
; CHECK-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, -1, 0, -1, 0, 0, implicit $mode, implicit $exec
216+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F32_e64_]]
217+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
218+
%0:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
219+
%1:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
220+
%2:vgpr_32 = REG_SEQUENCE %0, %subreg.lo16, %1, %subreg.hi16
221+
%3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
222+
$vgpr0 = COPY %3
223+
S_ENDPGM 0, implicit $vgpr0
224+
...
225+

0 commit comments

Comments
 (0)