Skip to content

Commit 7942675

Browse files
committed
Allow V_MOV_B64_PSEUDO unaligned dst registers
1 parent 1eeadc2 commit 7942675

File tree

3 files changed

+15
-3
lines changed

3 files changed

+15
-3
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5050,7 +5050,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
50505050
// aligned register constraint.
50515051
// FIXME: We do not verify inline asm operands, but custom inline asm
50525052
// verification is broken anyway
5053-
if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5053+
if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5054+
Opcode != AMDGPU::V_MOV_B64_PSEUDO) {
50545055
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
50555056
if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
50565057
if (const TargetRegisterClass *SubRC =
@@ -6003,7 +6004,8 @@ SIInstrInfo::getRegClass(const MCInstrDesc &TID, unsigned OpNum,
60036004
return nullptr;
60046005
auto RegClass = TID.operands()[OpNum].RegClass;
60056006
// Special pseudos have no alignment requirement.
6006-
if (TID.getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO || isSpill(TID))
6007+
if (TID.getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO ||
6008+
TID.getOpcode() == AMDGPU::V_MOV_B64_PSEUDO || isSpill(TID))
60076009
return RI.getRegClass(RegClass);
60086010

60096011
return adjustAllocatableRegClass(ST, RI, TID, RegClass);

llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ body: |
425425
bb.0:
426426
; GCN-LABEL: name: fold_v_mov_b64_64_to_unaligned
427427
; GCN: [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec
428-
; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
428+
; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
429429
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]]
430430
%0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec
431431
%1:vreg_64 = COPY killed %0
@@ -438,6 +438,7 @@ body: |
438438
bb.0:
439439
; GCN-LABEL: name: fold_v_mov_b64_pseudo_64_to_unaligned
440440
; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
441+
; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
441442
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]]
442443
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
443444
%1:vreg_64 = COPY killed %0

llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,12 @@ body: |
9393
bb.0:
9494
$vgpr0_vgpr1 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec
9595
...
96+
97+
# GCN-LABEL: name: v_mov_b64_misalign
98+
# GCN: $vgpr5 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
99+
# GCN: $vgpr6 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
100+
name: v_mov_b64_misalign
101+
body: |
102+
bb.0:
103+
$vgpr5_vgpr6 = V_MOV_B64_PSEUDO 0, implicit $exec
104+
...

0 commit comments

Comments
 (0)