Skip to content

Commit 6d0f672

Browse files
committed
Fix test cases; check destination register compatibility against the register class of the instruction used
1 parent b93b955 commit 6d0f672

File tree

4 files changed

+35
-49
lines changed

4 files changed

+35
-49
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,13 +2146,17 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21462146
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
21472147
Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
21482148

2149+
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
2150+
auto *TRI = MRI.getTargetRegisterInfo();
2151+
const MCInstrDesc &Mov64Desc = get(AMDGPU::V_MOV_B64_e32);
2152+
const TargetRegisterClass *Mov64RC =
2153+
getRegClass(Mov64Desc, /*OpNum=*/0, TRI);
2154+
21492155
const MachineOperand &SrcOp = MI.getOperand(1);
21502156
// FIXME: Will this work for 64-bit floating point immediates?
21512157
assert(!SrcOp.isFPImm());
2152-
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
2153-
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Dst);
2154-
if (ST.hasMovB64() && RI.isProperlyAlignedRC(*RC)) {
2155-
MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
2158+
if (ST.hasMovB64() && Mov64RC->contains(Dst)) {
2159+
MI.setDesc(Mov64Desc);
21562160
if (SrcOp.isReg() || isInlineConstant(MI, 1) ||
21572161
isUInt<32>(SrcOp.getImm()) || ST.has64BitLiterals())
21582162
break;
@@ -2161,18 +2165,22 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21612165
APInt Imm(64, SrcOp.getImm());
21622166
APInt Lo(32, Imm.getLoBits(32).getZExtValue());
21632167
APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2168+
const MCInstrDesc &PkMovDesc = get(AMDGPU::V_PK_MOV_B32);
2169+
const TargetRegisterClass *PkMovRC =
2170+
getRegClass(PkMovDesc, /*OpNum=*/0, TRI);
2171+
21642172
if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo) &&
2165-
RI.isProperlyAlignedRC(*RC)) {
2166-
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
2167-
.addImm(SISrcMods::OP_SEL_1)
2168-
.addImm(Lo.getSExtValue())
2169-
.addImm(SISrcMods::OP_SEL_1)
2170-
.addImm(Lo.getSExtValue())
2171-
.addImm(0) // op_sel_lo
2172-
.addImm(0) // op_sel_hi
2173-
.addImm(0) // neg_lo
2174-
.addImm(0) // neg_hi
2175-
.addImm(0); // clamp
2173+
PkMovRC->contains(Dst)) {
2174+
BuildMI(MBB, MI, DL, PkMovDesc, Dst)
2175+
.addImm(SISrcMods::OP_SEL_1)
2176+
.addImm(Lo.getSExtValue())
2177+
.addImm(SISrcMods::OP_SEL_1)
2178+
.addImm(Lo.getSExtValue())
2179+
.addImm(0) // op_sel_lo
2180+
.addImm(0) // op_sel_hi
2181+
.addImm(0) // neg_lo
2182+
.addImm(0) // neg_hi
2183+
.addImm(0); // clamp
21762184
} else {
21772185
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
21782186
.addImm(Lo.getSExtValue())

llvm/test/CodeGen/AMDGPU/misaligned-vgpr-regsequence.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-after=si-load-store-opt %s -o - | FileCheck %s
22

3-
# CHECK: "misaligned-regsequence":
3+
# CHECK: misaligned_regsequence:
44
# CHECK: ; %bb.0:
55
# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66
# CHECK: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -13,10 +13,10 @@
1313
# CHECK: s_endpgm
1414

1515
--- |
16-
define void @misaligned-regsequence() { ret void }
16+
define void @misaligned_regsequence() { ret void }
1717
...
1818
---
19-
name: misaligned-regsequence
19+
name: misaligned_regsequence
2020
tracksRegLiveness: true
2121
body: |
2222
bb.0:

llvm/test/CodeGen/AMDGPU/v_mov_b64_expansion.mir

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,12 @@ body: |
9393
bb.0:
9494
$vgpr0_vgpr1 = V_MOV_B64_PSEUDO 4575657222473777152, implicit $exec
9595
...
96+
97+
# GCN-LABEL: name: v_mov_b64_misalign
98+
# GCN: $vgpr5 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
99+
# GCN: $vgpr6 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr5_vgpr6
100+
name: v_mov_b64_misalign
101+
body: |
102+
bb.0:
103+
$vgpr5_vgpr6 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
104+
...

llvm/test/CodeGen/AMDGPU/vgpr-mov64-align.mir

Lines changed: 0 additions & 31 deletions
This file was deleted.

0 commit comments

Comments
 (0)