Skip to content

Commit 9811226

Browse files
authored
PeepholeOpt: Try to constrain uses to support subregister (#161338)
This allows removing a special-case hack in ARM. ARM's implementation of getExtractSubregLikeInputs has the strange property that it reports a register with a class that does not support the reported subregister index. We can, however, reconstrain the register to support this usage. This is an alternative to #159600. I've included the test, but the output is different. In this version, the VMOVSR is replaced with an ordinary subregister extract copy.
1 parent 58b4951 commit 9811226

File tree

6 files changed

+112
-21
lines changed

6 files changed

+112
-21
lines changed

llvm/lib/CodeGen/PeepholeOptimizer.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,18 @@ bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
12031203
if (!NewSrc.Reg)
12041204
continue;
12051205

1206+
if (NewSrc.SubReg) {
1207+
// Verify the register class supports the subregister index. ARM's
1208+
// copy-like queries return register:subreg pairs where the register's
1209+
// current class does not directly support the subregister index.
1210+
const TargetRegisterClass *RC = MRI->getRegClass(NewSrc.Reg);
1211+
const TargetRegisterClass *WithSubRC =
1212+
TRI->getSubClassWithSubReg(RC, NewSrc.SubReg);
1213+
if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
1214+
continue;
1215+
Changed = true;
1216+
}
1217+
12061218
// Rewrite source.
12071219
if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
12081220
// We may have extended the live-range of NewSrc, account for that.
@@ -1275,6 +1287,18 @@ MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
12751287
const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
12761288
Register NewVReg = MRI->createVirtualRegister(DefRC);
12771289

1290+
if (NewSrc.SubReg) {
1291+
const TargetRegisterClass *NewSrcRC = MRI->getRegClass(NewSrc.Reg);
1292+
const TargetRegisterClass *WithSubRC =
1293+
TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.SubReg);
1294+
1295+
// The new source may not directly support the subregister, but we should be
1296+
// able to assume it is constrainable to support the subregister (otherwise
1297+
// ValueTracker was lying and reported a useless value).
1298+
if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
1299+
llvm_unreachable("replacement register cannot support subregister");
1300+
}
1301+
12781302
MachineInstr *NewCopy =
12791303
BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
12801304
TII->get(TargetOpcode::COPY), NewVReg)

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -960,17 +960,3 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
960960
}
961961
return false;
962962
}
963-
964-
bool ARMBaseRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
965-
unsigned DefSubReg,
966-
const TargetRegisterClass *SrcRC,
967-
unsigned SrcSubReg) const {
968-
// We can't extract an SPR from an arbitrary DPR (as opposed to a DPR_VFP2).
969-
if (DefRC == &ARM::SPRRegClass && DefSubReg == 0 &&
970-
SrcRC == &ARM::DPRRegClass &&
971-
(SrcSubReg == ARM::ssub_0 || SrcSubReg == ARM::ssub_1))
972-
return false;
973-
974-
return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
975-
SrcRC, SrcSubReg);
976-
}

llvm/lib/Target/ARM/ARMBaseRegisterInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
158158
const TargetRegisterClass *NewRC,
159159
LiveIntervals &LIS) const override;
160160

161-
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
162-
unsigned DefSubReg,
163-
const TargetRegisterClass *SrcRC,
164-
unsigned SrcSubReg) const override;
165-
166161
int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
167162
};
168163

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s | FileCheck %s
3+
4+
; Make sure there's no assertion from peephole-opt introducing illegal
5+
; subregister index uses.
6+
7+
target triple = "thumbv7-unknown-linux-android29"
8+
9+
define void @_ZN11VersionEdit10DecodeFromEv(i1 %call4, ptr %__profc__ZN11VersionEdit10DecodeFromEv) nounwind {
10+
; CHECK-LABEL: _ZN11VersionEdit10DecodeFromEv:
11+
; CHECK: @ %bb.0: @ %land.rhs.lr.ph
12+
; CHECK-NEXT: lsls r0, r0, #31
13+
; CHECK-NEXT: beq .LBB0_2
14+
; CHECK-NEXT: @ %bb.1:
15+
; CHECK-NEXT: adr r0, .LCPI0_0
16+
; CHECK-NEXT: vld1.64 {d0, d1}, [r0:128]
17+
; CHECK-NEXT: b .LBB0_3
18+
; CHECK-NEXT: .LBB0_2: @ %select.false
19+
; CHECK-NEXT: vmov.i32 q0, #0x0
20+
; CHECK-NEXT: .LBB0_3: @ %select.end
21+
; CHECK-NEXT: vldr s5, .LCPI0_1
22+
; CHECK-NEXT: vldr s4, .LCPI0_2
23+
; CHECK-NEXT: vmov.f32 s6, s0
24+
; CHECK-NEXT: vmov.f32 s7, s1
25+
; CHECK-NEXT: vst1.64 {d2, d3}, [r1]
26+
; CHECK-NEXT: bx lr
27+
; CHECK-NEXT: .p2align 4
28+
; CHECK-NEXT: @ %bb.4:
29+
; CHECK-NEXT: .LCPI0_0:
30+
; CHECK-NEXT: .long 1 @ 0x1
31+
; CHECK-NEXT: .long 0 @ 0x0
32+
; CHECK-NEXT: .long 1 @ 0x1
33+
; CHECK-NEXT: .long 0 @ 0x0
34+
; CHECK-NEXT: .LCPI0_1:
35+
; CHECK-NEXT: .long 0x00000000 @ float 0
36+
; CHECK-NEXT: .LCPI0_2:
37+
; CHECK-NEXT: .long 0x00000001 @ float 1.40129846E-45
38+
land.rhs.lr.ph:
39+
br i1 %call4, label %sw.bb, label %while.cond.while.end_crit_edge.split.loop.exit43
40+
41+
while.cond.while.end_crit_edge.split.loop.exit43: ; preds = %land.rhs.lr.ph
42+
%ext0 = extractelement <4 x i64> zeroinitializer, i64 0
43+
br label %while.cond.while.end_crit_edge
44+
45+
while.cond.while.end_crit_edge: ; preds = %sw.bb, %while.cond.while.end_crit_edge.split.loop.exit43
46+
%pgocount5374.ph = phi i64 [ %ext1, %sw.bb ], [ %ext0, %while.cond.while.end_crit_edge.split.loop.exit43 ]
47+
%ins = insertelement <2 x i64> splat (i64 1), i64 %pgocount5374.ph, i64 1
48+
store <2 x i64> %ins, ptr %__profc__ZN11VersionEdit10DecodeFromEv, align 8
49+
ret void
50+
51+
sw.bb: ; preds = %land.rhs.lr.ph
52+
%ext1 = extractelement <4 x i64> splat (i64 1), i64 0
53+
br label %while.cond.while.end_crit_edge
54+
}
55+

llvm/test/CodeGen/ARM/pr159343.mir

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -run-pass=peephole-opt -verify-machineinstrs -mtriple=thumbv7-unknown-linux-android29 %s -o - | FileCheck %s
3+
---
4+
name: Test_shouldRewriteCopySrc_Invalid_SubReg
5+
tracksRegLiveness: true
6+
body: |
7+
bb.1:
8+
liveins: $r0, $r1
9+
10+
; CHECK-LABEL: name: Test_shouldRewriteCopySrc_Invalid_SubReg
11+
; CHECK: liveins: $r0, $r1
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[DEF:%[0-9]+]]:dpair = IMPLICIT_DEF
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr_vfp2 = COPY [[DEF]].dsub_0
15+
; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[COPY]], 14 /* CC::al */, $noreg
16+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]].ssub_1
17+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
18+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:spr = IMPLICIT_DEF
19+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:spr = IMPLICIT_DEF
20+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mqpr = REG_SEQUENCE killed [[DEF2]], %subreg.ssub_0, killed [[DEF1]], %subreg.ssub_1, killed [[DEF3]], %subreg.ssub_2, [[COPY]].ssub_1, %subreg.ssub_3
21+
; CHECK-NEXT: VST1q64 $r1, 0, killed [[REG_SEQUENCE]], 14 /* CC::al */, $noreg
22+
%0:dpair = IMPLICIT_DEF
23+
%1:dpr = COPY %0.dsub_0
24+
%2:gpr, %3:gpr = VMOVRRD killed %1, 14 /* CC::al */, $noreg
25+
%4:spr = VMOVSR killed %3, 14 /* CC::al */, $noreg
26+
%5:spr = IMPLICIT_DEF
27+
%6:spr = IMPLICIT_DEF
28+
%7:spr = IMPLICIT_DEF
29+
%8:mqpr = REG_SEQUENCE killed %6, %subreg.ssub_0, killed %5, %subreg.ssub_1, killed %7, %subreg.ssub_2, killed %4, %subreg.ssub_3
30+
VST1q64 $r1, 0, killed %8, 14 /* CC::al */, $noreg
31+
...

llvm/test/CodeGen/ARM/shouldRewriteCopySrc.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ define float @shouldRewriteCopySrc(double %arg) #0 {
1212
; CHECK-NEXT: @APP
1313
; CHECK-NEXT: nop
1414
; CHECK-NEXT: @NO_APP
15-
; CHECK-NEXT: vmov r0, r1, d16
16-
; CHECK-NEXT: vmov s0, r0
15+
; CHECK-NEXT: vmov.f64 d0, d16
16+
; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $d0
1717
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
1818
; CHECK-NEXT: bx lr
1919
bb:

0 commit comments

Comments
 (0)