Skip to content

Commit a024fba

Browse files
committed
[AArch64] Improve lowering of GPR zeroing in copyPhysReg
This patch pivots GPR32 and GPR64 zeroing into distinct branches to simplify the code and improve the lowering. Zeroing GPR moves are now handled differently than non-zeroing ones. Zero source registers WZR and XZR do not require register annotations of undef, implicit and kill. The non-zeroing GPR32 path no longer has to handle WZR as a source, which removes the ternary expression. This patch also moves the GPR64 logic right after the GPR32 logic for better organization.
1 parent 6345222 commit a024fba

File tree

2 files changed

+52
-33
lines changed

2 files changed

+52
-33
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 50 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5063,7 +5063,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50635063
bool RenamableDest,
50645064
bool RenamableSrc) const {
50655065
if (AArch64::GPR32spRegClass.contains(DestReg) &&
5066-
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
5066+
AArch64::GPR32spRegClass.contains(SrcReg)) {
50675067
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
50685068
// If either operand is WSP, expand to ADD #0.
50695069
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
@@ -5088,21 +5088,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50885088
.addImm(0)
50895089
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
50905090
}
5091-
} else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
5092-
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5093-
.addImm(0)
5094-
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
50955091
} else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
50965092
!Subtarget.hasZeroCycleRegMoveGPR32()) {
50975093
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
50985094
MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
50995095
&AArch64::GPR64spRegClass);
51005096
assert(DestRegX.isValid() && "Destination super-reg not valid");
5101-
MCRegister SrcRegX =
5102-
SrcReg == AArch64::WZR
5103-
? AArch64::XZR
5104-
: RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5105-
&AArch64::GPR64spRegClass);
5097+
MCRegister SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
5098+
&AArch64::GPR64spRegClass);
51065099
assert(SrcRegX.isValid() && "Source super-reg not valid");
51075100
// This instruction is reading and writing X registers. This may upset
51085101
// the register scavenger and machine verifier, so we need to indicate
@@ -5121,6 +5114,53 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
51215114
return;
51225115
}
51235116

5117+
// GPR32 zeroing
5118+
if (AArch64::GPR32spRegClass.contains(DestReg) &&
5119+
SrcReg == AArch64::WZR) {
5120+
if (Subtarget.hasZeroCycleZeroingGPR32()) {
5121+
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5122+
.addImm(0)
5123+
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5124+
} else {
5125+
BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5126+
.addReg(AArch64::WZR)
5127+
.addReg(AArch64::WZR);
5128+
}
5129+
return;
5130+
}
5131+
5132+
if (AArch64::GPR64spRegClass.contains(DestReg) &&
5133+
AArch64::GPR64spRegClass.contains(SrcReg)) {
5134+
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5135+
// If either operand is SP, expand to ADD #0.
5136+
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5137+
.addReg(SrcReg, getKillRegState(KillSrc))
5138+
.addImm(0)
5139+
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5140+
} else {
5141+
// Otherwise, expand to ORR XZR.
5142+
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5143+
.addReg(AArch64::XZR)
5144+
.addReg(SrcReg, getKillRegState(KillSrc));
5145+
}
5146+
return;
5147+
}
5148+
5149+
// GPR64 zeroing
5150+
if (AArch64::GPR64spRegClass.contains(DestReg) &&
5151+
SrcReg == AArch64::XZR) {
5152+
if (Subtarget.hasZeroCycleZeroingGPR64()) {
5153+
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5154+
.addImm(0)
5155+
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5156+
} else {
5157+
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5158+
.addReg(AArch64::XZR)
5159+
.addReg(AArch64::XZR);
5160+
}
5161+
return;
5162+
}
5163+
51245164
// Copy a Predicate register by ORRing with itself.
51255165
if (AArch64::PPRRegClass.contains(DestReg) &&
51265166
AArch64::PPRRegClass.contains(SrcReg)) {
@@ -5205,27 +5245,6 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
52055245
return;
52065246
}
52075247

5208-
if (AArch64::GPR64spRegClass.contains(DestReg) &&
5209-
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5210-
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5211-
// If either operand is SP, expand to ADD #0.
5212-
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5213-
.addReg(SrcReg, getKillRegState(KillSrc))
5214-
.addImm(0)
5215-
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5216-
} else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
5217-
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5218-
.addImm(0)
5219-
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5220-
} else {
5221-
// Otherwise, expand to ORR XZR.
5222-
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5223-
.addReg(AArch64::XZR)
5224-
.addReg(SrcReg, getKillRegState(KillSrc));
5225-
}
5226-
return;
5227-
}
5228-
52295248
// Copy a DDDD register quad by copying the individual sub-registers.
52305249
if (AArch64::DDDDRegClass.contains(DestReg) &&
52315250
AArch64::DDDDRegClass.contains(SrcReg)) {

llvm/test/CodeGen/AArch64/arm64-copy-phys-zero-reg.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ body: |
3939
; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-LABEL: name: f0
4040
; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ: liveins: $x0, $lr
4141
; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: {{ $}}
42-
; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: $x0 = ORRXrr $xzr, undef $xzr, implicit $wzr
42+
; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: $w0 = ORRWrr $wzr, $wzr
4343
; CHECK-NO-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
4444
;
4545
; CHECK-ZCM-GPR32-ZCM-GPR64-NO-ZCZ-LABEL: name: f0
@@ -103,7 +103,7 @@ body: |
103103
; CHECK-ZCM-ZCZ: liveins: $x0, $lr
104104
; CHECK-ZCM-ZCZ-NEXT: {{ $}}
105105
; CHECK-ZCM-ZCZ-NEXT: $x0 = MOVZXi 0, 0
106-
; CHECK-ZCM-ZCZ-NEXT:BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
106+
; CHECK-ZCM-ZCZ-NEXT: BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
107107
$x0 = COPY $xzr
108108
BL @f2, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
109109
...

0 commit comments

Comments
 (0)