
Commit 7d356e9

[AArch64] Convert CSEL(X, 1) into CSINC(X, XZR) in early-ifcvt (llvm#162993)
Early if-conversion can create instruction sequences such as

```
mov  x1, #1
csel x0, x1, x2, eq
```

which could instead be simplified to

```
csinc x0, x2, xzr, ne
```

One notable example that generates code like this is `cmpxchg weak`. This is fixed by treating an immediate value of 1 as `add(wzr, 1)`, so that the addition can be folded into the CSEL by emitting a CSINC instead.
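For context, here is a minimal LLVM IR sketch (not taken from the patch; the function and value names are hypothetical) of the kind of `cmpxchg weak` pattern whose widened success flag can end up as a select of the constant 1 after early if-conversion:

```llvm
; Hypothetical example: the i1 success flag of a weak cmpxchg, widened to i64.
; Depending on how the compare-and-swap is expanded for the target, the flag
; materialization can become a "mov #1" feeding a csel, which this patch now
; folds into a csinc.
define i64 @weak_cas_flag(ptr %p, i64 %old, i64 %new) {
  %pair = cmpxchg weak ptr %p, i64 %old, i64 %new seq_cst seq_cst
  %ok = extractvalue { i64, i1 } %pair, 1
  %flag = zext i1 %ok to i64
  ret i64 %flag
}
```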
1 parent 385ea0d commit 7d356e9

3 files changed: 132 additions, 18 deletions

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 50 additions & 15 deletions
@@ -700,16 +700,45 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
 // csel instruction. If so, return the folded opcode, and the replacement
 // register.
 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
-                                unsigned *NewVReg = nullptr) {
+                                unsigned *NewReg = nullptr) {
   VReg = removeCopies(MRI, VReg);
   if (!Register::isVirtualRegister(VReg))
     return 0;

   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
   unsigned Opc = 0;
-  unsigned SrcOpNum = 0;
+  unsigned SrcReg = 0;
   switch (DefMI->getOpcode()) {
+  case AArch64::SUBREG_TO_REG:
+    // Check for the following way to define an 64-bit immediate:
+    // %0:gpr32 = MOVi32imm 1
+    // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+      return 0;
+    if (!DefMI->getOperand(2).isReg())
+      return 0;
+    if (!DefMI->getOperand(3).isImm() ||
+        DefMI->getOperand(3).getImm() != AArch64::sub_32)
+      return 0;
+    DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+    if (DefMI->getOpcode() != AArch64::MOVi32imm)
+      return 0;
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+      return 0;
+    assert(Is64Bit);
+    SrcReg = AArch64::XZR;
+    Opc = AArch64::CSINCXr;
+    break;
+
+  case AArch64::MOVi32imm:
+  case AArch64::MOVi64imm:
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+      return 0;
+    SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+    break;
+
   case AArch64::ADDSXri:
   case AArch64::ADDSWri:
     // if NZCV is used, do not fold.
@@ -724,7 +753,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
         DefMI->getOperand(3).getImm() != 0)
       return 0;
-    SrcOpNum = 1;
+    SrcReg = DefMI->getOperand(1).getReg();
     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     break;

@@ -734,7 +763,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     break;
   }
@@ -753,17 +782,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     break;
   }
   default:
     return 0;
   }
-  assert(Opc && SrcOpNum && "Missing parameters");
+  assert(Opc && SrcReg && "Missing parameters");

-  if (NewVReg)
-    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+  if (NewReg)
+    *NewReg = SrcReg;
   return Opc;
 }

@@ -964,28 +993,34 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,

   // Try folding simple instructions into the csel.
   if (TryFold) {
-    unsigned NewVReg = 0;
-    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
+    unsigned NewReg = 0;
+    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
     if (FoldedOpc) {
       // The folded opcodes csinc, csinc and csneg apply the operation to
       // FalseReg, so we need to invert the condition.
       CC = AArch64CC::getInvertedCondCode(CC);
       TrueReg = FalseReg;
     } else
-      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
+      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);

     // Fold the operation. Leave any dead instructions for DCE to clean up.
     if (FoldedOpc) {
-      FalseReg = NewVReg;
+      FalseReg = NewReg;
       Opc = FoldedOpc;
-      // The extends the live range of NewVReg.
-      MRI.clearKillFlags(NewVReg);
+      // Extend the live range of NewReg.
+      MRI.clearKillFlags(NewReg);
     }
   }

   // Pull all virtual register into the appropriate class.
   MRI.constrainRegClass(TrueReg, RC);
-  MRI.constrainRegClass(FalseReg, RC);
+  // FalseReg might be WZR or XZR if the folded operand is a literal 1.
+  assert(
+      (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
+       FalseReg == AArch64::XZR) &&
+      "FalseReg was folded into a non-virtual register other than WZR or XZR");
+  if (FalseReg.isVirtual())
+    MRI.constrainRegClass(FalseReg, RC);

   // Insert the csel.
   BuildMI(MBB, I, DL, get(Opc), DstReg)
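Taken together with the existing ADDS handling, the new cases let insertSelect fold a materialized constant 1 directly into the conditional select. At the assembly level the effect looks roughly like the following sketch (illustrative only, mirroring the commit message and the new tests rather than literal compiler output):

```asm
// Before: early if-conversion materializes the literal 1 and selects it.
        mov   w8, #1
        cmp   w0, w1
        csel  w0, w8, w2, lt      // w0 = (w0 < w1) ? 1 : w2

// After: the constant is folded; with the condition inverted, csinc yields
// wzr + 1 (i.e. 1) whenever the original condition held.
        cmp   w0, w1
        csinc w0, w2, wzr, ge     // w0 = (w0 >= w1) ? w2 : 1
```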

llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll

Lines changed: 80 additions & 0 deletions
@@ -421,3 +421,83 @@ for.body51: ; preds = %is_sbox.exit155
   unreachable
 }
 declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, ptr nocapture) nounwind ssp
+
+; CHECK-LABEL: fold_imm1_csinc_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: csinc w0, w2, wzr, ge
+; CHECK-NEXT: ret
+define i32 @fold_imm1_csinc_32(i32 %x, i32 %y, i32 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ %n, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_csinc_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csinc x0, x2, xzr, ge
+; CHECK-NEXT: ret
+define i64 @fold_imm1_csinc_64(i64 %x, i64 %y, i64 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ %n, %if.else ]
+  ret i64 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i32 @fold_imm1_cset_32(i32 %x, i32 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ 0, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset x0, lt
+; CHECK-NEXT: ret
+define i64 @fold_imm1_cset_64(i64 %x, i64 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ 0, %if.else ]
+  ret i64 %result
+}

llvm/test/CodeGen/AArch64/peephole-csel.ll

Lines changed: 2 additions & 3 deletions
@@ -5,10 +5,9 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
 ; CHECK-LABEL: peephole_csel:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: mov x8, xzr
 ; CHECK-NEXT: tst w1, #0x1
-; CHECK-NEXT: csel x8, x8, x9, eq
+; CHECK-NEXT: csinc x8, x8, xzr, ne
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
 entry:
