
Commit de4092f

[AArch64] Convert CSEL(X, 1) into CSINC(X, XZR) in early-ifcvt
Early if-conversion can create instruction sequences such as

```
mov  x1, #1
csel x0, x1, x2, eq
```

which can be simplified to

```
csinc x0, x2, xzr, ne
```

One notable example that generates code like this is `cmpxchg weak`. This is handled by treating an immediate value of 1 as `add(wzr, 1)`, so that the addition can be folded into the CSEL by using CSINC instead.
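To make the rewrite concrete, here is a small standalone sketch (my own illustration, not part of the patch) checking that the two sequences above compute the same value; `csel`/`csinc` model the AArch64 semantics `cond ? Rn : Rm` and `cond ? Rn : Rm + 1`:

```cpp
#include <cassert>
#include <cstdint>

// Models of the AArch64 conditional selects used above.
static uint64_t csel(bool cond, uint64_t rn, uint64_t rm) { return cond ? rn : rm; }
static uint64_t csinc(bool cond, uint64_t rn, uint64_t rm) { return cond ? rn : rm + 1; }

int main() {
  const uint64_t x2 = 42; // arbitrary value of the other select input
  for (int eq = 0; eq <= 1; ++eq) {
    // Before: mov x1, #1 ; csel x0, x1, x2, eq
    uint64_t before = csel(eq, /*x1=*/1, x2);
    // After:  csinc x0, x2, xzr, ne   (condition inverted; xzr + 1 == 1)
    uint64_t after = csinc(/*ne=*/!eq, x2, /*xzr=*/0);
    assert(before == after);
  }
  return 0;
}
```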
1 parent: c265d7a

3 files changed: 120 additions, 9 deletions

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 38 additions & 6 deletions
```diff
@@ -708,8 +708,32 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
   unsigned Opc = 0;
-  unsigned SrcOpNum = 0;
+  unsigned SrcReg = 0;
   switch (DefMI->getOpcode()) {
+  case AArch64::SUBREG_TO_REG:
+    // Check for the following way to define an 64-bit immediate:
+    //   %0:gpr32 = MOVi32imm 1
+    //   %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+      return 0;
+    if (!DefMI->getOperand(2).isReg())
+      return 0;
+    if (!DefMI->getOperand(3).isImm() ||
+        DefMI->getOperand(3).getImm() != AArch64::sub_32)
+      return 0;
+    DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+    if (DefMI->getOpcode() != AArch64::MOVi32imm)
+      return 0;
+    // fall-through to MOVi32imm case.
+    [[fallthrough]];
+  case AArch64::MOVi32imm:
+  case AArch64::MOVi64imm:
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+      return 0;
+    SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+    break;
+
   case AArch64::ADDSXri:
   case AArch64::ADDSWri:
     // if NZCV is used, do not fold.
@@ -724,7 +748,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
         DefMI->getOperand(3).getImm() != 0)
       return 0;
-    SrcOpNum = 1;
+    SrcReg = DefMI->getOperand(1).getReg();
     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     break;
 
@@ -734,7 +758,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     break;
   }
@@ -753,17 +777,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     break;
   }
   default:
     return 0;
   }
-  assert(Opc && SrcOpNum && "Missing parameters");
+  assert(Opc && SrcReg && "Missing parameters");
 
   if (NewVReg)
-    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+    *NewVReg = SrcReg;
   return Opc;
 }
 
@@ -976,6 +1000,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
 
     // Fold the operation. Leave any dead instructions for DCE to clean up.
     if (FoldedOpc) {
+      // NewVReg might be XZR/WZR. In that case create a COPY into a virtual
+      // register.
+      if (!Register::isVirtualRegister(NewVReg)) {
+        unsigned ZeroReg = NewVReg;
+        NewVReg = MRI.createVirtualRegister(RC);
+        BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewVReg).addReg(ZeroReg);
+      }
+
       FalseReg = NewVReg;
       Opc = FoldedOpc;
       // The extends the live range of NewVReg.
```
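The later hunks also switch `canFoldIntoCSel` from returning an operand index (`SrcOpNum`) to returning a register (`SrcReg`): for the new immediate-1 case the register to substitute is the zero register itself, not an operand of the matched MOV. For orientation, here is a standalone sketch (my own illustration in plain C++, not LLVM code) of the value-level equivalences the CSINC/CSINV/CSNEG folds rely on; the zero-register checks in the partially shown hunks appear to correspond to the existing NOT (ORN with the zero register) and negate (SUBS from the zero register) patterns:

```cpp
#include <cassert>
#include <cstdint>

// AArch64 conditional-select family, modelled on 64-bit values:
//   csel  Rd, Rn, Rm, cond : cond ? Rn : Rm
//   csinc Rd, Rn, Rm, cond : cond ? Rn : Rm + 1
//   csinv Rd, Rn, Rm, cond : cond ? Rn : ~Rm
//   csneg Rd, Rn, Rm, cond : cond ? Rn : -Rm
static uint64_t csel(bool c, uint64_t n, uint64_t m)  { return c ? n : m; }
static uint64_t csinc(bool c, uint64_t n, uint64_t m) { return c ? n : m + 1; }
static uint64_t csinv(bool c, uint64_t n, uint64_t m) { return c ? n : ~m; }
static uint64_t csneg(bool c, uint64_t n, uint64_t m) { return c ? n : 0 - m; }

int main() {
  const uint64_t a = 0x1234, x = 0xfeed;
  for (int c = 0; c <= 1; ++c) {
    // Existing folds: the select's other input was defined by one instruction.
    assert(csel(c, a, x + 1) == csinc(c, a, x)); // add  Rx, Rx, #1
    assert(csel(c, a, ~x)    == csinv(c, a, x)); // orn  Rx, zr, Rx
    assert(csel(c, a, 0 - x) == csneg(c, a, x)); // subs Rx, zr, Rx
    // New fold: the input is the constant 1, i.e. xzr + 1.
    assert(csel(c, a, 1)     == csinc(c, a, /*xzr=*/0));
  }
  return 0;
}
```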

llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll

Lines changed: 80 additions & 0 deletions
```diff
@@ -421,3 +421,83 @@ for.body51:                                       ; preds = %is_sbox.exit155
   unreachable
 }
 declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, ptr nocapture) nounwind ssp
+
+; CHECK-LABEL: fold_imm1_csinc_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: csinc w0, w2, wzr, ge
+; CHECK-NEXT: ret
+define i32 @fold_imm1_csinc_32(i32 %x, i32 %y, i32 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ %n, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_csinc_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csinc x0, x2, xzr, ge
+; CHECK-NEXT: ret
+define i64 @fold_imm1_csinc_64(i64 %x, i64 %y, i64 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ %n, %if.else ]
+  ret i64 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i32 @fold_imm1_cset_32(i32 %x, i32 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ 0, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset x0, lt
+; CHECK-NEXT: ret
+define i64 @fold_imm1_cset_64(i64 %x, i64 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ 0, %if.else ]
+  ret i64 %result
+}
```
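The `cset` checks above are the degenerate case of the same fold: after the constant 1 is folded, both register operands of the resulting `csinc` are the zero register, and that form is printed with the `cset` alias (`cset Rd, cond` is `csinc Rd, zr, zr` with the inverted condition). A tiny standalone sketch of that identity (my own illustration, not taken from the tests):

```cpp
#include <cassert>
#include <cstdint>

// csinc Rd, Rn, Rm, cond : cond ? Rn : Rm + 1
static uint64_t csinc(bool cond, uint64_t rn, uint64_t rm) {
  return cond ? rn : rm + 1;
}

// cset Rd, cond is an alias for csinc Rd, zr, zr, invert(cond).
static uint64_t cset(bool cond) { return csinc(!cond, 0, 0); }

int main() {
  for (int lt = 0; lt <= 1; ++lt) {
    // select (x < y), 1, 0  ->  cset w0, lt  ==  csinc w0, wzr, wzr, ge
    assert(cset(lt) == (lt ? 1u : 0u));
  }
  return 0;
}
```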

llvm/test/CodeGen/AArch64/peephole-csel.ll

Lines changed: 2 additions & 3 deletions
```diff
@@ -5,10 +5,9 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
 ; CHECK-LABEL: peephole_csel:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    tst w2, #0x1
-; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:    tst w1, #0x1
-; CHECK-NEXT:    csel x8, x8, x9, eq
+; CHECK-NEXT:    csinc x8, x8, xzr, ne
 ; CHECK-NEXT:    str x8, [x0]
 ; CHECK-NEXT:    ret
 entry:
```
