Skip to content

Commit f1bbabd

Browse files
authored
[ARM] Lower arm_neon_vbsl to ARMISD::VBSP and fold (vbsl x, y, y) to y (#109761)
This helps clean up the patterns a little and allows combines to be shared between the intrinsic and VBSP. A combine is then added that folds the VBSP away when both of the selected operands are the same.
1 parent 5a191e3 commit f1bbabd

File tree

3 files changed

+35
-33
lines changed

3 files changed

+35
-33
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17653,6 +17653,11 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
1765317653
// No immediate versions of these to check for.
1765417654
break;
1765517655

17656+
case Intrinsic::arm_neon_vbsl: {
17657+
SDLoc dl(N);
17658+
return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
17659+
N->getOperand(2), N->getOperand(3));
17660+
}
1765617661
case Intrinsic::arm_mve_vqdmlah:
1765717662
case Intrinsic::arm_mve_vqdmlash:
1765817663
case Intrinsic::arm_mve_vqrdmlah:
@@ -19072,6 +19077,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
1907219077
return SDValue();
1907319078
break;
1907419079
}
19080+
case ARMISD::VBSP:
19081+
if (N->getOperand(1) == N->getOperand(2))
19082+
return N->getOperand(1);
19083+
return SDValue();
1907519084
case ISD::INTRINSIC_VOID:
1907619085
case ISD::INTRINSIC_W_CHAIN:
1907719086
switch (N->getConstantOperandVal(1)) {

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5524,26 +5524,23 @@ def : Pat<(v16i8 (vnotq QPR:$src)),
55245524
// with different register constraints; it just inserts copies.
55255525
// That is why the pseudo VBSP is implemented. It is expanded later into
55265526
// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
5527-
def VBSPd
5528-
: PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5529-
IIC_VBINiD, "",
5530-
[(set DPR:$Vd,
5531-
(v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5527+
def VBSPd : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5528+
IIC_VBINiD, "", []>;
55325529
let Predicates = [HasNEON] in {
5533-
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5534-
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5530+
def : Pat<(v8i8 (NEONvbsp (v8i8 DPR:$src1),
5531+
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
55355532
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5536-
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5537-
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5533+
def : Pat<(v4i16 (NEONvbsp (v4i16 DPR:$src1),
5534+
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
55385535
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5539-
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5540-
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5536+
def : Pat<(v2i32 (NEONvbsp (v2i32 DPR:$src1),
5537+
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
55415538
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5542-
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5543-
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5539+
def : Pat<(v2f32 (NEONvbsp (v2f32 DPR:$src1),
5540+
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
55445541
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5545-
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5546-
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5542+
def : Pat<(v1i64 (NEONvbsp (v1i64 DPR:$src1),
5543+
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
55475544
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
55485545

55495546
def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd),
@@ -5560,26 +5557,23 @@ def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
55605557
(VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
55615558
}
55625559

5563-
def VBSPq
5564-
: PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5565-
IIC_VBINiQ, "",
5566-
[(set QPR:$Vd,
5567-
(v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5560+
def VBSPq : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5561+
IIC_VBINiQ, "", []>;
55685562
let Predicates = [HasNEON] in {
5569-
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5570-
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5563+
def : Pat<(v16i8 (NEONvbsp (v16i8 QPR:$src1),
5564+
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
55715565
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5572-
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5573-
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5566+
def : Pat<(v8i16 (NEONvbsp (v8i16 QPR:$src1),
5567+
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
55745568
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5575-
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5576-
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5569+
def : Pat<(v4i32 (NEONvbsp (v4i32 QPR:$src1),
5570+
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
55775571
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5578-
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5579-
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5572+
def : Pat<(v4f32 (NEONvbsp (v4f32 QPR:$src1),
5573+
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
55805574
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5581-
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5582-
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5575+
def : Pat<(v2i64 (NEONvbsp (v2i64 QPR:$src1),
5576+
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
55835577
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
55845578

55855579
def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd),

llvm/test/CodeGen/ARM/vbsl.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,7 @@ define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounw
264264
define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
265265
; CHECK-LABEL: same_param_all:
266266
; CHECK: @ %bb.0:
267-
; CHECK-NEXT: vorr d0, d1, d1
268-
; CHECK-NEXT: vbsl d0, d1, d1
267+
; CHECK-NEXT: vmov.f64 d0, d1
269268
; CHECK-NEXT: bx lr
270269
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b)
271270
ret <8 x i8> %vbsl.i
@@ -274,7 +273,7 @@ define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
274273
define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) {
275274
; CHECK-LABEL: same_param_12:
276275
; CHECK: @ %bb.0:
277-
; CHECK-NEXT: vbsl d0, d1, d1
276+
; CHECK-NEXT: vmov.f64 d0, d1
278277
; CHECK-NEXT: bx lr
279278
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b)
280279
ret <8 x i8> %vbsl.i

0 commit comments

Comments
 (0)