Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions clang/include/clang/Basic/arm_mve.td
Original file line number Diff line number Diff line change
Expand Up @@ -1270,13 +1270,13 @@ defm sqrshr: ScalarSaturatingShiftReg<s32, s64>;
def lsll: LongScalarShift<u64, (args s32:$sh), (IRInt<"lsll"> $lo, $hi, $sh)>;
def asrl: LongScalarShift<s64, (args s32:$sh), (IRInt<"asrl"> $lo, $hi, $sh)>;

multiclass vadcsbc {
multiclass vadcsbc<dag initial_carry_in> {
def q: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
(seq (IRInt<NAME, [Vector]> $a, $b, (shl (load $carry), 29)):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def iq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
(seq (IRInt<NAME, [Vector]> $a, $b, 0):$pair,
(seq (IRInt<NAME, [Vector]> $a, $b, initial_carry_in):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def q_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Expand All @@ -1288,13 +1288,13 @@ multiclass vadcsbc {
def iq_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Ptr<uint>:$carry, Predicate:$pred),
(seq (IRInt<NAME # "_predicated", [Vector, Predicate]> $inactive, $a, $b,
0, $pred):$pair,
initial_carry_in, $pred):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
}
let params = T.Int32 in {
defm vadc: vadcsbc;
defm vsbc: vadcsbc;
defm vadc: vadcsbc<(u32 0)>;
defm vsbc: vadcsbc<(shl 1, 29)>;
}

let params = T.Int in {
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/arm-mve-intrinsics/vadc.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigne

// CHECK-LABEL: @test_vsbciq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
Expand All @@ -110,7 +110,7 @@ int32x4_t test_vsbciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) {

// CHECK-LABEL: @test_vsbciq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
Expand Down Expand Up @@ -170,7 +170,7 @@ uint32x4_t test_vsbcq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry) {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
Expand All @@ -190,7 +190,7 @@ int32x4_t test_vsbciq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsign
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5229,7 +5229,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
case Intrinsic::arm_mve_vsbc:
case Intrinsic::arm_mve_vsbc_predicated:
SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
IntNo == Intrinsic::arm_mve_vsbc_predicated);
return;
case Intrinsic::arm_mve_vshlc:
Expand Down
92 changes: 90 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_s32(<4 x i32> %a, <4 x i32> %b, pt
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
%1 = extractvalue { <4 x i32>, i32 } %0, 1
%2 = lshr i32 %1, 29
%3 = and i32 %2, 1
Expand All @@ -125,6 +125,46 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_u32(<4 x i32> %a, <4 x i32> %b, pt
; CHECK-NEXT: ubfx r1, r1, #29, #1
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
%1 = extractvalue { <4 x i32>, i32 } %0, 1
%2 = lshr i32 %1, 29
%3 = and i32 %2, 1
store i32 %3, ptr %carry_out, align 4
%4 = extractvalue { <4 x i32>, i32 } %0, 0
ret <4 x i32> %4
}

; Unpredicated vsbc with an explicit carry-in operand of 0.
; The expected code materialises the zero in r1, writes it to FPSCR_NZCVQC,
; and then issues vsbc.i32, so the zero carry-in is passed through the flags
; register. Afterwards bit 29 of the flags word read back from FPSCR_NZCVQC
; is extracted and stored through %carry_out.
; NOTE(review): this appears to be the counterpart of the test_vsbciq_* tests,
; which pass 536870912 (1 << 29) as the carry-in - confirm against those tests.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_s32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
; CHECK-LABEL: test_vsbcq_s32_carry_in_zero:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmsr fpscr_nzcvqc, r1
; CHECK-NEXT: vsbc.i32 q0, q0, q1
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
; CHECK-NEXT: ubfx r1, r1, #29, #1
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
; Third operand is the incoming flags word; 0 means bit 29 is clear.
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
; Element 1 of the result is the outgoing flags word; isolate bit 29.
%1 = extractvalue { <4 x i32>, i32 } %0, 1
%2 = lshr i32 %1, 29
%3 = and i32 %2, 1
store i32 %3, ptr %carry_out, align 4
; Element 0 of the result is the vector value returned to the caller.
%4 = extractvalue { <4 x i32>, i32 } %0, 0
ret <4 x i32> %4
}

define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_u32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
; CHECK-LABEL: test_vsbcq_u32_carry_in_zero:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmsr fpscr_nzcvqc, r1
; CHECK-NEXT: vsbc.i32 q0, q0, q1
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
; CHECK-NEXT: ubfx r1, r1, #29, #1
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
%1 = extractvalue { <4 x i32>, i32 } %0, 1
Expand Down Expand Up @@ -196,7 +236,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_s32(<4 x i32> %inactive, <4 x i3
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
%3 = extractvalue { <4 x i32>, i32 } %2, 1
%4 = lshr i32 %3, 29
%5 = and i32 %4, 1
Expand All @@ -215,6 +255,54 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_u32(<4 x i32> %inactive, <4 x i3
; CHECK-NEXT: ubfx r1, r1, #29, #1
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
%3 = extractvalue { <4 x i32>, i32 } %2, 1
%4 = lshr i32 %3, 29
%5 = and i32 %4, 1
store i32 %5, ptr %carry_out, align 4
%6 = extractvalue { <4 x i32>, i32 } %2, 0
ret <4 x i32> %6
}

; Predicated vsbc with an explicit carry-in operand of 0.
; The expected code loads the predicate %p into p0, writes a zeroed register
; to FPSCR_NZCVQC, and then issues vsbct.i32 inside a VPST block, so the zero
; carry-in is passed through the flags register. Afterwards bit 29 of the
; flags word read back from FPSCR_NZCVQC is extracted and stored through
; %carry_out.
; NOTE(review): this appears to be the counterpart of the test_vsbciq_m_*
; tests, which pass 536870912 (1 << 29) as the carry-in - confirm against
; those tests.
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_s32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vsbcq_m_s32_carry_in_zero:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vmsr fpscr_nzcvqc, r2
; CHECK-NEXT: vpst
; CHECK-NEXT: vsbct.i32 q0, q1, q2
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
; CHECK-NEXT: ubfx r1, r1, #29, #1
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
; Widen the 16-bit predicate mask and convert it to a <4 x i1> predicate.
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
; Fourth operand is the incoming flags word; 0 means bit 29 is clear.
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
; Element 1 of the result is the outgoing flags word; isolate bit 29.
%3 = extractvalue { <4 x i32>, i32 } %2, 1
%4 = lshr i32 %3, 29
%5 = and i32 %4, 1
store i32 %5, ptr %carry_out, align 4
; Element 0 of the result is the vector value returned to the caller.
%6 = extractvalue { <4 x i32>, i32 } %2, 0
ret <4 x i32> %6
}

define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_u32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
; CHECK-LABEL: test_vsbcq_m_u32_carry_in_zero:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vmsr fpscr_nzcvqc, r2
; CHECK-NEXT: vpst
; CHECK-NEXT: vsbct.i32 q0, q1, q2
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
; CHECK-NEXT: ubfx r1, r1, #29, #1
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Thumb2/mve-vadc-vsbc-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ define void @sub_256(<4 x i32> %a_low, <4 x i32> %a_high, <4 x i32> %b_low, <4 x
; CHECK-NEXT: pop.w {r7, lr}
; CHECK-NEXT: b use_int32x4_t
entry:
%adc_low = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a_low, <4 x i32> %b_low, i32 0)
%adc_low = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a_low, <4 x i32> %b_low, i32 536870912)
%carry = extractvalue { <4 x i32>, i32 } %adc_low, 1
%result_low = extractvalue { <4 x i32>, i32 } %adc_low, 0
tail call void @use_int32x4_t(<4 x i32> %result_low)
Expand Down
Loading