Skip to content

Commit 3f643ad

Browse files
committed
[ARM] Fix clang codegen for MVE vsbciq intrinsic
The VSBCI instruction behaves as if the carry flag input is set, unlike the VADCI instruction, which behaves as if it is clear. The IR intrinsics take and return the carry flag in the format expected in FPSCR, with the carry flag in bit 29, so clang must pass an initial carry-in of 1 << 29 (536870912) for the vsbciq intrinsics instead of 0.
1 parent e29a082 commit 3f643ad

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

clang/include/clang/Basic/arm_mve.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,13 +1270,13 @@ defm sqrshr: ScalarSaturatingShiftReg<s32, s64>;
12701270
def lsll: LongScalarShift<u64, (args s32:$sh), (IRInt<"lsll"> $lo, $hi, $sh)>;
12711271
def asrl: LongScalarShift<s64, (args s32:$sh), (IRInt<"asrl"> $lo, $hi, $sh)>;
12721272

1273-
multiclass vadcsbc {
1273+
multiclass vadcsbc<dag initial_carry_in> {
12741274
def q: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
12751275
(seq (IRInt<NAME, [Vector]> $a, $b, (shl (load $carry), 29)):$pair,
12761276
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
12771277
(xval $pair, 0))>;
12781278
def iq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
1279-
(seq (IRInt<NAME, [Vector]> $a, $b, 0):$pair,
1279+
(seq (IRInt<NAME, [Vector]> $a, $b, initial_carry_in):$pair,
12801280
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
12811281
(xval $pair, 0))>;
12821282
def q_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
@@ -1288,13 +1288,13 @@ multiclass vadcsbc {
12881288
def iq_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
12891289
Ptr<uint>:$carry, Predicate:$pred),
12901290
(seq (IRInt<NAME # "_predicated", [Vector, Predicate]> $inactive, $a, $b,
1291-
0, $pred):$pair,
1291+
initial_carry_in, $pred):$pair,
12921292
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
12931293
(xval $pair, 0))>;
12941294
}
12951295
let params = T.Int32 in {
1296-
defm vadc: vadcsbc;
1297-
defm vsbc: vadcsbc;
1296+
defm vadc: vadcsbc<(u32 0)>;
1297+
defm vsbc: vadcsbc<(shl 1, 29)>;
12981298
}
12991299

13001300
let params = T.Int in {

clang/test/CodeGen/arm-mve-intrinsics/vadc.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigne
9292

9393
// CHECK-LABEL: @test_vsbciq_s32(
9494
// CHECK-NEXT: entry:
95-
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
95+
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912)
9696
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
9797
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
9898
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
@@ -110,7 +110,7 @@ int32x4_t test_vsbciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) {
110110

111111
// CHECK-LABEL: @test_vsbciq_u32(
112112
// CHECK-NEXT: entry:
113-
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
113+
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912)
114114
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
115115
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
116116
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
@@ -170,7 +170,7 @@ uint32x4_t test_vsbcq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry) {
170170
// CHECK-NEXT: entry:
171171
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
172172
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
173-
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
173+
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912, <4 x i1> [[TMP1]])
174174
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
175175
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
176176
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
@@ -190,7 +190,7 @@ int32x4_t test_vsbciq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsign
190190
// CHECK-NEXT: entry:
191191
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
192192
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
193-
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
193+
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 536870912, <4 x i1> [[TMP1]])
194194
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
195195
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
196196
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]

0 commit comments

Comments
 (0)