Skip to content

Commit e29a082

Browse files
committed
[ARM] Fix MVE VSBCI instruction selection
The MVE VSBCI instruction should be selected when the carry-in is set, unlike the VADCI instruction, which should be selected when the carry-in is zero. The selection logic in SelectMVE_VADCSBC already handled this distinction via its Add parameter, but the function was always called with Add=true — even for the subtract intrinsics — so VSBCI was chosen for the wrong carry-in value. Pass Add=false for the vsbc/vsbc_predicated intrinsics, and update the tests so that VSBCI is expected for a carry-in of 0x20000000 (bit 29 set) while plain VSBC is expected for a carry-in of zero.
1 parent 34d4cd8 commit e29a082

File tree

3 files changed

+92
-4
lines changed

3 files changed

+92
-4
lines changed

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5229,7 +5229,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
52295229
return;
52305230
case Intrinsic::arm_mve_vsbc:
52315231
case Intrinsic::arm_mve_vsbc_predicated:
5232-
SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
5232+
SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
52335233
IntNo == Intrinsic::arm_mve_vsbc_predicated);
52345234
return;
52355235
case Intrinsic::arm_mve_vshlc:

llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc.ll

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_s32(<4 x i32> %a, <4 x i32> %b, pt
108108
; CHECK-NEXT: str r1, [r0]
109109
; CHECK-NEXT: bx lr
110110
entry:
111-
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
111+
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
112112
%1 = extractvalue { <4 x i32>, i32 } %0, 1
113113
%2 = lshr i32 %1, 29
114114
%3 = and i32 %2, 1
@@ -125,6 +125,46 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_u32(<4 x i32> %a, <4 x i32> %b, pt
125125
; CHECK-NEXT: ubfx r1, r1, #29, #1
126126
; CHECK-NEXT: str r1, [r0]
127127
; CHECK-NEXT: bx lr
128+
entry:
129+
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 536870912)
130+
%1 = extractvalue { <4 x i32>, i32 } %0, 1
131+
%2 = lshr i32 %1, 29
132+
%3 = and i32 %2, 1
133+
store i32 %3, ptr %carry_out, align 4
134+
%4 = extractvalue { <4 x i32>, i32 } %0, 0
135+
ret <4 x i32> %4
136+
}
137+
138+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_s32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
139+
; CHECK-LABEL: test_vsbcq_s32_carry_in_zero:
140+
; CHECK: @ %bb.0: @ %entry
141+
; CHECK-NEXT: movs r1, #0
142+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r1
143+
; CHECK-NEXT: vsbc.i32 q0, q0, q1
144+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
145+
; CHECK-NEXT: ubfx r1, r1, #29, #1
146+
; CHECK-NEXT: str r1, [r0]
147+
; CHECK-NEXT: bx lr
148+
entry:
149+
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
150+
%1 = extractvalue { <4 x i32>, i32 } %0, 1
151+
%2 = lshr i32 %1, 29
152+
%3 = and i32 %2, 1
153+
store i32 %3, ptr %carry_out, align 4
154+
%4 = extractvalue { <4 x i32>, i32 } %0, 0
155+
ret <4 x i32> %4
156+
}
157+
158+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_u32_carry_in_zero(<4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out) {
159+
; CHECK-LABEL: test_vsbcq_u32_carry_in_zero:
160+
; CHECK: @ %bb.0: @ %entry
161+
; CHECK-NEXT: movs r1, #0
162+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r1
163+
; CHECK-NEXT: vsbc.i32 q0, q0, q1
164+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
165+
; CHECK-NEXT: ubfx r1, r1, #29, #1
166+
; CHECK-NEXT: str r1, [r0]
167+
; CHECK-NEXT: bx lr
128168
entry:
129169
%0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0)
130170
%1 = extractvalue { <4 x i32>, i32 } %0, 1
@@ -196,7 +236,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_s32(<4 x i32> %inactive, <4 x i3
196236
entry:
197237
%0 = zext i16 %p to i32
198238
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
199-
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
239+
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
200240
%3 = extractvalue { <4 x i32>, i32 } %2, 1
201241
%4 = lshr i32 %3, 29
202242
%5 = and i32 %4, 1
@@ -215,6 +255,54 @@ define arm_aapcs_vfpcc <4 x i32> @test_vsbciq_m_u32(<4 x i32> %inactive, <4 x i3
215255
; CHECK-NEXT: ubfx r1, r1, #29, #1
216256
; CHECK-NEXT: str r1, [r0]
217257
; CHECK-NEXT: bx lr
258+
entry:
259+
%0 = zext i16 %p to i32
260+
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
261+
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 536870912, <4 x i1> %1)
262+
%3 = extractvalue { <4 x i32>, i32 } %2, 1
263+
%4 = lshr i32 %3, 29
264+
%5 = and i32 %4, 1
265+
store i32 %5, ptr %carry_out, align 4
266+
%6 = extractvalue { <4 x i32>, i32 } %2, 0
267+
ret <4 x i32> %6
268+
}
269+
270+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_s32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
271+
; CHECK-LABEL: test_vsbcq_m_s32_carry_in_zero:
272+
; CHECK: @ %bb.0: @ %entry
273+
; CHECK-NEXT: movs r2, #0
274+
; CHECK-NEXT: vmsr p0, r1
275+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r2
276+
; CHECK-NEXT: vpst
277+
; CHECK-NEXT: vsbct.i32 q0, q1, q2
278+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
279+
; CHECK-NEXT: ubfx r1, r1, #29, #1
280+
; CHECK-NEXT: str r1, [r0]
281+
; CHECK-NEXT: bx lr
282+
entry:
283+
%0 = zext i16 %p to i32
284+
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
285+
%2 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1)
286+
%3 = extractvalue { <4 x i32>, i32 } %2, 1
287+
%4 = lshr i32 %3, 29
288+
%5 = and i32 %4, 1
289+
store i32 %5, ptr %carry_out, align 4
290+
%6 = extractvalue { <4 x i32>, i32 } %2, 0
291+
ret <4 x i32> %6
292+
}
293+
294+
define arm_aapcs_vfpcc <4 x i32> @test_vsbcq_m_u32_carry_in_zero(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, ptr nocapture %carry_out, i16 zeroext %p) {
295+
; CHECK-LABEL: test_vsbcq_m_u32_carry_in_zero:
296+
; CHECK: @ %bb.0: @ %entry
297+
; CHECK-NEXT: movs r2, #0
298+
; CHECK-NEXT: vmsr p0, r1
299+
; CHECK-NEXT: vmsr fpscr_nzcvqc, r2
300+
; CHECK-NEXT: vpst
301+
; CHECK-NEXT: vsbct.i32 q0, q1, q2
302+
; CHECK-NEXT: vmrs r1, fpscr_nzcvqc
303+
; CHECK-NEXT: ubfx r1, r1, #29, #1
304+
; CHECK-NEXT: str r1, [r0]
305+
; CHECK-NEXT: bx lr
218306
entry:
219307
%0 = zext i16 %p to i32
220308
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)

llvm/test/CodeGen/Thumb2/mve-vadc-vsbc-spill.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ define void @sub_256(<4 x i32> %a_low, <4 x i32> %a_high, <4 x i32> %b_low, <4 x
5959
; CHECK-NEXT: pop.w {r7, lr}
6060
; CHECK-NEXT: b use_int32x4_t
6161
entry:
62-
%adc_low = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a_low, <4 x i32> %b_low, i32 0)
62+
%adc_low = tail call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> %a_low, <4 x i32> %b_low, i32 536870912)
6363
%carry = extractvalue { <4 x i32>, i32 } %adc_low, 1
6464
%result_low = extractvalue { <4 x i32>, i32 } %adc_low, 0
6565
tail call void @use_int32x4_t(<4 x i32> %result_low)

0 commit comments

Comments
 (0)