Skip to content

Commit 86c4ef5

Browse files
authored
[AArch64] Add patterns for sub from add negative immediates (#156024)
`sub 3` will be canonicalized in LLVM to `add -3`. This adds some TableGen patterns for an add of a negative immediate so that we can still generate SVE sub-immediate instructions. The alternative is to add an isel combine, which seemed to work but created problems for the mad and index patterns. This version still needs to give the ComplexPatterns a lower-than-default Complexity to ensure that index is selected over sub-imm + index, as the default Complexity on ComplexPatterns is quite high. Fixes #155928
1 parent ff77c73 commit 86c4ef5

File tree

8 files changed

+67
-56
lines changed

8 files changed

+67
-56
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
246246
return false;
247247
}
248248

249-
template<MVT::SimpleValueType VT>
249+
template <MVT::SimpleValueType VT, bool Negate>
250250
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
251-
return SelectSVEAddSubImm(N, VT, Imm, Shift);
251+
return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
252252
}
253253

254254
template <MVT::SimpleValueType VT, bool Negate>
@@ -489,7 +489,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
489489

490490
bool SelectCMP_SWAP(SDNode *N);
491491

492-
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
492+
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493+
bool Negate);
493494
bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
494495
bool Negate);
495496
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -4227,35 +4228,36 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
42274228
}
42284229

42294230
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4230-
SDValue &Shift) {
4231+
SDValue &Shift, bool Negate) {
42314232
if (!isa<ConstantSDNode>(N))
42324233
return false;
42334234

42344235
SDLoc DL(N);
4235-
uint64_t Val = cast<ConstantSDNode>(N)
4236-
->getAPIntValue()
4237-
.trunc(VT.getFixedSizeInBits())
4238-
.getZExtValue();
4236+
APInt Val =
4237+
cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4238+
4239+
if (Negate)
4240+
Val = -Val;
42394241

42404242
switch (VT.SimpleTy) {
42414243
case MVT::i8:
42424244
// All immediates are supported.
42434245
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4244-
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4246+
Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
42454247
return true;
42464248
case MVT::i16:
42474249
case MVT::i32:
42484250
case MVT::i64:
42494251
// Support 8bit unsigned immediates.
4250-
if (Val <= 255) {
4252+
if ((Val & ~0xff) == 0) {
42514253
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4252-
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4254+
Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
42534255
return true;
42544256
}
42554257
// Support 16bit unsigned immediates that are a multiple of 256.
4256-
if (Val <= 65280 && Val % 256 == 0) {
4258+
if ((Val & ~0xff00) == 0) {
42574259
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4258-
Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4260+
Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
42594261
return true;
42604262
}
42614263
break;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ let Predicates = [HasSVE_or_SME, UseExperimentalZeroingPseudos] in {
650650

651651
let Predicates = [HasSVE_or_SME] in {
652652
defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>;
653-
defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>;
653+
defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub, add>;
654654
defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
655655
defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat, ssubsat>;
656656
defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -315,10 +315,16 @@ def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16
315315
def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32>;
316316
def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64>;
317317

318-
def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8>", []>;
319-
def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", []>;
320-
def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
321-
def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
318+
let Complexity = 1 in {
319+
def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8, false>", []>;
320+
def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16, false>", []>;
321+
def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32, false>", []>;
322+
def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64, false>", []>;
323+
324+
def SVEAddSubNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8, true>", []>;
325+
def SVEAddSubNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16, true>", []>;
326+
def SVEAddSubNegImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32, true>", []>;
327+
def SVEAddSubNegImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64, true>", []>;
322328

323329
def SVEAddSubSSatNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, true>", []>;
324330
def SVEAddSubSSatNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, true>", []>;
@@ -329,6 +335,7 @@ def SVEAddSubSSatPosImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MV
329335
def SVEAddSubSSatPosImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, false>", []>;
330336
def SVEAddSubSSatPosImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, false>", []>;
331337
def SVEAddSubSSatPosImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, false>", []>;
338+
} // Complexity = 1
332339

333340
def SVECpyDupImm8Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
334341
def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
@@ -5221,7 +5228,8 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
52215228
let hasSideEffects = 0;
52225229
}
52235230

5224-
multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
5231+
multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op,
5232+
SDPatternOperator inv_op = null_frag> {
52255233
def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
52265234
def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
52275235
def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
@@ -5231,6 +5239,12 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
52315239
def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
52325240
def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
52335241
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
5242+
5243+
// Extra patterns for add(x, splat(-ve)) -> sub(x, +ve). There is no i8
5244+
// pattern as all i8 constants can be handled by an add.
5245+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubNegImm16Pat, !cast<Instruction>(NAME # _H)>;
5246+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubNegImm32Pat, !cast<Instruction>(NAME # _S)>;
5247+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubNegImm64Pat, !cast<Instruction>(NAME # _D)>;
52345248
}
52355249

52365250
multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op,

llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ define void @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_
1313
; CHECK-LABEL: fixed_bitselect_v8i32:
1414
; CHECK: // %bb.0:
1515
; CHECK-NEXT: ptrue p0.s, vl8
16-
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
1716
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
18-
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
17+
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
1918
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x2]
20-
; CHECK-NEXT: add z1.s, z0.s, z1.s
21-
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
22-
; CHECK-NEXT: and z0.d, z0.d, z2.d
23-
; CHECK-NEXT: and z1.d, z1.d, z3.d
24-
; CHECK-NEXT: orr z0.d, z1.d, z0.d
19+
; CHECK-NEXT: mov z2.d, z0.d
20+
; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
21+
; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
22+
; CHECK-NEXT: and z0.d, z0.d, z3.d
23+
; CHECK-NEXT: and z1.d, z2.d, z1.d
24+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
2525
; CHECK-NEXT: st1w { z0.s }, p0, [x3]
2626
; CHECK-NEXT: ret
2727
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr

llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ define <4 x i32> @v4i32_neg_immediates() #0 {
9494
define <4 x i32> @v4i32_out_range_start() #0 {
9595
; CHECK-LABEL: v4i32_out_range_start:
9696
; CHECK: // %bb.0:
97-
; CHECK-NEXT: index z0.s, #0, #1
98-
; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
97+
; CHECK-NEXT: mov w8, #16 // =0x10
98+
; CHECK-NEXT: index z0.s, w8, #1
9999
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
100100
; CHECK-NEXT: ret
101101
ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>

llvm/test/CodeGen/AArch64/sve-int-imm.ll

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,7 @@ define <vscale x 16 x i8> @addnve_i8_low(<vscale x 16 x i8> %a) {
229229
define <vscale x 8 x i16> @addnve_i16_low(<vscale x 8 x i16> %a) {
230230
; CHECK-LABEL: addnve_i16_low:
231231
; CHECK: // %bb.0:
232-
; CHECK-NEXT: mov z1.h, #-30 // =0xffffffffffffffe2
233-
; CHECK-NEXT: add z0.h, z0.h, z1.h
232+
; CHECK-NEXT: sub z0.h, z0.h, #30 // =0x1e
234233
; CHECK-NEXT: ret
235234
%res = add <vscale x 8 x i16> %a, splat(i16 -30)
236235
ret <vscale x 8 x i16> %res
@@ -248,8 +247,7 @@ define <vscale x 8 x i16> @addnve_i16_high(<vscale x 8 x i16> %a) {
248247
define <vscale x 4 x i32> @addnve_i32_low(<vscale x 4 x i32> %a) {
249248
; CHECK-LABEL: addnve_i32_low:
250249
; CHECK: // %bb.0:
251-
; CHECK-NEXT: mov z1.s, #-30 // =0xffffffffffffffe2
252-
; CHECK-NEXT: add z0.s, z0.s, z1.s
250+
; CHECK-NEXT: sub z0.s, z0.s, #30 // =0x1e
253251
; CHECK-NEXT: ret
254252
%res = add <vscale x 4 x i32> %a, splat(i32 -30)
255253
ret <vscale x 4 x i32> %res
@@ -258,8 +256,7 @@ define <vscale x 4 x i32> @addnve_i32_low(<vscale x 4 x i32> %a) {
258256
define <vscale x 4 x i32> @addnve_i32_high(<vscale x 4 x i32> %a) {
259257
; CHECK-LABEL: addnve_i32_high:
260258
; CHECK: // %bb.0:
261-
; CHECK-NEXT: mov z1.s, #-1024 // =0xfffffffffffffc00
262-
; CHECK-NEXT: add z0.s, z0.s, z1.s
259+
; CHECK-NEXT: sub z0.s, z0.s, #1024 // =0x400
263260
; CHECK-NEXT: ret
264261
%res = add <vscale x 4 x i32> %a, splat(i32 -1024)
265262
ret <vscale x 4 x i32> %res
@@ -268,8 +265,7 @@ define <vscale x 4 x i32> @addnve_i32_high(<vscale x 4 x i32> %a) {
268265
define <vscale x 2 x i64> @addnve_i64_low(<vscale x 2 x i64> %a) {
269266
; CHECK-LABEL: addnve_i64_low:
270267
; CHECK: // %bb.0:
271-
; CHECK-NEXT: mov z1.d, #-30 // =0xffffffffffffffe2
272-
; CHECK-NEXT: add z0.d, z0.d, z1.d
268+
; CHECK-NEXT: sub z0.d, z0.d, #30 // =0x1e
273269
; CHECK-NEXT: ret
274270
%res = add <vscale x 2 x i64> %a, splat(i64 -30)
275271
ret <vscale x 2 x i64> %res
@@ -278,8 +274,7 @@ define <vscale x 2 x i64> @addnve_i64_low(<vscale x 2 x i64> %a) {
278274
define <vscale x 2 x i64> @addnve_i64_high(<vscale x 2 x i64> %a) {
279275
; CHECK-LABEL: addnve_i64_high:
280276
; CHECK: // %bb.0:
281-
; CHECK-NEXT: mov z1.d, #-1024 // =0xfffffffffffffc00
282-
; CHECK-NEXT: add z0.d, z0.d, z1.d
277+
; CHECK-NEXT: sub z0.d, z0.d, #1024 // =0x400
283278
; CHECK-NEXT: ret
284279
%res = add <vscale x 2 x i64> %a, splat(i64 -1024)
285280
ret <vscale x 2 x i64> %res

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,21 @@ target triple = "aarch64"
1414
define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_ptr) {
1515
; CHECK-LABEL: fixed_bitselect_v8i32:
1616
; CHECK: // %bb.0:
17-
; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff
18-
; CHECK-NEXT: ldp q2, q1, [x0]
17+
; CHECK-NEXT: ldp q1, q0, [x0]
1918
; CHECK-NEXT: ldp q5, q4, [x1]
2019
; CHECK-NEXT: ldp q6, q7, [x2]
21-
; CHECK-NEXT: add z3.s, z1.s, z0.s
22-
; CHECK-NEXT: subr z1.s, z1.s, #0 // =0x0
23-
; CHECK-NEXT: add z0.s, z2.s, z0.s
20+
; CHECK-NEXT: mov z2.d, z0.d
21+
; CHECK-NEXT: mov z3.d, z1.d
22+
; CHECK-NEXT: sub z1.s, z1.s, #1 // =0x1
23+
; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
2424
; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
25-
; CHECK-NEXT: and z1.d, z1.d, z4.d
26-
; CHECK-NEXT: and z3.d, z3.d, z7.d
27-
; CHECK-NEXT: and z0.d, z0.d, z6.d
28-
; CHECK-NEXT: and z2.d, z2.d, z5.d
29-
; CHECK-NEXT: orr z1.d, z3.d, z1.d
30-
; CHECK-NEXT: orr z0.d, z0.d, z2.d
25+
; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
26+
; CHECK-NEXT: and z2.d, z2.d, z4.d
27+
; CHECK-NEXT: and z3.d, z3.d, z5.d
28+
; CHECK-NEXT: and z4.d, z0.d, z7.d
29+
; CHECK-NEXT: and z0.d, z1.d, z6.d
30+
; CHECK-NEXT: orr z1.d, z4.d, z2.d
31+
; CHECK-NEXT: orr z0.d, z0.d, z3.d
3132
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
3233
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
3334
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ define void @build_vector_0_dec3_v8i32(ptr %a) {
6969
; CHECK-LABEL: build_vector_0_dec3_v8i32:
7070
; CHECK: // %bb.0:
7171
; CHECK-NEXT: index z0.s, #0, #-3
72-
; CHECK-NEXT: mov z1.s, #-12 // =0xfffffffffffffff4
73-
; CHECK-NEXT: add z1.s, z0.s, z1.s
74-
; CHECK-NEXT: stp q0, q1, [x0]
72+
; CHECK-NEXT: str q0, [x0]
73+
; CHECK-NEXT: sub z0.s, z0.s, #12 // =0xc
74+
; CHECK-NEXT: str q0, [x0, #16]
7575
; CHECK-NEXT: ret
7676
;
7777
; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8i32:
@@ -91,11 +91,10 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) {
9191
; CHECK-LABEL: build_vector_minus2_dec32_v4i64:
9292
; CHECK: // %bb.0:
9393
; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0
94-
; CHECK-NEXT: mov z1.d, #-66 // =0xffffffffffffffbe
95-
; CHECK-NEXT: mov z2.d, #-2 // =0xfffffffffffffffe
9694
; CHECK-NEXT: index z0.d, #0, x8
97-
; CHECK-NEXT: add z1.d, z0.d, z1.d
98-
; CHECK-NEXT: add z0.d, z0.d, z2.d
95+
; CHECK-NEXT: mov z1.d, z0.d
96+
; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
97+
; CHECK-NEXT: sub z1.d, z1.d, #66 // =0x42
9998
; CHECK-NEXT: stp q0, q1, [x0]
10099
; CHECK-NEXT: ret
101100
;

0 commit comments

Comments
 (0)