Skip to content

Commit 0df525b

Browse files
authored
[LoongArch] Add patterns to support [x]vadda.{b/h/w/d} generation (#160674)
This commit adds patterns for LSX and LASX to support generating `[x]vadda.{b/h/w/d}` instructions. Note: for convenience, this commit also sets `ISD::ABS` as legal. As shown in the tests, this brings no change to the results — they are the same as those previously obtained by expanding it. However, setting it as legal exposes more vectorization opportunities to IR transformations, which may enable further vector optimizations in later stages and in the backend.
1 parent 0fc6213 commit 0df525b

File tree

7 files changed

+298
-40
lines changed

7 files changed

+298
-40
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
340340
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
341341
Expand);
342342
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
343+
setOperationAction(ISD::ABS, VT, Legal);
343344
setOperationAction(ISD::ABDS, VT, Legal);
344345
setOperationAction(ISD::ABDU, VT, Legal);
345346
setOperationAction(ISD::SADDSAT, VT, Legal);
@@ -419,6 +420,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
419420
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
420421
Expand);
421422
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
423+
setOperationAction(ISD::ABS, VT, Legal);
422424
setOperationAction(ISD::ABDS, VT, Legal);
423425
setOperationAction(ISD::ABDU, VT, Legal);
424426
setOperationAction(ISD::SADDSAT, VT, Legal);

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2015,10 +2015,26 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
20152015
(XVFTINTRZ_LU_D v4f64:$vj)),
20162016
sub_128)>;
20172017

2018+
// abs
2019+
def : Pat<(abs v32i8:$xj), (XVMAX_B v32i8:$xj, (XVNEG_B v32i8:$xj))>;
2020+
def : Pat<(abs v16i16:$xj), (XVMAX_H v16i16:$xj, (XVNEG_H v16i16:$xj))>;
2021+
def : Pat<(abs v8i32:$xj), (XVMAX_W v8i32:$xj, (XVNEG_W v8i32:$xj))>;
2022+
def : Pat<(abs v4i64:$xj), (XVMAX_D v4i64:$xj, (XVNEG_D v4i64:$xj))>;
2023+
20182024
// XVABSD_{B/H/W/D}[U]
20192025
defm : PatXrXr<abds, "XVABSD">;
20202026
defm : PatXrXrU<abdu, "XVABSD">;
20212027

2028+
// XVADDA_{B/H/W/D}
2029+
def : Pat<(add (v32i8 (abs v32i8:$xj)), (v32i8 (abs v32i8:$xk))),
2030+
(XVADDA_B v32i8:$xj, v32i8:$xk)>;
2031+
def : Pat<(add (v16i16 (abs v16i16:$xj)), (v16i16 (abs v16i16:$xk))),
2032+
(XVADDA_H v16i16:$xj, v16i16:$xk)>;
2033+
def : Pat<(add (v8i32 (abs v8i32:$xj)), (v8i32 (abs v8i32:$xk))),
2034+
(XVADDA_W v8i32:$xj, v8i32:$xk)>;
2035+
def : Pat<(add (v4i64 (abs v4i64:$xj)), (v4i64 (abs v4i64:$xk))),
2036+
(XVADDA_D v4i64:$xj, v4i64:$xk)>;
2037+
20222038
// XVSADD_{B/H/W/D}[U], XVSSUB_{B/H/W/D}[U]
20232039
defm : PatXrXr<saddsat, "XVSADD">;
20242040
defm : PatXrXr<ssubsat, "XVSSUB">;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,10 +2154,26 @@ def : Pat<(f32 f32imm_vldi:$in),
21542154
def : Pat<(f64 f64imm_vldi:$in),
21552155
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
21562156

2157+
// abs
2158+
def : Pat<(abs v16i8:$vj), (VMAX_B v16i8:$vj, (VNEG_B v16i8:$vj))>;
2159+
def : Pat<(abs v8i16:$vj), (VMAX_H v8i16:$vj, (VNEG_H v8i16:$vj))>;
2160+
def : Pat<(abs v4i32:$vj), (VMAX_W v4i32:$vj, (VNEG_W v4i32:$vj))>;
2161+
def : Pat<(abs v2i64:$vj), (VMAX_D v2i64:$vj, (VNEG_D v2i64:$vj))>;
2162+
21572163
// VABSD_{B/H/W/D}[U]
21582164
defm : PatVrVr<abds, "VABSD">;
21592165
defm : PatVrVrU<abdu, "VABSD">;
21602166

2167+
// VADDA_{B/H/W/D}
2168+
def : Pat<(add (v16i8 (abs v16i8:$vj)), (v16i8 (abs v16i8:$vk))),
2169+
(VADDA_B v16i8:$vj, v16i8:$vk)>;
2170+
def : Pat<(add (v8i16 (abs v8i16:$vj)), (v8i16 (abs v8i16:$vk))),
2171+
(VADDA_H v8i16:$vj, v8i16:$vk)>;
2172+
def : Pat<(add (v4i32 (abs v4i32:$vj)), (v4i32 (abs v4i32:$vk))),
2173+
(VADDA_W v4i32:$vj, v4i32:$vk)>;
2174+
def : Pat<(add (v2i64 (abs v2i64:$vj)), (v2i64 (abs v2i64:$vk))),
2175+
(VADDA_D v2i64:$vj, v2i64:$vk)>;
2176+
21612177
// VSADD_{B/H/W/D}[U], VSSUB_{B/H/W/D}[U]
21622178
defm : PatVrVr<saddsat, "VSADD">;
21632179
defm : PatVrVr<ssubsat, "VSSUB">;
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
4+
5+
define void @vabs_b(ptr %dst, ptr %src) {
6+
; CHECK-LABEL: vabs_b:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: xvld $xr0, $a1, 0
9+
; CHECK-NEXT: xvneg.b $xr1, $xr0
10+
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
11+
; CHECK-NEXT: xvst $xr0, $a0, 0
12+
; CHECK-NEXT: ret
13+
entry:
14+
%a = load <32 x i8>, ptr %src
15+
%b = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 true)
16+
store <32 x i8> %b, ptr %dst
17+
ret void
18+
}
19+
20+
define void @vabs_b_1(ptr %dst, ptr %src) {
21+
; CHECK-LABEL: vabs_b_1:
22+
; CHECK: # %bb.0: # %entry
23+
; CHECK-NEXT: xvld $xr0, $a1, 0
24+
; CHECK-NEXT: xvneg.b $xr1, $xr0
25+
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
26+
; CHECK-NEXT: xvst $xr0, $a0, 0
27+
; CHECK-NEXT: ret
28+
entry:
29+
%a = load <32 x i8>, ptr %src
30+
%b = tail call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 false)
31+
store <32 x i8> %b, ptr %dst
32+
ret void
33+
}
34+
35+
define void @vabs_h(ptr %dst, ptr %src) {
36+
; CHECK-LABEL: vabs_h:
37+
; CHECK: # %bb.0: # %entry
38+
; CHECK-NEXT: xvld $xr0, $a1, 0
39+
; CHECK-NEXT: xvneg.h $xr1, $xr0
40+
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
41+
; CHECK-NEXT: xvst $xr0, $a0, 0
42+
; CHECK-NEXT: ret
43+
entry:
44+
%a = load <16 x i16>, ptr %src
45+
%b = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 true)
46+
store <16 x i16> %b, ptr %dst
47+
ret void
48+
}
49+
50+
define void @vabs_h_1(ptr %dst, ptr %src) {
51+
; CHECK-LABEL: vabs_h_1:
52+
; CHECK: # %bb.0: # %entry
53+
; CHECK-NEXT: xvld $xr0, $a1, 0
54+
; CHECK-NEXT: xvneg.h $xr1, $xr0
55+
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
56+
; CHECK-NEXT: xvst $xr0, $a0, 0
57+
; CHECK-NEXT: ret
58+
entry:
59+
%a = load <16 x i16>, ptr %src
60+
%b = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 false)
61+
store <16 x i16> %b, ptr %dst
62+
ret void
63+
}
64+
65+
define void @vabs_w(ptr %dst, ptr %src) {
66+
; CHECK-LABEL: vabs_w:
67+
; CHECK: # %bb.0: # %entry
68+
; CHECK-NEXT: xvld $xr0, $a1, 0
69+
; CHECK-NEXT: xvneg.w $xr1, $xr0
70+
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
71+
; CHECK-NEXT: xvst $xr0, $a0, 0
72+
; CHECK-NEXT: ret
73+
entry:
74+
%a = load <8 x i32>, ptr %src
75+
%b = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 true)
76+
store <8 x i32> %b, ptr %dst
77+
ret void
78+
}
79+
80+
define void @vabs_w_1(ptr %dst, ptr %src) {
81+
; CHECK-LABEL: vabs_w_1:
82+
; CHECK: # %bb.0: # %entry
83+
; CHECK-NEXT: xvld $xr0, $a1, 0
84+
; CHECK-NEXT: xvneg.w $xr1, $xr0
85+
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
86+
; CHECK-NEXT: xvst $xr0, $a0, 0
87+
; CHECK-NEXT: ret
88+
entry:
89+
%a = load <8 x i32>, ptr %src
90+
%b = tail call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 false)
91+
store <8 x i32> %b, ptr %dst
92+
ret void
93+
}
94+
95+
define void @vabs_d(ptr %dst, ptr %src) {
96+
; CHECK-LABEL: vabs_d:
97+
; CHECK: # %bb.0: # %entry
98+
; CHECK-NEXT: xvld $xr0, $a1, 0
99+
; CHECK-NEXT: xvneg.d $xr1, $xr0
100+
; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
101+
; CHECK-NEXT: xvst $xr0, $a0, 0
102+
; CHECK-NEXT: ret
103+
entry:
104+
%a = load <4 x i64>, ptr %src
105+
%b = tail call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 true)
106+
store <4 x i64> %b, ptr %dst
107+
ret void
108+
}
109+
110+
define void @vabs_d_1(ptr %dst, ptr %src) {
111+
; CHECK-LABEL: vabs_d_1:
112+
; CHECK: # %bb.0: # %entry
113+
; CHECK-NEXT: xvld $xr0, $a1, 0
114+
; CHECK-NEXT: xvneg.d $xr1, $xr0
115+
; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
116+
; CHECK-NEXT: xvst $xr0, $a0, 0
117+
; CHECK-NEXT: ret
118+
entry:
119+
%a = load <4 x i64>, ptr %src
120+
%b = tail call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 false)
121+
store <4 x i64> %b, ptr %dst
122+
ret void
123+
}
124+
125+
declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
126+
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
127+
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
128+
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/adda.ll

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,7 @@ define void @vadda_b(ptr %res, ptr %a, ptr %b) nounwind {
77
; CHECK: # %bb.0: # %entry
88
; CHECK-NEXT: xvld $xr0, $a1, 0
99
; CHECK-NEXT: xvld $xr1, $a2, 0
10-
; CHECK-NEXT: xvneg.b $xr2, $xr0
11-
; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr2
12-
; CHECK-NEXT: xvneg.b $xr2, $xr1
13-
; CHECK-NEXT: xvmax.b $xr1, $xr1, $xr2
14-
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
10+
; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1
1511
; CHECK-NEXT: xvst $xr0, $a0, 0
1612
; CHECK-NEXT: ret
1713
entry:
@@ -33,11 +29,7 @@ define void @vadda_h(ptr %res, ptr %a, ptr %b) nounwind {
3329
; CHECK: # %bb.0: # %entry
3430
; CHECK-NEXT: xvld $xr0, $a1, 0
3531
; CHECK-NEXT: xvld $xr1, $a2, 0
36-
; CHECK-NEXT: xvneg.h $xr2, $xr0
37-
; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr2
38-
; CHECK-NEXT: xvneg.h $xr2, $xr1
39-
; CHECK-NEXT: xvmax.h $xr1, $xr1, $xr2
40-
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
32+
; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1
4133
; CHECK-NEXT: xvst $xr0, $a0, 0
4234
; CHECK-NEXT: ret
4335
entry:
@@ -59,11 +51,7 @@ define void @vadda_w(ptr %res, ptr %a, ptr %b) nounwind {
5951
; CHECK: # %bb.0: # %entry
6052
; CHECK-NEXT: xvld $xr0, $a1, 0
6153
; CHECK-NEXT: xvld $xr1, $a2, 0
62-
; CHECK-NEXT: xvneg.w $xr2, $xr0
63-
; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr2
64-
; CHECK-NEXT: xvneg.w $xr2, $xr1
65-
; CHECK-NEXT: xvmax.w $xr1, $xr1, $xr2
66-
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
54+
; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1
6755
; CHECK-NEXT: xvst $xr0, $a0, 0
6856
; CHECK-NEXT: ret
6957
entry:
@@ -85,11 +73,7 @@ define void @vadda_d(ptr %res, ptr %a, ptr %b) nounwind {
8573
; CHECK: # %bb.0: # %entry
8674
; CHECK-NEXT: xvld $xr0, $a1, 0
8775
; CHECK-NEXT: xvld $xr1, $a2, 0
88-
; CHECK-NEXT: xvneg.d $xr2, $xr0
89-
; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr2
90-
; CHECK-NEXT: xvneg.d $xr2, $xr1
91-
; CHECK-NEXT: xvmax.d $xr1, $xr1, $xr2
92-
; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
76+
; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1
9377
; CHECK-NEXT: xvst $xr0, $a0, 0
9478
; CHECK-NEXT: ret
9579
entry:
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
4+
5+
define void @vabs_b(ptr %dst, ptr %src) {
6+
; CHECK-LABEL: vabs_b:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: vld $vr0, $a1, 0
9+
; CHECK-NEXT: vneg.b $vr1, $vr0
10+
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
11+
; CHECK-NEXT: vst $vr0, $a0, 0
12+
; CHECK-NEXT: ret
13+
entry:
14+
%a = load <16 x i8>, ptr %src
15+
%b = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 true)
16+
store <16 x i8> %b, ptr %dst
17+
ret void
18+
}
19+
20+
define void @vabs_b_1(ptr %dst, ptr %src) {
21+
; CHECK-LABEL: vabs_b_1:
22+
; CHECK: # %bb.0: # %entry
23+
; CHECK-NEXT: vld $vr0, $a1, 0
24+
; CHECK-NEXT: vneg.b $vr1, $vr0
25+
; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
26+
; CHECK-NEXT: vst $vr0, $a0, 0
27+
; CHECK-NEXT: ret
28+
entry:
29+
%a = load <16 x i8>, ptr %src
30+
%b = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 false)
31+
store <16 x i8> %b, ptr %dst
32+
ret void
33+
}
34+
35+
define void @vabs_h(ptr %dst, ptr %src) {
36+
; CHECK-LABEL: vabs_h:
37+
; CHECK: # %bb.0: # %entry
38+
; CHECK-NEXT: vld $vr0, $a1, 0
39+
; CHECK-NEXT: vneg.h $vr1, $vr0
40+
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
41+
; CHECK-NEXT: vst $vr0, $a0, 0
42+
; CHECK-NEXT: ret
43+
entry:
44+
%a = load <8 x i16>, ptr %src
45+
%b = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 true)
46+
store <8 x i16> %b, ptr %dst
47+
ret void
48+
}
49+
50+
define void @vabs_h_1(ptr %dst, ptr %src) {
51+
; CHECK-LABEL: vabs_h_1:
52+
; CHECK: # %bb.0: # %entry
53+
; CHECK-NEXT: vld $vr0, $a1, 0
54+
; CHECK-NEXT: vneg.h $vr1, $vr0
55+
; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
56+
; CHECK-NEXT: vst $vr0, $a0, 0
57+
; CHECK-NEXT: ret
58+
entry:
59+
%a = load <8 x i16>, ptr %src
60+
%b = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 false)
61+
store <8 x i16> %b, ptr %dst
62+
ret void
63+
}
64+
65+
define void @vabs_w(ptr %dst, ptr %src) {
66+
; CHECK-LABEL: vabs_w:
67+
; CHECK: # %bb.0: # %entry
68+
; CHECK-NEXT: vld $vr0, $a1, 0
69+
; CHECK-NEXT: vneg.w $vr1, $vr0
70+
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
71+
; CHECK-NEXT: vst $vr0, $a0, 0
72+
; CHECK-NEXT: ret
73+
entry:
74+
%a = load <4 x i32>, ptr %src
75+
%b = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 true)
76+
store <4 x i32> %b, ptr %dst
77+
ret void
78+
}
79+
80+
define void @vabs_w_1(ptr %dst, ptr %src) {
81+
; CHECK-LABEL: vabs_w_1:
82+
; CHECK: # %bb.0: # %entry
83+
; CHECK-NEXT: vld $vr0, $a1, 0
84+
; CHECK-NEXT: vneg.w $vr1, $vr0
85+
; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
86+
; CHECK-NEXT: vst $vr0, $a0, 0
87+
; CHECK-NEXT: ret
88+
entry:
89+
%a = load <4 x i32>, ptr %src
90+
%b = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 false)
91+
store <4 x i32> %b, ptr %dst
92+
ret void
93+
}
94+
95+
define void @vabs_d(ptr %dst, ptr %src) {
96+
; CHECK-LABEL: vabs_d:
97+
; CHECK: # %bb.0: # %entry
98+
; CHECK-NEXT: vld $vr0, $a1, 0
99+
; CHECK-NEXT: vneg.d $vr1, $vr0
100+
; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1
101+
; CHECK-NEXT: vst $vr0, $a0, 0
102+
; CHECK-NEXT: ret
103+
entry:
104+
%a = load <2 x i64>, ptr %src
105+
%b = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 true)
106+
store <2 x i64> %b, ptr %dst
107+
ret void
108+
}
109+
110+
define void @vabs_d_1(ptr %dst, ptr %src) {
111+
; CHECK-LABEL: vabs_d_1:
112+
; CHECK: # %bb.0: # %entry
113+
; CHECK-NEXT: vld $vr0, $a1, 0
114+
; CHECK-NEXT: vneg.d $vr1, $vr0
115+
; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1
116+
; CHECK-NEXT: vst $vr0, $a0, 0
117+
; CHECK-NEXT: ret
118+
entry:
119+
%a = load <2 x i64>, ptr %src
120+
%b = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 false)
121+
store <2 x i64> %b, ptr %dst
122+
ret void
123+
}
124+
125+
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
126+
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
127+
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
128+
declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)

0 commit comments

Comments
 (0)