Skip to content

Commit 27eabd5

Browse files
authored
[LoongArch] Add patterns to support vector type average instructions generation (#161079)
NOTE: For simplicity and convenience, the `v2i64/v4i64` types on LA32 are not optimized. Implementing this in the future would require special handling for `bitcast` and `build_vector`.
1 parent d9e5e72 commit 27eabd5

File tree

4 files changed

+208
-132
lines changed

4 files changed

+208
-132
lines changed

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2027,6 +2027,24 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
20272027
(XVFTINTRZ_LU_D v4f64:$vj)),
20282028
sub_128)>;
20292029

2030+
// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
2031+
defm : VAvgPat<sra, "XVAVG_B", v32i8>;
2032+
defm : VAvgPat<sra, "XVAVG_H", v16i16>;
2033+
defm : VAvgPat<sra, "XVAVG_W", v8i32>;
2034+
defm : VAvgPat<sra, "XVAVG_D", v4i64>;
2035+
defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
2036+
defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
2037+
defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
2038+
defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
2039+
defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
2040+
defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
2041+
defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
2042+
defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
2043+
defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
2044+
defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
2045+
defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
2046+
defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
2047+
20302048
// abs
20312049
def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
20322050
def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,6 +1518,18 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
15181518
}
15191519
}
15201520

1521+
// Selection pattern for the vector "average" instructions.
//
// Matches `OpNode (add $vj, $vk), splat(1)` on vector type `vt` and selects
// the instruction record named by `Inst` with operands ($vj, $vk).
//   OpNode - the shift node to match (instantiated with `sra` for the signed
//            forms and `srl` for the unsigned forms).
//   Inst   - name of the instruction record, resolved via !cast<LAInst>.
//   vt     - vector value type of both operands and the result.
//
// NOTE(review): the matched `add` is a plain element-width add, which wraps
// on overflow. If the target average instruction computes (a + b) >> 1
// without intermediate overflow, the two differ whenever the element sum
// overflows (e.g. i8 127 + 127) -- confirm against the ISA manual, and
// consider matching the generic averaging nodes instead if so.
multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
1522+
def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))),
1523+
(!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
1524+
}
1525+
1526+
// Selection pattern for the vector "rounding average" instructions.
//
// Matches `OpNode (add (add $vj, $vk), splat(1)), splat(1)` on vector type
// `vt` and selects the instruction record named by `Inst` with operands
// ($vj, $vk).
//   OpNode - the shift node to match (instantiated with `sra` for the signed
//            forms and `srl` for the unsigned forms).
//   Inst   - name of the instruction record, resolved via !cast<LAInst>.
//   vt     - vector value type of both operands and the result.
//
// NOTE(review): both matched `add` nodes are plain element-width adds, which
// wrap on overflow. If the target instruction computes (a + b + 1) >> 1
// without intermediate overflow, the results differ whenever the element sum
// overflows -- confirm against the ISA manual before relying on this pattern.
multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
1527+
def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
1528+
(vt (vsplat_imm_eq_1)))),
1529+
(vt (vsplat_imm_eq_1))),
1530+
(!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
1531+
}
1532+
15211533
let Predicates = [HasExtLSX] in {
15221534

15231535
// VADD_{B/H/W/D}
@@ -2157,6 +2169,24 @@ def : Pat<(f32 f32imm_vldi:$in),
21572169
def : Pat<(f64 f64imm_vldi:$in),
21582170
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
21592171

2172+
// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
2173+
defm : VAvgPat<sra, "VAVG_B", v16i8>;
2174+
defm : VAvgPat<sra, "VAVG_H", v8i16>;
2175+
defm : VAvgPat<sra, "VAVG_W", v4i32>;
2176+
defm : VAvgPat<sra, "VAVG_D", v2i64>;
2177+
defm : VAvgPat<srl, "VAVG_BU", v16i8>;
2178+
defm : VAvgPat<srl, "VAVG_HU", v8i16>;
2179+
defm : VAvgPat<srl, "VAVG_WU", v4i32>;
2180+
defm : VAvgPat<srl, "VAVG_DU", v2i64>;
2181+
defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
2182+
defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
2183+
defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
2184+
defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
2185+
defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
2186+
defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
2187+
defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
2188+
defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
2189+
21602190
// abs
21612191
def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
21622192
def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>;

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll

Lines changed: 80 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
3-
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
44

55
define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind {
66
; CHECK-LABEL: xvavg_b:
77
; CHECK: # %bb.0: # %entry
88
; CHECK-NEXT: xvld $xr0, $a1, 0
99
; CHECK-NEXT: xvld $xr1, $a2, 0
10-
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
11-
; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
10+
; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1
1211
; CHECK-NEXT: xvst $xr0, $a0, 0
1312
; CHECK-NEXT: ret
1413
entry:
@@ -25,8 +24,7 @@ define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind {
2524
; CHECK: # %bb.0: # %entry
2625
; CHECK-NEXT: xvld $xr0, $a1, 0
2726
; CHECK-NEXT: xvld $xr1, $a2, 0
28-
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
29-
; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
27+
; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1
3028
; CHECK-NEXT: xvst $xr0, $a0, 0
3129
; CHECK-NEXT: ret
3230
entry:
@@ -43,8 +41,7 @@ define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind {
4341
; CHECK: # %bb.0: # %entry
4442
; CHECK-NEXT: xvld $xr0, $a1, 0
4543
; CHECK-NEXT: xvld $xr1, $a2, 0
46-
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
47-
; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
44+
; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1
4845
; CHECK-NEXT: xvst $xr0, $a0, 0
4946
; CHECK-NEXT: ret
5047
entry:
@@ -57,14 +54,22 @@ entry:
5754
}
5855

5956
define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind {
60-
; CHECK-LABEL: xvavg_d:
61-
; CHECK: # %bb.0: # %entry
62-
; CHECK-NEXT: xvld $xr0, $a1, 0
63-
; CHECK-NEXT: xvld $xr1, $a2, 0
64-
; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
65-
; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
66-
; CHECK-NEXT: xvst $xr0, $a0, 0
67-
; CHECK-NEXT: ret
57+
; LA32-LABEL: xvavg_d:
58+
; LA32: # %bb.0: # %entry
59+
; LA32-NEXT: xvld $xr0, $a1, 0
60+
; LA32-NEXT: xvld $xr1, $a2, 0
61+
; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
62+
; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
63+
; LA32-NEXT: xvst $xr0, $a0, 0
64+
; LA32-NEXT: ret
65+
;
66+
; LA64-LABEL: xvavg_d:
67+
; LA64: # %bb.0: # %entry
68+
; LA64-NEXT: xvld $xr0, $a1, 0
69+
; LA64-NEXT: xvld $xr1, $a2, 0
70+
; LA64-NEXT: xvavg.d $xr0, $xr0, $xr1
71+
; LA64-NEXT: xvst $xr0, $a0, 0
72+
; LA64-NEXT: ret
6873
entry:
6974
%va = load <4 x i64>, ptr %a
7075
%vb = load <4 x i64>, ptr %b
@@ -79,8 +84,7 @@ define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind {
7984
; CHECK: # %bb.0: # %entry
8085
; CHECK-NEXT: xvld $xr0, $a1, 0
8186
; CHECK-NEXT: xvld $xr1, $a2, 0
82-
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
83-
; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
87+
; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1
8488
; CHECK-NEXT: xvst $xr0, $a0, 0
8589
; CHECK-NEXT: ret
8690
entry:
@@ -97,8 +101,7 @@ define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind {
97101
; CHECK: # %bb.0: # %entry
98102
; CHECK-NEXT: xvld $xr0, $a1, 0
99103
; CHECK-NEXT: xvld $xr1, $a2, 0
100-
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
101-
; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
104+
; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1
102105
; CHECK-NEXT: xvst $xr0, $a0, 0
103106
; CHECK-NEXT: ret
104107
entry:
@@ -115,8 +118,7 @@ define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind {
115118
; CHECK: # %bb.0: # %entry
116119
; CHECK-NEXT: xvld $xr0, $a1, 0
117120
; CHECK-NEXT: xvld $xr1, $a2, 0
118-
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
119-
; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
121+
; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1
120122
; CHECK-NEXT: xvst $xr0, $a0, 0
121123
; CHECK-NEXT: ret
122124
entry:
@@ -129,14 +131,22 @@ entry:
129131
}
130132

131133
define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind {
132-
; CHECK-LABEL: xvavg_du:
133-
; CHECK: # %bb.0: # %entry
134-
; CHECK-NEXT: xvld $xr0, $a1, 0
135-
; CHECK-NEXT: xvld $xr1, $a2, 0
136-
; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
137-
; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
138-
; CHECK-NEXT: xvst $xr0, $a0, 0
139-
; CHECK-NEXT: ret
134+
; LA32-LABEL: xvavg_du:
135+
; LA32: # %bb.0: # %entry
136+
; LA32-NEXT: xvld $xr0, $a1, 0
137+
; LA32-NEXT: xvld $xr1, $a2, 0
138+
; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
139+
; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
140+
; LA32-NEXT: xvst $xr0, $a0, 0
141+
; LA32-NEXT: ret
142+
;
143+
; LA64-LABEL: xvavg_du:
144+
; LA64: # %bb.0: # %entry
145+
; LA64-NEXT: xvld $xr0, $a1, 0
146+
; LA64-NEXT: xvld $xr1, $a2, 0
147+
; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1
148+
; LA64-NEXT: xvst $xr0, $a0, 0
149+
; LA64-NEXT: ret
140150
entry:
141151
%va = load <4 x i64>, ptr %a
142152
%vb = load <4 x i64>, ptr %b
@@ -151,9 +161,7 @@ define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind {
151161
; CHECK: # %bb.0: # %entry
152162
; CHECK-NEXT: xvld $xr0, $a1, 0
153163
; CHECK-NEXT: xvld $xr1, $a2, 0
154-
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
155-
; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
156-
; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1
164+
; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1
157165
; CHECK-NEXT: xvst $xr0, $a0, 0
158166
; CHECK-NEXT: ret
159167
entry:
@@ -171,9 +179,7 @@ define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind {
171179
; CHECK: # %bb.0: # %entry
172180
; CHECK-NEXT: xvld $xr0, $a1, 0
173181
; CHECK-NEXT: xvld $xr1, $a2, 0
174-
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
175-
; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
176-
; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1
182+
; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1
177183
; CHECK-NEXT: xvst $xr0, $a0, 0
178184
; CHECK-NEXT: ret
179185
entry:
@@ -191,9 +197,7 @@ define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind {
191197
; CHECK: # %bb.0: # %entry
192198
; CHECK-NEXT: xvld $xr0, $a1, 0
193199
; CHECK-NEXT: xvld $xr1, $a2, 0
194-
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
195-
; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
196-
; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1
200+
; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1
197201
; CHECK-NEXT: xvst $xr0, $a0, 0
198202
; CHECK-NEXT: ret
199203
entry:
@@ -207,15 +211,23 @@ entry:
207211
}
208212

209213
define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind {
210-
; CHECK-LABEL: xvavgr_d:
211-
; CHECK: # %bb.0: # %entry
212-
; CHECK-NEXT: xvld $xr0, $a1, 0
213-
; CHECK-NEXT: xvld $xr1, $a2, 0
214-
; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
215-
; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
216-
; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1
217-
; CHECK-NEXT: xvst $xr0, $a0, 0
218-
; CHECK-NEXT: ret
214+
; LA32-LABEL: xvavgr_d:
215+
; LA32: # %bb.0: # %entry
216+
; LA32-NEXT: xvld $xr0, $a1, 0
217+
; LA32-NEXT: xvld $xr1, $a2, 0
218+
; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
219+
; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
220+
; LA32-NEXT: xvsrai.d $xr0, $xr0, 1
221+
; LA32-NEXT: xvst $xr0, $a0, 0
222+
; LA32-NEXT: ret
223+
;
224+
; LA64-LABEL: xvavgr_d:
225+
; LA64: # %bb.0: # %entry
226+
; LA64-NEXT: xvld $xr0, $a1, 0
227+
; LA64-NEXT: xvld $xr1, $a2, 0
228+
; LA64-NEXT: xvavgr.d $xr0, $xr0, $xr1
229+
; LA64-NEXT: xvst $xr0, $a0, 0
230+
; LA64-NEXT: ret
219231
entry:
220232
%va = load <4 x i64>, ptr %a
221233
%vb = load <4 x i64>, ptr %b
@@ -231,9 +243,7 @@ define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind {
231243
; CHECK: # %bb.0: # %entry
232244
; CHECK-NEXT: xvld $xr0, $a1, 0
233245
; CHECK-NEXT: xvld $xr1, $a2, 0
234-
; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
235-
; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1
236-
; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
246+
; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1
237247
; CHECK-NEXT: xvst $xr0, $a0, 0
238248
; CHECK-NEXT: ret
239249
entry:
@@ -251,9 +261,7 @@ define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind {
251261
; CHECK: # %bb.0: # %entry
252262
; CHECK-NEXT: xvld $xr0, $a1, 0
253263
; CHECK-NEXT: xvld $xr1, $a2, 0
254-
; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
255-
; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1
256-
; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
264+
; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1
257265
; CHECK-NEXT: xvst $xr0, $a0, 0
258266
; CHECK-NEXT: ret
259267
entry:
@@ -271,9 +279,7 @@ define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind {
271279
; CHECK: # %bb.0: # %entry
272280
; CHECK-NEXT: xvld $xr0, $a1, 0
273281
; CHECK-NEXT: xvld $xr1, $a2, 0
274-
; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
275-
; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1
276-
; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
282+
; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1
277283
; CHECK-NEXT: xvst $xr0, $a0, 0
278284
; CHECK-NEXT: ret
279285
entry:
@@ -287,15 +293,23 @@ entry:
287293
}
288294

289295
define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind {
290-
; CHECK-LABEL: xvavgr_du:
291-
; CHECK: # %bb.0: # %entry
292-
; CHECK-NEXT: xvld $xr0, $a1, 0
293-
; CHECK-NEXT: xvld $xr1, $a2, 0
294-
; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
295-
; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1
296-
; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
297-
; CHECK-NEXT: xvst $xr0, $a0, 0
298-
; CHECK-NEXT: ret
296+
; LA32-LABEL: xvavgr_du:
297+
; LA32: # %bb.0: # %entry
298+
; LA32-NEXT: xvld $xr0, $a1, 0
299+
; LA32-NEXT: xvld $xr1, $a2, 0
300+
; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1
301+
; LA32-NEXT: xvaddi.du $xr0, $xr0, 1
302+
; LA32-NEXT: xvsrli.d $xr0, $xr0, 1
303+
; LA32-NEXT: xvst $xr0, $a0, 0
304+
; LA32-NEXT: ret
305+
;
306+
; LA64-LABEL: xvavgr_du:
307+
; LA64: # %bb.0: # %entry
308+
; LA64-NEXT: xvld $xr0, $a1, 0
309+
; LA64-NEXT: xvld $xr1, $a2, 0
310+
; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1
311+
; LA64-NEXT: xvst $xr0, $a0, 0
312+
; LA64-NEXT: ret
299313
entry:
300314
%va = load <4 x i64>, ptr %a
301315
%vb = load <4 x i64>, ptr %b

0 commit comments

Comments
 (0)