Skip to content

Commit e237e1c

Browse files
[Hexagon] Add HVX patterns for vector arithmetic
This patch introduces instruction selection patterns to generate the vsub, vadd, vmpy, vmin, and vmax HVX vector instructions. These patterns match on standard IR-level vector operations and lower them to the corresponding Hexagon HVX intrinsics. Patch by: Fateme Hosseini. Co-authored-by: Jyotsna Verma <[email protected]>
1 parent 4c17f9b commit e237e1c

File tree

5 files changed

+257
-198
lines changed

5 files changed

+257
-198
lines changed

llvm/lib/Target/Hexagon/HexagonPatterns.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
380380
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
381381
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
382382
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
383+
def Sext: pf1<sext>; def Zext: pf1<zext>;
383384

384385
def Smin: pf2<smin>; def Smax: pf2<smax>;
385386
def Umin: pf2<umin>; def Umax: pf2<umax>;

llvm/lib/Target/Hexagon/HexagonPatternsHVX.td

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
7676
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
7777
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;
7878

79+
def VShuff: OutPatFrag<(ops node:$Vs, node:$S),
80+
(V6_vshuffvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
81+
82+
def VDeal: OutPatFrag<(ops node:$Vs, node:$S),
83+
(V6_vdealvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
84+
7985
class VSubi<InstHexagon VSub, InstHexagon VSplati>:
8086
OutPatFrag<(ops node:$Imm, node:$Vs), (VSub (VSplati (i32 $Imm)), $Vs)>;
8187

@@ -402,6 +408,64 @@ class Vneg1<ValueType VecTy>
402408
class Vnot<ValueType VecTy>
403409
: PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
404410

411+
class ExtOp_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
412+
PatFrag VPred, int Shuff>
413+
: Pat<(ResType (Op (Ext VPred:$Vs), (Ext VPred:$Vt))),
414+
(VShuff (MI VPred:$Vs, VPred:$Vt), Shuff)>;
415+
416+
class VOpAcc_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
417+
PatFrag VxPred, PatFrag VsPred, int Shuff>
418+
: Pat<(ResType (add VxPred:$Vx, (Op (Ext VsPred:$Vs), (Ext VsPred:$Vt)))),
419+
(VShuff (MI (VDeal $Vx, Shuff), VsPred:$Vs, VsPred:$Vt), Shuff)>;
420+
421+
let Predicates = [UseHVX] in {
422+
let AddedComplexity = 200 in {
423+
def : ExtOp_pat<V6_vaddubh, Add, Zext, VecPI16, HVI8, -2>;
424+
def : ExtOp_pat<V6_vadduhw, Add, Zext, VecPI32, HVI16, -4>;
425+
def : ExtOp_pat<V6_vaddhw, Add, Sext, VecPI32, HVI16, -4>;
426+
427+
def : ExtOp_pat<V6_vsububh, Sub, Zext, VecPI16, HVI8, -2>;
428+
def : ExtOp_pat<V6_vsubuhw, Sub, Zext, VecPI32, HVI16, -4>;
429+
def : ExtOp_pat<V6_vsubhw, Sub, Sext, VecPI32, HVI16, -4>;
430+
431+
def : ExtOp_pat<V6_vmpybv, Mul, Sext, VecPI16, HVI8, -2>;
432+
def : ExtOp_pat<V6_vmpyhv, Mul, Sext, VecPI32, HVI16, -4>;
433+
def : ExtOp_pat<V6_vmpyubv, Mul, Zext, VecPI16, HVI8, -2>;
434+
def : ExtOp_pat<V6_vmpyuhv, Mul, Zext, VecPI32, HVI16, -4>;
435+
436+
// The first operand in V6_vmpybusv is unsigned.
437+
def : Pat<(VecPI16 (mul (VecPI16 (zext HVI8:$Vs)),
438+
(VecPI16 (sext HVI8:$Vv)))),
439+
(VShuff (V6_vmpybusv HVI8:$Vs, HVI8:$Vv), -2)>;
440+
441+
// The second operand in V6_vmpyhus is unsigned.
442+
def : Pat<(VecPI32 (mul (VecPI32 (sext HVI16:$Vs)),
443+
(VecPI32 (zext HVI16:$Vv)))),
444+
(VShuff (V6_vmpyhus HVI16:$Vs, HVI16:$Vv), -4)>;
445+
446+
def : VOpAcc_pat<V6_vaddubh_acc, Add, Zext, VecPI16, HWI16, HVI8, -2>;
447+
def : VOpAcc_pat<V6_vadduhw_acc, Add, Zext, VecPI32, HWI32, HVI16, -4>;
448+
def : VOpAcc_pat<V6_vaddhw_acc, Add, Sext, VecPI32, HWI32, HVI16, -4>;
449+
450+
def : VOpAcc_pat<V6_vmpybv_acc, Mul, Sext, VecPI16, HWI16, HVI8, -2>;
451+
def : VOpAcc_pat<V6_vmpyubv_acc, Mul, Zext, VecPI16, HWI16, HVI8, -2>;
452+
def : VOpAcc_pat<V6_vmpyhv_acc, Mul, Sext, VecPI32, HWI32, HVI16, -4>;
453+
def : VOpAcc_pat<V6_vmpyuhv_acc, Mul, Zext, VecPI32, HWI32, HVI16, -4>;
454+
455+
// The second operand in V6_vmpybusv_acc is unsigned.
456+
def : Pat<(VecPI16 (add HWI16:$Vx , (mul (VecPI16 (zext HVI8:$Vs)),
457+
(VecPI16 (sext HVI8:$Vt))))),
458+
(VShuff (V6_vmpybusv_acc (VDeal $Vx, -2),
459+
HVI8:$Vs, HVI8:$Vt), -2)>;
460+
461+
// The third operand in V6_vmpyhus_acc is unsigned.
462+
def : Pat<(add HWI32:$Vx, (mul (VecPI32 (sext HVI16:$Vs)),
463+
(VecPI32 (zext HVI16:$Vt)))),
464+
(VShuff (V6_vmpyhus_acc (VDeal $Vx, -4),
465+
HVI16:$Vs, HVI16:$Vt), -4)>;
466+
}
467+
}
468+
405469
let Predicates = [UseHVX] in {
406470
let AddedComplexity = 200 in {
407471
def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;

llvm/test/CodeGen/Hexagon/autohvx/arith.ll

Lines changed: 2 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -132,141 +132,5 @@ define <32 x i32> @xorw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
132132
ret <32 x i32> %p
133133
}
134134

135-
; --- add
136-
137-
; CHECK-LABEL: addb_64:
138-
; CHECK: vadd(v0.b,v1.b)
139-
define <64 x i8> @addb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
140-
%p = add <64 x i8> %v0, %v1
141-
ret <64 x i8> %p
142-
}
143-
144-
; CHECK-LABEL: addb_128:
145-
; CHECK: vadd(v0.b,v1.b)
146-
define <128 x i8> @addb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
147-
%p = add <128 x i8> %v0, %v1
148-
ret <128 x i8> %p
149-
}
150-
151-
; CHECK-LABEL: addh_64:
152-
; CHECK: vadd(v0.h,v1.h)
153-
define <32 x i16> @addh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
154-
%p = add <32 x i16> %v0, %v1
155-
ret <32 x i16> %p
156-
}
157-
158-
; CHECK-LABEL: addh_128:
159-
; CHECK: vadd(v0.h,v1.h)
160-
define <64 x i16> @addh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
161-
%p = add <64 x i16> %v0, %v1
162-
ret <64 x i16> %p
163-
}
164-
165-
; CHECK-LABEL: addw_64:
166-
; CHECK: vadd(v0.w,v1.w)
167-
define <16 x i32> @addw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
168-
%p = add <16 x i32> %v0, %v1
169-
ret <16 x i32> %p
170-
}
171-
172-
; CHECK-LABEL: addw_128:
173-
; CHECK: vadd(v0.w,v1.w)
174-
define <32 x i32> @addw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
175-
%p = add <32 x i32> %v0, %v1
176-
ret <32 x i32> %p
177-
}
178-
179-
; --- sub
180-
181-
; CHECK-LABEL: subb_64:
182-
; CHECK: vsub(v0.b,v1.b)
183-
define <64 x i8> @subb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
184-
%p = sub <64 x i8> %v0, %v1
185-
ret <64 x i8> %p
186-
}
187-
188-
; CHECK-LABEL: subb_128:
189-
; CHECK: vsub(v0.b,v1.b)
190-
define <128 x i8> @subb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
191-
%p = sub <128 x i8> %v0, %v1
192-
ret <128 x i8> %p
193-
}
194-
195-
; CHECK-LABEL: subh_64:
196-
; CHECK: vsub(v0.h,v1.h)
197-
define <32 x i16> @subh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
198-
%p = sub <32 x i16> %v0, %v1
199-
ret <32 x i16> %p
200-
}
201-
202-
; CHECK-LABEL: subh_128:
203-
; CHECK: vsub(v0.h,v1.h)
204-
define <64 x i16> @subh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
205-
%p = sub <64 x i16> %v0, %v1
206-
ret <64 x i16> %p
207-
}
208-
209-
; CHECK-LABEL: subw_64:
210-
; CHECK: vsub(v0.w,v1.w)
211-
define <16 x i32> @subw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
212-
%p = sub <16 x i32> %v0, %v1
213-
ret <16 x i32> %p
214-
}
215-
216-
; CHECK-LABEL: subw_128:
217-
; CHECK: vsub(v0.w,v1.w)
218-
define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
219-
%p = sub <32 x i32> %v0, %v1
220-
ret <32 x i32> %p
221-
}
222-
223-
; --- mul
224-
225-
; CHECK-LABEL: mpyb_64:
226-
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
227-
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
228-
define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
229-
%p = mul <64 x i8> %v0, %v1
230-
ret <64 x i8> %p
231-
}
232-
233-
; CHECK-LABEL: mpyb_128:
234-
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
235-
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
236-
define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
237-
%p = mul <128 x i8> %v0, %v1
238-
ret <128 x i8> %p
239-
}
240-
241-
; CHECK-LABEL: mpyh_64:
242-
; CHECK: vmpyi(v0.h,v1.h)
243-
define <32 x i16> @mpyh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
244-
%p = mul <32 x i16> %v0, %v1
245-
ret <32 x i16> %p
246-
}
247-
248-
; CHECK-LABEL: mpyh_128:
249-
; CHECK: vmpyi(v0.h,v1.h)
250-
define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
251-
%p = mul <64 x i16> %v0, %v1
252-
ret <64 x i16> %p
253-
}
254-
255-
; CHECK-LABEL: mpyw_64:
256-
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
257-
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
258-
define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
259-
%p = mul <16 x i32> %v0, %v1
260-
ret <16 x i32> %p
261-
}
262-
263-
; CHECK-LABEL: mpyw_128:
264-
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
265-
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
266-
define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
267-
%p = mul <32 x i32> %v0, %v1
268-
ret <32 x i32> %p
269-
}
270-
271-
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
272-
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
135+
attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
136+
attributes #1 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }

0 commit comments

Comments
 (0)