-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Hexagon] Add HVX patterns for vector arithmetic #170704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-clang-format @llvm/pr-subscribers-backend-hexagon Author: Fateme Hosseini (fhossein-quic) Changes: This patch introduces instruction selection patterns to generate the vsub, vadd, vmpy, vmin, and vmax HVX vector instructions. These patterns match on standard IR-level vector operations and lower them to the corresponding Hexagon HVX intrinsics. Patch By: Fateme Hosseini Full diff: https://github.com/llvm/llvm-project/pull/170704.diff 5 Files Affected:
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index e84070f1a5468..e84a3286eaa9a 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -380,6 +380,7 @@ multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
+// Named fragments for the sext/zext nodes, so they can be passed as PatFrag
+// arguments (e.g. the Ext parameter of the HVX ExtOp_pat/VOpAcc_pat classes).
+def Sext: pf1<sext>; def Zext: pf1<zext>;
def Smin: pf2<smin>; def Smax: pf2<smax>;
def Umin: pf2<umin>; def Umax: pf2<umax>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 674d19176a88b..64bb93a5ca8f8 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -76,6 +76,12 @@ def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;
+// Output fragment: feeds the HiVec and LoVec halves of $Vs to V6_vshuffvdd,
+// with the control value $S materialized into a scalar register by A2_tfrsi.
+def VShuff: OutPatFrag<(ops node:$Vs, node:$S),
+ (V6_vshuffvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
+
+// Output fragment: feeds the HiVec and LoVec halves of $Vs to V6_vdealvdd,
+// with the control value $S materialized into a scalar register by A2_tfrsi.
+// The patterns that use it apply VDeal to the accumulator input and VShuff,
+// with the same constant, to the result.
+def VDeal: OutPatFrag<(ops node:$Vs, node:$S),
+ (V6_vdealvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;
+
class VSubi<InstHexagon VSub, InstHexagon VSplati>:
OutPatFrag<(ops node:$Imm, node:$Vs), (VSub (VSplati (i32 $Imm)), $Vs)>;
@@ -402,6 +408,64 @@ class Vneg1<ValueType VecTy>
class Vnot<ValueType VecTy>
: PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
+// Matches Op applied to two vectors that are both extended with Ext, giving a
+// result of type ResType. The instruction MI receives the unextended operands
+// (each constrained by VPred) and its result is passed through VShuff with
+// the constant Shuff.
+//   MI      - instruction that consumes the narrow operands
+//   Op      - binary operation to match (e.g. Add, Sub, Mul)
+//   Ext     - extension applied to both inputs (Sext or Zext)
+//   ResType - type of the matched (widened) result
+//   VPred   - fragment constraining the narrow input vectors
+//   Shuff   - constant handed to VShuff
+class ExtOp_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
+ PatFrag VPred, int Shuff>
+ : Pat<(ResType (Op (Ext VPred:$Vs), (Ext VPred:$Vt))),
+ (VShuff (MI VPred:$Vs, VPred:$Vt), Shuff)>;
+
+// Matches an add of the accumulator $Vx and Op applied to two Ext-extended
+// vectors, giving a result of type ResType. The accumulator is passed through
+// VDeal before it is given to MI as its first operand, and the result of MI is
+// passed through VShuff; both use the same constant Shuff.
+//   MI      - accumulating instruction (accumulator, $Vs, $Vt)
+//   Op      - binary operation whose result is accumulated
+//   Ext     - extension applied to both narrow inputs (Sext or Zext)
+//   ResType - type of the matched result
+//   VxPred  - fragment constraining the accumulator
+//   VsPred  - fragment constraining the narrow input vectors
+//   Shuff   - constant handed to both VDeal and VShuff
+class VOpAcc_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
+ PatFrag VxPred, PatFrag VsPred, int Shuff>
+ : Pat<(ResType (add VxPred:$Vx, (Op (Ext VsPred:$Vs), (Ext VsPred:$Vt)))),
+ (VShuff (MI (VDeal $Vx, Shuff), VsPred:$Vs, VsPred:$Vt), Shuff)>;
+
+// Widening HVX arithmetic. Each pattern folds the sign/zero extension of the
+// narrow inputs into an instruction that produces the double-width result.
+// Patterns widening i8 inputs use the shuffle constant -2; patterns widening
+// i16 inputs use -4.
+let Predicates = [UseHVX] in {
+  let AddedComplexity = 200 in {
+    def : ExtOp_pat<V6_vaddubh, Add, Zext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vadduhw, Add, Zext, VecPI32, HVI16, -4>;
+    def : ExtOp_pat<V6_vaddhw, Add, Sext, VecPI32, HVI16, -4>;
+
+    def : ExtOp_pat<V6_vsububh, Sub, Zext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vsubuhw, Sub, Zext, VecPI32, HVI16, -4>;
+    def : ExtOp_pat<V6_vsubhw, Sub, Sext, VecPI32, HVI16, -4>;
+
+    def : ExtOp_pat<V6_vmpybv, Mul, Sext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vmpyhv, Mul, Sext, VecPI32, HVI16, -4>;
+    def : ExtOp_pat<V6_vmpyubv, Mul, Zext, VecPI16, HVI8, -2>;
+    def : ExtOp_pat<V6_vmpyuhv, Mul, Zext, VecPI32, HVI16, -4>;
+
+    // The first operand in V6_vmpybusv is unsigned.
+    def : Pat<(VecPI16 (mul (VecPI16 (zext HVI8:$Vs)),
+                            (VecPI16 (sext HVI8:$Vv)))),
+              (VShuff (V6_vmpybusv HVI8:$Vs, HVI8:$Vv), -2)>;
+
+    // The second operand in V6_vmpyhus is unsigned.
+    def : Pat<(VecPI32 (mul (VecPI32 (sext HVI16:$Vs)),
+                            (VecPI32 (zext HVI16:$Vv)))),
+              (VShuff (V6_vmpyhus HVI16:$Vs, HVI16:$Vv), -4)>;
+
+    // NOTE(review): the accumulating vadd forms are believed to be HVX v62
+    // additions (Requires<[UseHVXV62]> on the instruction definitions), so
+    // they are not selected for v60 here. Confirm against the instruction
+    // definitions; if they turn out to be v60, this inner 'let' can go.
+    let Predicates = [UseHVXV62] in {
+      def : VOpAcc_pat<V6_vaddubh_acc, Add, Zext, VecPI16, HWI16, HVI8, -2>;
+      def : VOpAcc_pat<V6_vadduhw_acc, Add, Zext, VecPI32, HWI32, HVI16, -4>;
+      def : VOpAcc_pat<V6_vaddhw_acc, Add, Sext, VecPI32, HWI32, HVI16, -4>;
+    }
+
+    def : VOpAcc_pat<V6_vmpybv_acc, Mul, Sext, VecPI16, HWI16, HVI8, -2>;
+    def : VOpAcc_pat<V6_vmpyubv_acc, Mul, Zext, VecPI16, HWI16, HVI8, -2>;
+    def : VOpAcc_pat<V6_vmpyhv_acc, Mul, Sext, VecPI32, HWI32, HVI16, -4>;
+    def : VOpAcc_pat<V6_vmpyuhv_acc, Mul, Zext, VecPI32, HWI32, HVI16, -4>;
+
+    // The second operand in V6_vmpybusv_acc ($Vs) is unsigned.
+    def : Pat<(VecPI16 (add HWI16:$Vx, (mul (VecPI16 (zext HVI8:$Vs)),
+                                            (VecPI16 (sext HVI8:$Vt))))),
+              (VShuff (V6_vmpybusv_acc (VDeal $Vx, -2),
+                                       HVI8:$Vs, HVI8:$Vt), -2)>;
+
+    // The third operand in V6_vmpyhus_acc ($Vt) is unsigned.
+    // The result type is spelled out to match the byte variant above.
+    def : Pat<(VecPI32 (add HWI32:$Vx, (mul (VecPI32 (sext HVI16:$Vs)),
+                                            (VecPI32 (zext HVI16:$Vt))))),
+              (VShuff (V6_vmpyhus_acc (VDeal $Vx, -4),
+                                      HVI16:$Vs, HVI16:$Vt), -4)>;
+  }
+}
+
let Predicates = [UseHVX] in {
let AddedComplexity = 200 in {
def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
index f45dce7791118..291243299c534 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll
@@ -132,141 +132,5 @@ define <32 x i32> @xorw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
ret <32 x i32> %p
}
-; --- add
-
-; CHECK-LABEL: addb_64:
-; CHECK: vadd(v0.b,v1.b)
-define <64 x i8> @addb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
- %p = add <64 x i8> %v0, %v1
- ret <64 x i8> %p
-}
-
-; CHECK-LABEL: addb_128:
-; CHECK: vadd(v0.b,v1.b)
-define <128 x i8> @addb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
- %p = add <128 x i8> %v0, %v1
- ret <128 x i8> %p
-}
-
-; CHECK-LABEL: addh_64:
-; CHECK: vadd(v0.h,v1.h)
-define <32 x i16> @addh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
- %p = add <32 x i16> %v0, %v1
- ret <32 x i16> %p
-}
-
-; CHECK-LABEL: addh_128:
-; CHECK: vadd(v0.h,v1.h)
-define <64 x i16> @addh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
- %p = add <64 x i16> %v0, %v1
- ret <64 x i16> %p
-}
-
-; CHECK-LABEL: addw_64:
-; CHECK: vadd(v0.w,v1.w)
-define <16 x i32> @addw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
- %p = add <16 x i32> %v0, %v1
- ret <16 x i32> %p
-}
-
-; CHECK-LABEL: addw_128:
-; CHECK: vadd(v0.w,v1.w)
-define <32 x i32> @addw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
- %p = add <32 x i32> %v0, %v1
- ret <32 x i32> %p
-}
-
-; --- sub
-
-; CHECK-LABEL: subb_64:
-; CHECK: vsub(v0.b,v1.b)
-define <64 x i8> @subb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
- %p = sub <64 x i8> %v0, %v1
- ret <64 x i8> %p
-}
-
-; CHECK-LABEL: subb_128:
-; CHECK: vsub(v0.b,v1.b)
-define <128 x i8> @subb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
- %p = sub <128 x i8> %v0, %v1
- ret <128 x i8> %p
-}
-
-; CHECK-LABEL: subh_64:
-; CHECK: vsub(v0.h,v1.h)
-define <32 x i16> @subh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
- %p = sub <32 x i16> %v0, %v1
- ret <32 x i16> %p
-}
-
-; CHECK-LABEL: subh_128:
-; CHECK: vsub(v0.h,v1.h)
-define <64 x i16> @subh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
- %p = sub <64 x i16> %v0, %v1
- ret <64 x i16> %p
-}
-
-; CHECK-LABEL: subw_64:
-; CHECK: vsub(v0.w,v1.w)
-define <16 x i32> @subw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
- %p = sub <16 x i32> %v0, %v1
- ret <16 x i32> %p
-}
-
-; CHECK-LABEL: subw_128:
-; CHECK: vsub(v0.w,v1.w)
-define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
- %p = sub <32 x i32> %v0, %v1
- ret <32 x i32> %p
-}
-
-; --- mul
-
-; CHECK-LABEL: mpyb_64:
-; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
-define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
- %p = mul <64 x i8> %v0, %v1
- ret <64 x i8> %p
-}
-
-; CHECK-LABEL: mpyb_128:
-; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
-; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
-define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
- %p = mul <128 x i8> %v0, %v1
- ret <128 x i8> %p
-}
-
-; CHECK-LABEL: mpyh_64:
-; CHECK: vmpyi(v0.h,v1.h)
-define <32 x i16> @mpyh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
- %p = mul <32 x i16> %v0, %v1
- ret <32 x i16> %p
-}
-
-; CHECK-LABEL: mpyh_128:
-; CHECK: vmpyi(v0.h,v1.h)
-define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
- %p = mul <64 x i16> %v0, %v1
- ret <64 x i16> %p
-}
-
-; CHECK-LABEL: mpyw_64:
-; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
-; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
-define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
- %p = mul <16 x i32> %v0, %v1
- ret <16 x i32> %p
-}
-
-; CHECK-LABEL: mpyw_128:
-; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
-; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
-define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
- %p = mul <32 x i32> %v0, %v1
- ret <32 x i32> %p
-}
-
-attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
-attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
+attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
+attributes #1 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll
index a9483037e14b1..dd70368979c87 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll
@@ -573,4 +573,137 @@ define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
ret <32 x i32> %t1
}
-attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
+; --- Float32
+
+; Ordered-equal float compare feeding a select: expects a word-wise vcmp.eq
+; into a predicate register that drives vmux(q, v0, v1).
+; CHECK-LABEL: test_2j:
+; CHECK: q[[Q2J0:[0-3]]] = vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2J0]],v0,v1)
+define <32 x float> @test_2j(<32 x float> %v0, <32 x float> %v1) #1 {
+ %t0 = fcmp oeq <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; Ordered-not-equal float compare feeding a select: expects the same word-wise
+; vcmp.eq, with the two data operands of vmux swapped.
+; CHECK-LABEL: test_2k:
+; CHECK: q[[Q2K0:[0-3]]] = vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2K0]],v1,v0)
+define <32 x float> @test_2k(<32 x float> %v0, <32 x float> %v1) #1 {
+ %t0 = fcmp one <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; Ordered-less-than compare plus select of the same operands: expects a single
+; vmin on .sf operands rather than a compare/vmux pair.
+; CHECK-LABEL: test_2l:
+; CHECK: v0.sf = vmin(v1.sf,v0.sf)
+define <32 x float> @test_2l(<32 x float> %v0, <32 x float> %v1) #1 {
+ %t0 = fcmp olt <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; Ordered-less-or-equal compare feeding a select: expects vcmp.gt(v0, v1) on
+; .sf operands, with the two data operands of vmux swapped.
+; CHECK-LABEL: test_2m:
+; CHECK: q[[Q2M0:[0-3]]] = vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2M0]],v1,v0)
+define <32 x float> @test_2m(<32 x float> %v0, <32 x float> %v1) #1 {
+ %t0 = fcmp ole <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; Ordered-greater-than compare plus select of the same operands: expects a
+; single vmax on .sf operands rather than a compare/vmux pair.
+; CHECK-LABEL: test_2n:
+; CHECK: v0.sf = vmax(v0.sf,v1.sf)
+define <32 x float> @test_2n(<32 x float> %v0, <32 x float> %v1) #1 {
+ %t0 = fcmp ogt <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; Ordered-greater-or-equal compare feeding a select: expects vcmp.gt with the
+; compare operands reversed (v1, v0) and the vmux data operands swapped.
+; CHECK-LABEL: test_2o:
+; CHECK: q[[Q2O0:[0-3]]] = vcmp.gt(v1.sf,v0.sf)
+; CHECK: v0 = vmux(q[[Q2O0]],v1,v0)
+define <32 x float> @test_2o(<32 x float> %v0, <32 x float> %v1) #1 {
+ %t0 = fcmp oge <32 x float> %v0, %v1
+ %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; AND of an ordered-equal float compare with a mask truncated from i32.
+; The compare accumulates into the predicate register written by vand, so that
+; register is captured once and then reused (not re-captured) on later lines.
+; CHECK-LABEL: test_2p:
+; CHECK: r[[R2P0:[0-9]+]] = ##16843009
+; CHECK: q[[Q2P1:[0-3]]] = vand(v2,r[[R2P0]])
+; CHECK: q[[Q2P1]] &= vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2P1]],v0,v1)
+define <32 x float> @test_2p(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
+ %q0 = fcmp oeq <32 x float> %v0, %v1
+ %q1 = trunc <32 x i32> %v2 to <32 x i1>
+ %q2 = and <32 x i1> %q0, %q1
+ %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; OR of an ordered-equal float compare with a mask truncated from i32.
+; The compare accumulates into the predicate register written by vand, so that
+; register is captured once and then reused (not re-captured) on later lines.
+; CHECK-LABEL: test_2q:
+; CHECK: r[[R2Q0:[0-9]+]] = ##16843009
+; CHECK: q[[Q2Q1:[0-3]]] = vand(v2,r[[R2Q0]])
+; CHECK: q[[Q2Q1]] |= vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2Q1]],v0,v1)
+define <32 x float> @test_2q(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
+ %q0 = fcmp oeq <32 x float> %v0, %v1
+ %q1 = trunc <32 x i32> %v2 to <32 x i1>
+ %q2 = or <32 x i1> %q0, %q1
+ %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; XOR of an ordered-equal float compare with a mask truncated from i32.
+; The compare accumulates into the predicate register written by vand, so that
+; register is captured once and then reused (not re-captured) on later lines.
+; CHECK-LABEL: test_2r:
+; CHECK: r[[R2R0:[0-9]+]] = ##16843009
+; CHECK: q[[Q2R1:[0-3]]] = vand(v2,r[[R2R0]])
+; CHECK: q[[Q2R1]] ^= vcmp.eq(v0.w,v1.w)
+; CHECK: v0 = vmux(q[[Q2R1]],v0,v1)
+define <32 x float> @test_2r(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
+ %q0 = fcmp oeq <32 x float> %v0, %v1
+ %q1 = trunc <32 x i32> %v2 to <32 x i1>
+ %q2 = xor <32 x i1> %q0, %q1
+ %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; AND of an ordered-greater-than float compare with a mask truncated from i32.
+; The compare accumulates into the predicate register written by vand, so that
+; register is captured once and then reused (not re-captured) on later lines.
+; The vmux line must refer to this function's own predicate capture, not to a
+; variable left over from a previous function.
+; CHECK-LABEL: test_2s:
+; CHECK: r[[R2S0:[0-9]+]] = ##16843009
+; CHECK: q[[Q2S1:[0-3]]] = vand(v2,r[[R2S0]])
+; CHECK: q[[Q2S1]] &= vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2S1]],v0,v1)
+define <32 x float> @test_2s(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
+ %q0 = fcmp ogt <32 x float> %v0, %v1
+ %q1 = trunc <32 x i32> %v2 to <32 x i1>
+ %q2 = and <32 x i1> %q0, %q1
+ %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; OR of an ordered-greater-than float compare with a mask truncated from i32.
+; The compare accumulates into the predicate register written by vand, so that
+; register is captured once and then reused (not re-captured) on later lines.
+; CHECK-LABEL: test_2t:
+; CHECK: r[[R2T0:[0-9]+]] = ##16843009
+; CHECK: q[[Q2T1:[0-3]]] = vand(v2,r[[R2T0]])
+; CHECK: q[[Q2T1]] |= vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2T1]],v0,v1)
+define <32 x float> @test_2t(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
+ %q0 = fcmp ogt <32 x float> %v0, %v1
+ %q1 = trunc <32 x i32> %v2 to <32 x i1>
+ %q2 = or <32 x i1> %q0, %q1
+ %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+; XOR of an ordered-greater-than float compare with a mask truncated from i32.
+; The compare accumulates into the predicate register written by vand, so that
+; register is captured once and then reused (not re-captured) on later lines.
+; CHECK-LABEL: test_2u:
+; CHECK: r[[R2U0:[0-9]+]] = ##16843009
+; CHECK: q[[Q2U1:[0-3]]] = vand(v2,r[[R2U0]])
+; CHECK: q[[Q2U1]] ^= vcmp.gt(v0.sf,v1.sf)
+; CHECK: v0 = vmux(q[[Q2U1]],v0,v1)
+define <32 x float> @test_2u(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
+ %q0 = fcmp ogt <32 x float> %v0, %v1
+ %q1 = trunc <32 x i32> %v2 to <32 x i1>
+ %q2 = xor <32 x i1> %q0, %q1
+ %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
+ ret <32 x float> %t1
+}
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
+attributes #1 = { nounwind readnone "target-cpu"="hexagonv68" "target-features"="+hvxv68,+hvx-length128b,+hvx-qfloat" }
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll
index 7673f8b12264f..52176d6d2158c 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll
@@ -574,4 +574,4 @@ define <16 x i32> @test_2i(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 {
ret <16 x i32> %t1
}
-attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
|
6f10741 to
fa0e8e0
Compare
f0f2a94 to
5171a40
Compare
| @@ -1,11 +1,11 @@ | |||
| ; RUN: llc -mtriple=hexagon < %s | FileCheck %s | |||
| ; RUN: llc -mtriple=hexagon -mattr=+hvxv73,+hvx-length64b < %s | FileCheck %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why? The benefit of using attributes is that you can have different settings for different functions in the same file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi Krzysztof, Not sure what you mean here... The change is needed for this test to pass..
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I brought the attribute back. My question for you: all the functions in this test use the same attributes, so in practice keeping the attributes won't give us extra flexibility unless we expand the test in the future with functions that have different attributes, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi Krzysztof, Not sure what you mean here... The change is needed for this test to pass..
This is just a change in attributes. The convention was to set attributes via attributes = { ... } in the test body, not the command line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I brought the attribute back. My question for you: all the functions in this test use the same attributes, so in practice keeping the attributes won't give us extra flexibility unless we expand the test in the future with functions that have different attributes, right?
Yes. It started as having hvx64b and hvx128b in the same file, but you could mix hvx versions if you wanted to, for example. It costs pretty much nothing to maintain, but you get that flexibility...
| } | ||
|
|
||
| ; CHECK-LABEL: test_2p: | ||
| ; CHECK: r[[R2P0:[0-9]*]] = ##16843009 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't use the 0x01010101 value; use -1 instead. -1 doesn't require a constant extender.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using -1 wouldn't be semantically equivalent, because the IR does trunc <32 x i32> %v2 to <32 x i1>, which results in only the least-significant bit of each lane defining the predicate. Using -1 would make vand(v2, -1) pass all bits through. So, we have to use ##16843009 (0x01010101).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you want to do a truncation from v32i32 to v32i1, using 0x01010101 is still wrong.
A Q register always has 128 bits. A value of type v32i1 is represented as 32 groups of 4 bits each. Within each group all 4 bits have to be 0 or all have to be 1.
If v2 has 0x00000001 repeated 32 times, using vand with 0x01010101 will produce groups of bits in Q that are 0x0000 or 0x0001. This will cause the final vmux to pick only the lowest byte from v0 instead of the whole float. To get a proper truncation from v32i32 to v32i1, do vandv(v2, vsplatw(0x00000001)) first, then vand(result, -1) vcmpw.eq(result, zero).
5171a40 to
e237e1c
Compare
This patch introduces instruction selection patterns to generate the vsub, vadd, vmpy, vmin, and vmax HVX vector instructions. These patterns match on standard IR-level vector operations and lower them to the corresponding Hexagon HVX intrinsics. Patch By: Fateme Hosseini Co-authored-by: Jyotsna Verma <[email protected]>
e237e1c to
c36843d
Compare
This patch introduces instruction selection patterns to generate the vsub, vadd, vmpy, vmin, and vmax HVX vector instructions. These patterns match on standard IR-level vector operations and lower them to the corresponding Hexagon HVX intrinsics.
Patch By: Fateme Hosseini