Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/Hexagon/HexagonPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
def Sext: pf1<sext>; def Zext: pf1<zext>;

def Smin: pf2<smin>; def Smax: pf2<smax>;
def Umin: pf2<umin>; def Umax: pf2<umax>;
Expand Down
64 changes: 64 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
// Output fragment that applies V6_vunpackub to $Vs.
def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
// Output fragment that applies V6_vunpackuh to $Vs.
def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;

// Output fragment: splits $Vs with HiVec/LoVec and feeds both halves to
// V6_vshuffvdd; the control value $S is passed through A2_tfrsi.
def VShuff: OutPatFrag<(ops node:$Vs, node:$S),
(V6_vshuffvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;

// Output fragment: splits $Vs with HiVec/LoVec and feeds both halves to
// V6_vdealvdd; the control value $S is passed through A2_tfrsi.
// NOTE(review): presumably the inverse permutation of VShuff for the same
// $S -- confirm against the HVX reference.
def VDeal: OutPatFrag<(ops node:$Vs, node:$S),
(V6_vdealvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>;

// Output fragment computing VSub(VSplati(i32 $Imm), $Vs): the immediate is
// expanded by the VSplati instruction and $Vs is the second operand of VSub.
class VSubi<InstHexagon VSub, InstHexagon VSplati>:
OutPatFrag<(ops node:$Imm, node:$Vs), (VSub (VSplati (i32 $Imm)), $Vs)>;

Expand Down Expand Up @@ -402,6 +408,64 @@ class Vneg1<ValueType VecTy>
// Input fragment matching (xor $Vs, Vneg1<VecTy>), i.e. $Vs xor-ed with the
// Vneg1 fragment of the same vector type.
class Vnot<ValueType VecTy>
: PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;

// Selection pattern for a binary operation whose operands are both extended:
// matches (ResType (Op (Ext $Vs), (Ext $Vt))) with $Vs/$Vt constrained by
// VPred, and emits MI on the unextended operands, followed by VShuff with
// the control value Shuff.
//   MI      - instruction to emit
//   Op      - operation applied to the extended operands
//   Ext     - extension applied to both operands
//   ResType - type of the matched result
//   VPred   - fragment constraining both input operands
//   Shuff   - control value handed to VShuff
class ExtOp_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
PatFrag VPred, int Shuff>
: Pat<(ResType (Op (Ext VPred:$Vs), (Ext VPred:$Vt))),
(VShuff (MI VPred:$Vs, VPred:$Vt), Shuff)>;

// Accumulating counterpart of the extended-operand pattern: matches
// (ResType (add $Vx, (Op (Ext $Vs), (Ext $Vt)))) and emits MI with the
// accumulator $Vx first passed through VDeal; the instruction result then
// goes through VShuff. Both VDeal and VShuff use the same control value Shuff.
//   VxPred - fragment constraining the accumulator operand
//   VsPred - fragment constraining the two extended input operands
// NOTE(review): presumably VDeal puts $Vx into the element layout MI works
// in, and VShuff undoes it afterwards -- confirm.
class VOpAcc_pat<InstHexagon MI, PatFrag Op, PatFrag Ext, ValueType ResType,
PatFrag VxPred, PatFrag VsPred, int Shuff>
: Pat<(ResType (add VxPred:$Vx, (Op (Ext VsPred:$Vs), (Ext VsPred:$Vt)))),
(VShuff (MI (VDeal $Vx, Shuff), VsPred:$Vs, VsPred:$Vt), Shuff)>;

let Predicates = [UseHVX] in {
let AddedComplexity = 200 in {
// Add of two identically extended vectors. The last argument is the VShuff
// control value: -2 for the VecPI16 results, -4 for the VecPI32 results.
def : ExtOp_pat<V6_vaddubh, Add, Zext, VecPI16, HVI8, -2>;
def : ExtOp_pat<V6_vadduhw, Add, Zext, VecPI32, HVI16, -4>;
def : ExtOp_pat<V6_vaddhw, Add, Sext, VecPI32, HVI16, -4>;

// Sub of two identically extended vectors.
def : ExtOp_pat<V6_vsububh, Sub, Zext, VecPI16, HVI8, -2>;
def : ExtOp_pat<V6_vsubuhw, Sub, Zext, VecPI32, HVI16, -4>;
def : ExtOp_pat<V6_vsubhw, Sub, Sext, VecPI32, HVI16, -4>;

// Mul of two identically extended vectors.
def : ExtOp_pat<V6_vmpybv, Mul, Sext, VecPI16, HVI8, -2>;
def : ExtOp_pat<V6_vmpyhv, Mul, Sext, VecPI32, HVI16, -4>;
def : ExtOp_pat<V6_vmpyubv, Mul, Zext, VecPI16, HVI8, -2>;
def : ExtOp_pat<V6_vmpyuhv, Mul, Zext, VecPI32, HVI16, -4>;

// Mixed-extension multiplies cannot use ExtOp_pat (it applies one Ext to
// both operands), so they are spelled out below.
// The first operand in V6_vmpybusv is unsigned.
def : Pat<(VecPI16 (mul (VecPI16 (zext HVI8:$Vs)),
(VecPI16 (sext HVI8:$Vv)))),
(VShuff (V6_vmpybusv HVI8:$Vs, HVI8:$Vv), -2)>;

// The second operand in V6_vmpyhus is unsigned.
def : Pat<(VecPI32 (mul (VecPI32 (sext HVI16:$Vs)),
(VecPI32 (zext HVI16:$Vv)))),
(VShuff (V6_vmpyhus HVI16:$Vs, HVI16:$Vv), -4)>;

// Accumulating adds of two identically extended vectors.
// V6_vaddubh_acc, V6_vadduhw_acc and V6_vaddhw_acc are HVX v62 instructions,
// so the enclosing UseHVX predicate (which also covers v60) is too
// permissive for them. Override it here so these patterns are only
// available when the v62 HVX instructions are.
let Predicates = [UseHVXV62] in {
  def : VOpAcc_pat<V6_vaddubh_acc, Add, Zext, VecPI16, HWI16, HVI8, -2>;
  def : VOpAcc_pat<V6_vadduhw_acc, Add, Zext, VecPI32, HWI32, HVI16, -4>;
  def : VOpAcc_pat<V6_vaddhw_acc, Add, Sext, VecPI32, HWI32, HVI16, -4>;
}

// Accumulating multiplies where both inputs use the same extension.
def : VOpAcc_pat<V6_vmpybv_acc,  Mul, Sext, VecPI16, HWI16, HVI8,  -2>;
def : VOpAcc_pat<V6_vmpyubv_acc, Mul, Zext, VecPI16, HWI16, HVI8,  -2>;
def : VOpAcc_pat<V6_vmpyhv_acc,  Mul, Sext, VecPI32, HWI32, HVI16, -4>;
def : VOpAcc_pat<V6_vmpyuhv_acc, Mul, Zext, VecPI32, HWI32, HVI16, -4>;

// Mixed-extension byte multiply-accumulate: $Vs is zero-extended and $Vt is
// sign-extended, i.e. the second operand of V6_vmpybusv_acc is unsigned.
def : Pat<(VecPI16 (add HWI16:$Vx,
                        (mul (VecPI16 (zext HVI8:$Vs)),
                             (VecPI16 (sext HVI8:$Vt))))),
          (VShuff (V6_vmpybusv_acc (VDeal $Vx, -2), HVI8:$Vs, HVI8:$Vt),
                  -2)>;

// Mixed-extension halfword multiply-accumulate: $Vs is sign-extended and
// $Vt is zero-extended, i.e. the third operand of V6_vmpyhus_acc is unsigned.
def : Pat<(VecPI32 (add HWI32:$Vx,
                        (mul (VecPI32 (sext HVI16:$Vs)),
                             (VecPI32 (zext HVI16:$Vt))))),
          (VShuff (V6_vmpyhus_acc (VDeal $Vx, -4), HVI16:$Vs, HVI16:$Vt),
                  -4)>;
}
}

let Predicates = [UseHVX] in {
let AddedComplexity = 200 in {
def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
Expand Down
140 changes: 2 additions & 138 deletions llvm/test/CodeGen/Hexagon/autohvx/arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -132,141 +132,5 @@ define <32 x i32> @xorw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
ret <32 x i32> %p
}

; --- add

; CHECK-LABEL: addb_64:
; CHECK: vadd(v0.b,v1.b)
define <64 x i8> @addb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
%p = add <64 x i8> %v0, %v1
ret <64 x i8> %p
}

; CHECK-LABEL: addb_128:
; CHECK: vadd(v0.b,v1.b)
define <128 x i8> @addb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
%p = add <128 x i8> %v0, %v1
ret <128 x i8> %p
}

; CHECK-LABEL: addh_64:
; CHECK: vadd(v0.h,v1.h)
define <32 x i16> @addh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
%p = add <32 x i16> %v0, %v1
ret <32 x i16> %p
}

; CHECK-LABEL: addh_128:
; CHECK: vadd(v0.h,v1.h)
define <64 x i16> @addh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
%p = add <64 x i16> %v0, %v1
ret <64 x i16> %p
}

; CHECK-LABEL: addw_64:
; CHECK: vadd(v0.w,v1.w)
define <16 x i32> @addw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
%p = add <16 x i32> %v0, %v1
ret <16 x i32> %p
}

; CHECK-LABEL: addw_128:
; CHECK: vadd(v0.w,v1.w)
define <32 x i32> @addw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
%p = add <32 x i32> %v0, %v1
ret <32 x i32> %p
}

; --- sub

; CHECK-LABEL: subb_64:
; CHECK: vsub(v0.b,v1.b)
define <64 x i8> @subb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
%p = sub <64 x i8> %v0, %v1
ret <64 x i8> %p
}

; CHECK-LABEL: subb_128:
; CHECK: vsub(v0.b,v1.b)
define <128 x i8> @subb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
%p = sub <128 x i8> %v0, %v1
ret <128 x i8> %p
}

; CHECK-LABEL: subh_64:
; CHECK: vsub(v0.h,v1.h)
define <32 x i16> @subh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
%p = sub <32 x i16> %v0, %v1
ret <32 x i16> %p
}

; CHECK-LABEL: subh_128:
; CHECK: vsub(v0.h,v1.h)
define <64 x i16> @subh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
%p = sub <64 x i16> %v0, %v1
ret <64 x i16> %p
}

; CHECK-LABEL: subw_64:
; CHECK: vsub(v0.w,v1.w)
define <16 x i32> @subw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
%p = sub <16 x i32> %v0, %v1
ret <16 x i32> %p
}

; CHECK-LABEL: subw_128:
; CHECK: vsub(v0.w,v1.w)
define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
%p = sub <32 x i32> %v0, %v1
ret <32 x i32> %p
}

; --- mul

; CHECK-LABEL: mpyb_64:
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
%p = mul <64 x i8> %v0, %v1
ret <64 x i8> %p
}

; CHECK-LABEL: mpyb_128:
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
%p = mul <128 x i8> %v0, %v1
ret <128 x i8> %p
}

; CHECK-LABEL: mpyh_64:
; CHECK: vmpyi(v0.h,v1.h)
define <32 x i16> @mpyh_64(<32 x i16> %v0, <32 x i16> %v1) #0 {
%p = mul <32 x i16> %v0, %v1
ret <32 x i16> %p
}

; CHECK-LABEL: mpyh_128:
; CHECK: vmpyi(v0.h,v1.h)
define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
%p = mul <64 x i16> %v0, %v1
ret <64 x i16> %p
}

; CHECK-LABEL: mpyw_64:
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
%p = mul <16 x i32> %v0, %v1
ret <16 x i32> %p
}

; CHECK-LABEL: mpyw_128:
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
%p = mul <32 x i32> %v0, %v1
ret <32 x i32> %p
}

attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
attributes #1 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
135 changes: 134 additions & 1 deletion llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll
Original file line number Diff line number Diff line change
Expand Up @@ -573,4 +573,137 @@ define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 {
ret <32 x i32> %t1
}

attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
; --- Float32

; fcmp oeq + select: expects a word equality compare feeding vmux.
; NOTE(review): the expected compare is on integer words (.w), not .sf.
; Bitwise equality differs from ordered float equality for NaN inputs and
; for +0.0 vs -0.0 -- confirm this lowering is intended.
; CHECK-LABEL: test_2j:
; CHECK: q[[Q2J0:[0-3]]] = vcmp.eq(v0.w,v1.w)
; CHECK: v0 = vmux(q[[Q2J0]],v0,v1)
define <32 x float> @test_2j(<32 x float> %v0, <32 x float> %v1) #1 {
%t0 = fcmp oeq <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; fcmp one + select: expects the same word equality compare with the vmux
; data operands swapped.
; NOTE(review): same NaN / signed-zero concern as for the oeq case -- confirm.
; CHECK-LABEL: test_2k:
; CHECK: q[[Q2K0:[0-3]]] = vcmp.eq(v0.w,v1.w)
; CHECK: v0 = vmux(q[[Q2K0]],v1,v0)
define <32 x float> @test_2k(<32 x float> %v0, <32 x float> %v1) #1 {
%t0 = fcmp one <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; fcmp olt + select of the compared values: expects a single .sf vmin.
; CHECK-LABEL: test_2l:
; CHECK: v0.sf = vmin(v1.sf,v0.sf)
define <32 x float> @test_2l(<32 x float> %v0, <32 x float> %v1) #1 {
%t0 = fcmp olt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; fcmp ole + select: expects vcmp.gt(v0,v1) with the vmux data operands
; swapped.
; NOTE(review): "not greater-than" also holds for unordered inputs, while
; ole is false for them -- confirm the NaN behaviour is acceptable.
; CHECK-LABEL: test_2m:
; CHECK: q[[Q2M0:[0-3]]] = vcmp.gt(v0.sf,v1.sf)
; CHECK: v0 = vmux(q[[Q2M0]],v1,v0)
define <32 x float> @test_2m(<32 x float> %v0, <32 x float> %v1) #1 {
%t0 = fcmp ole <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; fcmp ogt + select of the compared values: expects a single .sf vmax.
; CHECK-LABEL: test_2n:
; CHECK: v0.sf = vmax(v0.sf,v1.sf)
define <32 x float> @test_2n(<32 x float> %v0, <32 x float> %v1) #1 {
%t0 = fcmp ogt <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; fcmp oge + select: expects vcmp.gt(v1,v0) with the vmux data operands
; swapped.
; NOTE(review): same unordered-input concern as for the ole case -- confirm.
; CHECK-LABEL: test_2o:
; CHECK: q[[Q2O0:[0-3]]] = vcmp.gt(v1.sf,v0.sf)
; CHECK: v0 = vmux(q[[Q2O0]],v1,v0)
define <32 x float> @test_2o(<32 x float> %v0, <32 x float> %v1) #1 {
%t0 = fcmp oge <32 x float> %v0, %v1
%t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; CHECK-LABEL: test_2p:
; CHECK: r[[R2P0:[0-9]*]] = ##16843009
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't use the 0x01010101 value, use -1 instead. -1 doesn't require a constant extender.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using -1 wouldn't be semantically equivalent, because the IR does trunc <32 x i32> %v2 to <32 x i1>, which results in only the least-significant bit of each lane defining the predicate. Using -1 would make vand(v2, -1) pass all bits through. So, we have to use ##16843009 (0x01010101).

Copy link
Contributor

@kparzysz kparzysz Dec 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to do a truncation from v32i32 to v32i1, using 0x01010101 is still wrong.

A Q register always has 128 bits. A value of type v32i1 is represented as 32 groups of 4 bits each. Within each group all 4 bits have to be 0 or all have to be 1.

If v2 has 0x00000001 repeated 32 times, using vand with 0x01010101 will produce groups of bits in Q that are 0b0000 or 0b0001. This will cause the final vmux to pick only the lowest byte from v0 instead of the whole float. To get a proper truncation from v32i32 to v32i1, do vandv(v2, vsplatw(0x00000001)) first, then vand(result, -1) vcmpw.eq(result, zero).

; (fcmp oeq) AND (trunc of %v2 to i1) drives the select. The accumulating
; compare reads and writes the Q register produced by vand, so the capture
; made on the vand line is reused below instead of being redefined.
; CHECK: q[[Q2P1:[0-3]]] = vand(v2,r[[R2P0]])
; CHECK: q[[Q2P1]] &= vcmp.eq(v0.w,v1.w)
; CHECK: v0 = vmux(q[[Q2P1]],v0,v1)
define <32 x float> @test_2p(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
%q0 = fcmp oeq <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = and <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; (fcmp oeq) OR (trunc of %v2 to i1) drives the select. The accumulating
; compare reads and writes the Q register produced by vand, so the capture
; made on the vand line is reused below instead of being redefined.
; CHECK-LABEL: test_2q:
; CHECK: r[[R2Q0:[0-9]*]] = ##16843009
; CHECK: q[[Q2Q1:[0-3]]] = vand(v2,r[[R2Q0]])
; CHECK: q[[Q2Q1]] |= vcmp.eq(v0.w,v1.w)
; CHECK: v0 = vmux(q[[Q2Q1]],v0,v1)
define <32 x float> @test_2q(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
%q0 = fcmp oeq <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = or <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; (fcmp oeq) XOR (trunc of %v2 to i1) drives the select. The accumulating
; compare reads and writes the Q register produced by vand, so the capture
; made on the vand line is reused below instead of being redefined.
; CHECK-LABEL: test_2r:
; CHECK: r[[R2R0:[0-9]*]] = ##16843009
; CHECK: q[[Q2R1:[0-3]]] = vand(v2,r[[R2R0]])
; CHECK: q[[Q2R1]] ^= vcmp.eq(v0.w,v1.w)
; CHECK: v0 = vmux(q[[Q2R1]],v0,v1)
define <32 x float> @test_2r(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
%q0 = fcmp oeq <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = xor <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; (fcmp ogt) AND (trunc of %v2 to i1) drives the select. The vmux line must
; refer to this function's own predicate capture (Q2S1); it previously used
; a capture left over from test_2r. The accumulating compare also reuses the
; capture from the vand line instead of redefining it.
; CHECK-LABEL: test_2s:
; CHECK: r[[R2S0:[0-9]*]] = ##16843009
; CHECK: q[[Q2S1:[0-3]]] = vand(v2,r[[R2S0]])
; CHECK: q[[Q2S1]] &= vcmp.gt(v0.sf,v1.sf)
; CHECK: v0 = vmux(q[[Q2S1]],v0,v1)
define <32 x float> @test_2s(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
%q0 = fcmp ogt <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = and <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; (fcmp ogt) OR (trunc of %v2 to i1) drives the select. The accumulating
; compare reads and writes the Q register produced by vand, so the capture
; made on the vand line is reused below instead of being redefined.
; CHECK-LABEL: test_2t:
; CHECK: r[[R2T0:[0-9]*]] = ##16843009
; CHECK: q[[Q2T1:[0-3]]] = vand(v2,r[[R2T0]])
; CHECK: q[[Q2T1]] |= vcmp.gt(v0.sf,v1.sf)
; CHECK: v0 = vmux(q[[Q2T1]],v0,v1)
define <32 x float> @test_2t(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
%q0 = fcmp ogt <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = or <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

; (fcmp ogt) XOR (trunc of %v2 to i1) drives the select. The accumulating
; compare reads and writes the Q register produced by vand, so the capture
; made on the vand line is reused below instead of being redefined.
; CHECK-LABEL: test_2u:
; CHECK: r[[R2U0:[0-9]*]] = ##16843009
; CHECK: q[[Q2U1:[0-3]]] = vand(v2,r[[R2U0]])
; CHECK: q[[Q2U1]] ^= vcmp.gt(v0.sf,v1.sf)
; CHECK: v0 = vmux(q[[Q2U1]],v0,v1)
define <32 x float> @test_2u(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 {
%q0 = fcmp ogt <32 x float> %v0, %v1
%q1 = trunc <32 x i32> %v2 to <32 x i1>
%q2 = xor <32 x i1> %q0, %q1
%t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1
ret <32 x float> %t1
}

attributes #0 = { nounwind readnone "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
attributes #1 = { nounwind readnone "target-cpu"="hexagonv68" "target-features"="+hvxv68,+hvx-length128b,+hvx-qfloat" }
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll
Original file line number Diff line number Diff line change
Expand Up @@ -574,4 +574,4 @@ define <16 x i32> @test_2i(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 {
ret <16 x i32> %t1
}

attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
attributes #0 = { nounwind readnone "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }
Loading