Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1839,11 +1839,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);

// NEON doesn't support integer divides, but SVE does
// A number of operations like MULH and integer divides are not supported by
// NEON but are available in SVE.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
}

// NEON doesn't support 64-bit vector integer muls, but SVE does.
Expand Down Expand Up @@ -1880,10 +1883,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
Expand All @@ -1905,8 +1904,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
}

// Use SVE for vectors with more than 2 elements.
Expand Down
140 changes: 80 additions & 60 deletions llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,11 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: smulh_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = sext <16 x i8> %a to <16 x i16>
%2 = sext <16 x i8> %b to <16 x i16>
Expand All @@ -142,9 +144,11 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: smulh_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = sext <8 x i16> %a to <8 x i32>
%2 = sext <8 x i16> %b to <8 x i32>
Expand All @@ -157,9 +161,11 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: smulh_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = sext <4 x i32> %a to <4 x i64>
%2 = sext <4 x i32> %b to <4 x i64>
Expand All @@ -172,15 +178,11 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: smulh_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: mov x9, v1.d[1]
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: fmov x11, d1
; CHECK-NEXT: smulh x10, x10, x11
; CHECK-NEXT: smulh x8, x8, x9
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = sext <2 x i64> %a to <2 x i128>
%2 = sext <2 x i64> %b to <2 x i128>
Expand All @@ -193,9 +195,11 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: umulh_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b
; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: uzp2 v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = zext <16 x i8> %a to <16 x i16>
%2 = zext <16 x i8> %b to <16 x i16>
Expand All @@ -208,9 +212,11 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: umulh_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = zext <8 x i16> %a to <8 x i32>
%2 = zext <8 x i16> %b to <8 x i32>
Expand All @@ -223,9 +229,11 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: umulh_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v2.4s
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = zext <4 x i32> %a to <4 x i64>
%2 = zext <4 x i32> %b to <4 x i64>
Expand All @@ -238,15 +246,11 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: umulh_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: mov x9, v1.d[1]
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: fmov x11, d1
; CHECK-NEXT: umulh x10, x10, x11
; CHECK-NEXT: umulh x8, x8, x9
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%1 = zext <2 x i64> %a to <2 x i128>
%2 = zext <2 x i64> %b to <2 x i128>
Expand All @@ -263,8 +267,11 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) {
define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: smulh_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: shrn v0.8b, v0.8h, #8
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = sext <8 x i8> %a to <8 x i16>
%2 = sext <8 x i8> %b to <8 x i16>
Expand All @@ -277,8 +284,11 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: smulh_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = sext <4 x i16> %a to <4 x i32>
%2 = sext <4 x i16> %b to <4 x i32>
Expand All @@ -291,8 +301,11 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: smulh_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: shrn v0.2s, v0.2d, #32
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = sext <2 x i32> %a to <2 x i64>
%2 = sext <2 x i32> %b to <2 x i64>
Expand All @@ -305,12 +318,11 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: smulh_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: smulh x8, x8, x9
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = sext <1 x i64> %a to <1 x i128>
%2 = sext <1 x i64> %b to <1 x i128>
Expand All @@ -323,8 +335,11 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: umulh_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: shrn v0.8b, v0.8h, #8
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = zext <8 x i8> %a to <8 x i16>
%2 = zext <8 x i8> %b to <8 x i16>
Expand All @@ -337,8 +352,11 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) {
define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: umulh_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v0.4h, v0.4s, #16
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = zext <4 x i16> %a to <4 x i32>
%2 = zext <4 x i16> %b to <4 x i32>
Expand All @@ -351,8 +369,11 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) {
define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: umulh_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: shrn v0.2s, v0.2d, #32
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = zext <2 x i32> %a to <2 x i64>
%2 = zext <2 x i32> %b to <2 x i64>
Expand All @@ -365,12 +386,11 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) {
define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: umulh_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: umulh x8, x8, x9
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = zext <1 x i64> %a to <1 x i128>
%2 = zext <1 x i64> %b to <1 x i128>
Expand Down
Loading
Loading