Merge commit '8c51812913c6aa301919a127d2cdfae7d9d6054a' into merge/ve-vvp-usdiv

Simon Moll · Simon Moll · commit a364251de1c2 · 2022-01-03T14:10:46.000+01:00
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -229,6 +229,14 @@ multiclass Binary_rv_vv<
   defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
 }
 
+// Binary operators that support broadcasts on LHS and RHS.
+multiclass Binary_rv_vr_vv<
+    SDPatternOperator OpNode, ValueType ScalarVT, ValueType DataVT,
+    ValueType MaskVT, string OpBaseName> {
+  defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+  defm : Binary_vr_vv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+}
+
 // Expand both 64bit and 32 bit variant (256 elements)
 multiclass Binary_rv_vv_ShortLong<
     SDPatternOperator OpNode,
@@ -254,23 +262,6 @@ multiclass Binary_vr_vv_ShortLong<
                       ShortOpBaseName>;
 }
 
-// Binary operators that support broadcasts on LHS and RHS.
-multiclass Binary_all<
-    SDPatternOperator OpNode,
-    ValueType ScalarVT, ValueType DataVT,
-    ValueType MaskVT, string OpBaseName> {
-  defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
-  defm : Binary_vr_vv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
-}
-
-multiclass Binary_ShortLong<
-    SDPatternOperator OpNode,
-    ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
-    ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
-  defm : Binary_all<OpNode, LongScalarVT, LongDataVT, v256i1, LongOpBaseName>;
-  defm : Binary_all<OpNode, ShortScalarVT, ShortDataVT, v256i1, ShortOpBaseName>;
-}
-
 multiclass Ternary<
     SDPatternOperator OpNode,
     ValueType ScalarVT, ValueType DataVT,
@@ -332,6 +323,18 @@ multiclass Ternary_ShortLong<
 // Integer arithmetic (256 elements)
 defm : Unary_ShortLong<vvp_ctpop, i64, v256i64, "VPCNT", i32, v256i32, "PVPCNTLO">;
 
+multiclass Binary_rv_vr_vv_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+    ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+  // "Long" variant (instantiated below with 64-bit element types).
+  defm : Binary_rv_vr_vv<OpNode, LongScalarVT, LongDataVT, v256i1,
+                         LongOpBaseName>;
+  // "Short" variant (instantiated below with 32-bit element types).
+  defm : Binary_rv_vr_vv<OpNode, ShortScalarVT, ShortDataVT, v256i1,
+                         ShortOpBaseName>;
+}
+
 defm : Binary_rv_vv_ShortLong<c_vvp_add,
                               i64, v256i64, "VADDSL",
                               i32, v256i32, "VADDSWSX">;
@@ -341,6 +344,12 @@ defm : Binary_rv_vv_ShortLong<vvp_sub,
 defm : Binary_rv_vv_ShortLong<c_vvp_mul,
                               i64, v256i64, "VMULSL",
                               i32, v256i32, "VMULSWSX">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_sdiv,
+                                 i64, v256i64, "VDIVSL",
+                                 i32, v256i32, "VDIVSWSX">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_udiv,
+                                 i64, v256i64, "VDIVUL",
+                                 i32, v256i32, "VDIVUW">;
 defm : Binary_rv_vv_ShortLong<c_vvp_and,
                               i64, v256i64, "VAND",
                               i32, v256i32, "PVANDLO">;
@@ -351,9 +360,6 @@ defm : Binary_rv_vv_ShortLong<c_vvp_xor,
                               i64, v256i64, "VXOR",
                               i32, v256i32, "PVXORLO">;
 
-defm : Binary_ShortLong<vvp_sdiv, i64, v256i64, "VDIVSL", i32, v256i32, "VDIVSWSX">;
-defm : Binary_ShortLong<vvp_udiv, i64, v256i64, "VDIVUL", i32, v256i32, "VDIVUW">;
-
 defm : Binary_vr_vv_ShortLong<vvp_shl,
                               i64, v256i64, "VSLL",
                               i32, v256i32, "PVSLLLO">;
@@ -370,7 +376,7 @@ defm : Unary_ShortLong<vvp_fsqrt, f64, v256f64, "VFSQRTD", f32, v256f32, "VFSQRT
 defm : Binary_rv_vv_ShortLong<c_vvp_fadd, f64, v256f64, "VFADDD", f32, v256f32, "PVFADDUP">;
 defm : Binary_rv_vv_ShortLong<vvp_fsub,  f64, v256f64, "VFSUBD", f32, v256f32, "PVFSUBUP">;
 defm : Binary_rv_vv_ShortLong<c_vvp_fmul, f64, v256f64, "VFMULD", f32, v256f32, "PVFMULUP">;
-defm : Binary_ShortLong<vvp_fdiv,  f64, v256f64, "VFDIVD", f32, v256f32, "VFDIVS">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,  f64, v256f64, "VFDIVD", f32, v256f32, "VFDIVS">;
 
 defm : Binary_rv_vv_ShortLong<c_vvp_fminnum, f64, v256f64, "VFMIND", f32, v256f32, "VFMINS">;
 defm : Binary_rv_vv_ShortLong<c_vvp_fmaxnum, f64, v256f64, "VFMAXD", f32, v256f32, "VFMAXS">;
diff --git a/llvm/test/CodeGen/VE/Vector/vp_sdiv.ll b/llvm/test/CodeGen/VE/Vector/vp_sdiv.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
-; CHECK-LABEL: test_vp_int:
+declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+define fastcc <256 x i32> @test_vp_sdiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i32_vv:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lvl %s0
@@ -12,5 +14,68 @@ define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+define fastcc <256 x i32> @test_vp_sdiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i32_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.w.sx %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s0, i32 0 ; place scalar %s0 in lane 0
+  %i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) ; splat is the LHS (dividend); the expected asm above takes it as a scalar register operand
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_sdiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.w.sx %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i32> undef, i32 %s1, i32 0 ; place scalar %s1 in lane 0
+  %i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) ; splat is the RHS (divisor); the expected asm above takes it as a scalar register operand
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_sdiv_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vdivs.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) ; both operands are full vectors (vector-vector form)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_sdiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i64_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.l %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s0, i32 0 ; place scalar %s0 in lane 0
+  %i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) ; splat is the LHS (dividend); the expected asm above takes it as a scalar register operand
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_sdiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.l %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i64> undef, i64 %s1, i32 0 ; place scalar %s1 in lane 0
+  %i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) ; splat is the RHS (divisor); the expected asm above takes it as a scalar register operand
+  ret <256 x i64> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_udiv.ll b/llvm/test/CodeGen/VE/Vector/vp_udiv.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
-; CHECK-LABEL: test_vp_int:
+declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+define fastcc <256 x i32> @test_vp_udiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i32_vv:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    lvl %s0
@@ -12,5 +14,68 @@ define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+define fastcc <256 x i32> @test_vp_udiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i32_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.w %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s0, i32 0 ; place scalar %s0 in lane 0
+  %i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) ; splat is the LHS (dividend); the expected asm above takes it as a scalar register operand
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_udiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.w %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i32> undef, i32 %s1, i32 0 ; place scalar %s1 in lane 0
+  %i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) ; splat is the RHS (divisor); the expected asm above takes it as a scalar register operand
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_udiv_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vdivu.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) ; both operands are full vectors (vector-vector form)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_udiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i64_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.l %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s0, i32 0 ; place scalar %s0 in lane 0
+  %i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) ; splat is the LHS (dividend); the expected asm above takes it as a scalar register operand
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_udiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.l %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i64> undef, i64 %s1, i32 0 ; place scalar %s1 in lane 0
+  %i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer ; all-zero shuffle mask replicates lane 0 into every lane (splat)
+  %r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) ; splat is the RHS (divisor); the expected asm above takes it as a scalar register operand
+  ret <256 x i64> %r0
+}