diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 89165fa8f8fdb..3ad88653ae44b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4571,6 +4571,20 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { return false; EVT VecVT = LHS.getValueType(); + // Rewrite integer 'not equal to zero-vector' compares as unsigned + // greater-than/less-than: a lane is non-zero iff it is unsigned-above 0. + // This selects a single vcmpgtu* instead of vcmpequ* plus a negate. + // * for k != 0, change SETNE to SETUGT (k >u 0) + // * for 0 != k, change SETNE to SETULT (0 <u k) + if (CC == ISD::SETNE) { + // Integer elements only; FP compares are left untouched. + if (VecVT.getVectorElementType().isInteger()) { + if (ISD::isBuildVectorAllZeros(RHS.getNode())) + CC = ISD::SETUGT; + else if (ISD::isBuildVectorAllZeros(LHS.getNode())) + CC = ISD::SETULT; + } + } bool Swap, Negate; unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate); diff --git a/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll index 1325abf0ff3b7..0ae2a43ccb724 100644 --- a/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll +++ b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll @@ -10,8 +10,7 @@ define i32 @test_Greater_than(ptr %colauths) { ; This testcase is for the special case of zero-vector comparisons. -; Currently the generated code does a comparison (vcmpequh) and then a negation (xxlnor). -; This pattern is expected to be optimized in a future patch. +; Optimize zero-vector `vcmpequh` compares followed by negate to `vcmpgtuh`. 
; POWERPC_64LE-LABEL: test_Greater_than: ; POWERPC_64LE: # %bb.0: # %entry ; POWERPC_64LE-NEXT: lfd f0, 0(r3) @@ -19,8 +18,7 @@ define i32 @test_Greater_than(ptr %colauths) { ; POWERPC_64LE-NEXT: li r4, 0 ; POWERPC_64LE-NEXT: li r3, 4 ; POWERPC_64LE-NEXT: xxswapd v2, f0 -; POWERPC_64LE-NEXT: vcmpequh v2, v2, v3 -; POWERPC_64LE-NEXT: xxlnor v2, v2, v2 +; POWERPC_64LE-NEXT: vcmpgtuh v2, v2, v3 ; POWERPC_64LE-NEXT: vmrglh v3, v2, v2 ; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2 ; POWERPC_64LE-NEXT: vextuwrx r3, r3, v3 @@ -42,8 +40,7 @@ define i32 @test_Greater_than(ptr %colauths) { ; POWERPC_64-NEXT: xxlxor v3, v3, v3 ; POWERPC_64-NEXT: li r4, 12 ; POWERPC_64-NEXT: li r3, 8 -; POWERPC_64-NEXT: vcmpequh v2, v2, v3 -; POWERPC_64-NEXT: xxlnor v2, v2, v2 +; POWERPC_64-NEXT: vcmpgtuh v2, v2, v3 ; POWERPC_64-NEXT: vmrghh v2, v2, v2 ; POWERPC_64-NEXT: vextuwlx r4, r4, v2 ; POWERPC_64-NEXT: vextuwlx r3, r3, v2 @@ -66,8 +63,7 @@ define i32 @test_Greater_than(ptr %colauths) { ; POWERPC_32-NEXT: xxlxor v3, v3, v3 ; POWERPC_32-NEXT: lxvwsx vs0, r3, r4 ; POWERPC_32-NEXT: xxmrghw v2, vs1, vs0 -; POWERPC_32-NEXT: vcmpequh v2, v2, v3 -; POWERPC_32-NEXT: xxlnor v2, v2, v2 +; POWERPC_32-NEXT: vcmpgtuh v2, v2, v3 ; POWERPC_32-NEXT: vmrghh v2, v2, v2 ; POWERPC_32-NEXT: stxv v2, -32(r1) ; POWERPC_32-NEXT: lwz r3, -20(r1) diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll index 87208691eb047..9c8c97500956b 100644 --- a/llvm/test/CodeGen/PowerPC/pr61315.ll +++ b/llvm/test/CodeGen/PowerPC/pr61315.ll @@ -1,23 +1,7 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s -define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 { -; CHECK: .LCPI0_0: -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 
0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 +define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) { ; CHECK-LABEL: ConvertExtractedMaskBitsToVect: ; CHECK: # %bb.0: ; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha @@ -29,8 +13,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) ; CHECK-NEXT: xxperm v2, v3, vs0 ; CHECK-NEXT: lxv vs0, 0(r3) ; CHECK-NEXT: xxland v2, v2, vs0 -; CHECK-NEXT: vcmpequb v2, v2, v3 -; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: vcmpgtub v2, v2, v3 ; CHECK-NEXT: blr %a4 = extractelement <16 x i8> %0, i64 7 %a5 = zext i8 %a4 to i16 @@ -43,24 +26,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) ret <16 x i8> %a11 } -define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 { -; CHECK: .LCPI1_0: -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 +define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) { ; CHECK-LABEL: ConvertExtractedMaskBitsToVect2: ; CHECK: # %bb.0: ; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha @@ -72,8 +38,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0 ; 
CHECK-NEXT: xxperm v2, v3, vs0 ; CHECK-NEXT: lxv vs0, 0(r3) ; CHECK-NEXT: xxland v2, v2, vs0 -; CHECK-NEXT: vcmpequb v2, v2, v3 -; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: vcmpgtub v2, v2, v3 ; CHECK-NEXT: blr %a4 = extractelement <16 x i8> %0, i64 7 %a5 = zext i8 %a4 to i32 @@ -86,24 +51,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0 ret <16 x i8> %a11 } -define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 { -; CHECK: .LCPI2_0: -; CHECK-NEXT: .byte 22 # 0x16 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 22 # 0x16 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 22 # 0x16 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 22 # 0x16 -; CHECK-NEXT: .byte 23 # 0x17 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 +define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) { ; CHECK-LABEL: ConvertExtractedMaskBitsToVect3: ; CHECK: # %bb.0: ; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha @@ -115,8 +63,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0 ; CHECK-NEXT: xxperm v2, v3, vs0 ; CHECK-NEXT: lxv vs0, 0(r3) ; CHECK-NEXT: xxland v2, v2, vs0 -; CHECK-NEXT: vcmpequb v2, v2, v3 -; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: vcmpgtub v2, v2, v3 ; CHECK-NEXT: blr %a4 = extractelement <8 x i16> %0, i64 3 %a5 = zext i16 %a4 to i32 diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll index 5ebfec68695f0..1e92abbcac933 100644 --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -367,8 +367,7 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) { ; CHECK: # %bb.0: ; CHECK-NEXT: xxlor v2, v2, v3 ; CHECK-NEXT: xxlxor v3, v3, v3 -; CHECK-NEXT: 
vcmpequw v2, v2, v3 -; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: vcmpgtuw v2, v2, v3 ; CHECK-NEXT: blr %a = icmp ne <4 x i32> %P, zeroinitializer %b = icmp ne <4 x i32> %Q, zeroinitializer diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll index 56382092dc8ee..6889bba22e064 100644 --- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll @@ -17,8 +17,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; PWR5-NEXT: vaddubm v3, v2, v3 ; PWR5-NEXT: vand v2, v2, v3 ; PWR5-NEXT: vxor v3, v3, v3 -; PWR5-NEXT: vcmpequb v2, v2, v3 -; PWR5-NEXT: vnot v2, v2 +; PWR5-NEXT: vcmpgtub v2, v2, v3 ; PWR5-NEXT: blr ; ; PWR6-LABEL: ugt_1_v16i8: @@ -27,8 +26,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; PWR6-NEXT: vaddubm v3, v2, v3 ; PWR6-NEXT: vand v2, v2, v3 ; PWR6-NEXT: vxor v3, v3, v3 -; PWR6-NEXT: vcmpequb v2, v2, v3 -; PWR6-NEXT: vnot v2, v2 +; PWR6-NEXT: vcmpgtub v2, v2, v3 ; PWR6-NEXT: blr ; ; PWR7-LABEL: ugt_1_v16i8: @@ -37,8 +35,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; PWR7-NEXT: vaddubm v3, v2, v3 ; PWR7-NEXT: xxland v2, v2, v3 ; PWR7-NEXT: xxlxor v3, v3, v3 -; PWR7-NEXT: vcmpequb v2, v2, v3 -; PWR7-NEXT: xxlnor v2, v2, v2 +; PWR7-NEXT: vcmpgtub v2, v2, v3 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ugt_1_v16i8: @@ -1085,8 +1082,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) { ; PWR5-NEXT: vadduhm v3, v2, v3 ; PWR5-NEXT: vand v2, v2, v3 ; PWR5-NEXT: vxor v3, v3, v3 -; PWR5-NEXT: vcmpequh v2, v2, v3 -; PWR5-NEXT: vnot v2, v2 +; PWR5-NEXT: vcmpgtuh v2, v2, v3 ; PWR5-NEXT: blr ; ; PWR6-LABEL: ugt_1_v8i16: @@ -1095,8 +1091,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) { ; PWR6-NEXT: vadduhm v3, v2, v3 ; PWR6-NEXT: vand v2, v2, v3 ; PWR6-NEXT: vxor v3, v3, v3 -; PWR6-NEXT: vcmpequh v2, v2, v3 -; PWR6-NEXT: vnot v2, v2 +; PWR6-NEXT: vcmpgtuh v2, v2, v3 ; PWR6-NEXT: blr ; ; PWR7-LABEL: ugt_1_v8i16: @@ -1105,8 +1100,7 @@ define 
<8 x i16> @ugt_1_v8i16(<8 x i16> %0) { ; PWR7-NEXT: vadduhm v3, v2, v3 ; PWR7-NEXT: xxland v2, v2, v3 ; PWR7-NEXT: xxlxor v3, v3, v3 -; PWR7-NEXT: vcmpequh v2, v2, v3 -; PWR7-NEXT: xxlnor v2, v2, v2 +; PWR7-NEXT: vcmpgtuh v2, v2, v3 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ugt_1_v8i16: @@ -4105,8 +4099,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) { ; PWR5-NEXT: vadduwm v3, v2, v3 ; PWR5-NEXT: vand v2, v2, v3 ; PWR5-NEXT: vxor v3, v3, v3 -; PWR5-NEXT: vcmpequw v2, v2, v3 -; PWR5-NEXT: vnot v2, v2 +; PWR5-NEXT: vcmpgtuw v2, v2, v3 ; PWR5-NEXT: blr ; ; PWR6-LABEL: ugt_1_v4i32: @@ -4115,8 +4108,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) { ; PWR6-NEXT: vadduwm v3, v2, v3 ; PWR6-NEXT: vand v2, v2, v3 ; PWR6-NEXT: vxor v3, v3, v3 -; PWR6-NEXT: vcmpequw v2, v2, v3 -; PWR6-NEXT: vnot v2, v2 +; PWR6-NEXT: vcmpgtuw v2, v2, v3 ; PWR6-NEXT: blr ; ; PWR7-LABEL: ugt_1_v4i32: @@ -4125,8 +4117,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) { ; PWR7-NEXT: vadduwm v3, v2, v3 ; PWR7-NEXT: xxland v2, v2, v3 ; PWR7-NEXT: xxlxor v3, v3, v3 -; PWR7-NEXT: vcmpequw v2, v2, v3 -; PWR7-NEXT: xxlnor v2, v2, v2 +; PWR7-NEXT: vcmpgtuw v2, v2, v3 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ugt_1_v4i32: @@ -11971,9 +11962,8 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) { ; PWR7-NEXT: addis r3, r2, .LCPI100_0@toc@ha ; PWR7-NEXT: addi r3, r3, .LCPI100_0@toc@l ; PWR7-NEXT: xxland v2, v2, vs0 -; PWR7-NEXT: vcmpequw v2, v2, v3 +; PWR7-NEXT: vcmpgtuw v2, v2, v3 ; PWR7-NEXT: lxvw4x v3, 0, r3 -; PWR7-NEXT: xxlnor v2, v2, v2 ; PWR7-NEXT: vperm v3, v2, v2, v3 ; PWR7-NEXT: xxlor v2, v3, v2 ; PWR7-NEXT: blr