Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4571,6 +4571,20 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
return false;

EVT VecVT = LHS.getValueType();
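// Optimize integer 'not equal to zero-vector' comparisons into unsigned
// 'greater than' / 'less than' comparisons, so that the compare-equal
// followed by a negate is replaced by a single compare.
// Example: treating each element k as an unsigned value, k != 0 holds
// exactly when k > 0.
// * for k != 0, change SETNE to SETUGT (k > 0)
// * for 0 != k, change SETNE to SETULT (0 < k)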
if (CC == ISD::SETNE) {
// Only optimize for integer types (avoid FP completely)
if (VecVT.getVectorElementType().isInteger()) {
if (ISD::isBuildVectorAllZeros(RHS.getNode()))
CC = ISD::SETUGT;
else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
CC = ISD::SETULT;
}
}
bool Swap, Negate;
unsigned int VCmpInst =
getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,15 @@

define i32 @test_Greater_than(ptr %colauths) {
; This testcase is for the special case of zero-vector comparisons.
; Currently the generated code does a comparison (vcmpequh) and then a negation (xxlnor).
; This pattern is expected to be optimized in a future patch.
; The zero-vector `vcmpequh` compare followed by a negate is optimized to a single `vcmpgtuh`.
; POWERPC_64LE-LABEL: test_Greater_than:
; POWERPC_64LE: # %bb.0: # %entry
; POWERPC_64LE-NEXT: lfd f0, 0(r3)
; POWERPC_64LE-NEXT: xxlxor v3, v3, v3
; POWERPC_64LE-NEXT: li r4, 0
; POWERPC_64LE-NEXT: li r3, 4
; POWERPC_64LE-NEXT: xxswapd v2, f0
; POWERPC_64LE-NEXT: vcmpequh v2, v2, v3
; POWERPC_64LE-NEXT: xxlnor v2, v2, v2
; POWERPC_64LE-NEXT: vcmpgtuh v2, v2, v3
; POWERPC_64LE-NEXT: vmrglh v3, v2, v2
; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
; POWERPC_64LE-NEXT: vextuwrx r3, r3, v3
Expand All @@ -42,8 +40,7 @@ define i32 @test_Greater_than(ptr %colauths) {
; POWERPC_64-NEXT: xxlxor v3, v3, v3
; POWERPC_64-NEXT: li r4, 12
; POWERPC_64-NEXT: li r3, 8
; POWERPC_64-NEXT: vcmpequh v2, v2, v3
; POWERPC_64-NEXT: xxlnor v2, v2, v2
; POWERPC_64-NEXT: vcmpgtuh v2, v2, v3
; POWERPC_64-NEXT: vmrghh v2, v2, v2
; POWERPC_64-NEXT: vextuwlx r4, r4, v2
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
Expand All @@ -66,8 +63,7 @@ define i32 @test_Greater_than(ptr %colauths) {
; POWERPC_32-NEXT: xxlxor v3, v3, v3
; POWERPC_32-NEXT: lxvwsx vs0, r3, r4
; POWERPC_32-NEXT: xxmrghw v2, vs1, vs0
; POWERPC_32-NEXT: vcmpequh v2, v2, v3
; POWERPC_32-NEXT: xxlnor v2, v2, v2
; POWERPC_32-NEXT: vcmpgtuh v2, v2, v3
; POWERPC_32-NEXT: vmrghh v2, v2, v2
; POWERPC_32-NEXT: stxv v2, -32(r1)
; POWERPC_32-NEXT: lwz r3, -20(r1)
Expand Down
67 changes: 7 additions & 60 deletions llvm/test/CodeGen/PowerPC/pr61315.ll
Original file line number Diff line number Diff line change
@@ -1,23 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
; CHECK: .LCPI0_0:
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) {
; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
Expand All @@ -29,8 +13,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
; CHECK-NEXT: xxperm v2, v3, vs0
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxland v2, v2, vs0
; CHECK-NEXT: vcmpequb v2, v2, v3
; CHECK-NEXT: xxlnor v2, v2, v2
; CHECK-NEXT: vcmpgtub v2, v2, v3
; CHECK-NEXT: blr
%a4 = extractelement <16 x i8> %0, i64 7
%a5 = zext i8 %a4 to i16
Expand All @@ -43,24 +26,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
ret <16 x i8> %a11
}

define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
; CHECK: .LCPI1_0:
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) {
; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
Expand All @@ -72,8 +38,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
; CHECK-NEXT: xxperm v2, v3, vs0
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxland v2, v2, vs0
; CHECK-NEXT: vcmpequb v2, v2, v3
; CHECK-NEXT: xxlnor v2, v2, v2
; CHECK-NEXT: vcmpgtub v2, v2, v3
; CHECK-NEXT: blr
%a4 = extractelement <16 x i8> %0, i64 7
%a5 = zext i8 %a4 to i32
Expand All @@ -86,24 +51,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
ret <16 x i8> %a11
}

define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
; CHECK: .LCPI2_0:
; CHECK-NEXT: .byte 22 # 0x16
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 22 # 0x16
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 22 # 0x16
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 22 # 0x16
; CHECK-NEXT: .byte 23 # 0x17
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
; CHECK-NEXT: .byte 0 # 0x0
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) {
; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
Expand All @@ -115,8 +63,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0
; CHECK-NEXT: xxperm v2, v3, vs0
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxland v2, v2, vs0
; CHECK-NEXT: vcmpequb v2, v2, v3
; CHECK-NEXT: xxlnor v2, v2, v2
; CHECK-NEXT: vcmpgtub v2, v2, v3
; CHECK-NEXT: blr
%a4 = extractelement <8 x i16> %0, i64 3
%a5 = zext i16 %a4 to i32
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/PowerPC/setcc-logic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,7 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK: # %bb.0:
; CHECK-NEXT: xxlor v2, v2, v3
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vcmpequw v2, v2, v3
; CHECK-NEXT: xxlnor v2, v2, v2
; CHECK-NEXT: vcmpgtuw v2, v2, v3
; CHECK-NEXT: blr
%a = icmp ne <4 x i32> %P, zeroinitializer
%b = icmp ne <4 x i32> %Q, zeroinitializer
Expand Down
30 changes: 10 additions & 20 deletions llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; PWR5-NEXT: vaddubm v3, v2, v3
; PWR5-NEXT: vand v2, v2, v3
; PWR5-NEXT: vxor v3, v3, v3
; PWR5-NEXT: vcmpequb v2, v2, v3
; PWR5-NEXT: vnot v2, v2
; PWR5-NEXT: vcmpgtub v2, v2, v3
; PWR5-NEXT: blr
;
; PWR6-LABEL: ugt_1_v16i8:
Expand All @@ -27,8 +26,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; PWR6-NEXT: vaddubm v3, v2, v3
; PWR6-NEXT: vand v2, v2, v3
; PWR6-NEXT: vxor v3, v3, v3
; PWR6-NEXT: vcmpequb v2, v2, v3
; PWR6-NEXT: vnot v2, v2
; PWR6-NEXT: vcmpgtub v2, v2, v3
; PWR6-NEXT: blr
;
; PWR7-LABEL: ugt_1_v16i8:
Expand All @@ -37,8 +35,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; PWR7-NEXT: vaddubm v3, v2, v3
; PWR7-NEXT: xxland v2, v2, v3
; PWR7-NEXT: xxlxor v3, v3, v3
; PWR7-NEXT: vcmpequb v2, v2, v3
; PWR7-NEXT: xxlnor v2, v2, v2
; PWR7-NEXT: vcmpgtub v2, v2, v3
; PWR7-NEXT: blr
;
; PWR8-LABEL: ugt_1_v16i8:
Expand Down Expand Up @@ -1085,8 +1082,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
; PWR5-NEXT: vadduhm v3, v2, v3
; PWR5-NEXT: vand v2, v2, v3
; PWR5-NEXT: vxor v3, v3, v3
; PWR5-NEXT: vcmpequh v2, v2, v3
; PWR5-NEXT: vnot v2, v2
; PWR5-NEXT: vcmpgtuh v2, v2, v3
; PWR5-NEXT: blr
;
; PWR6-LABEL: ugt_1_v8i16:
Expand All @@ -1095,8 +1091,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
; PWR6-NEXT: vadduhm v3, v2, v3
; PWR6-NEXT: vand v2, v2, v3
; PWR6-NEXT: vxor v3, v3, v3
; PWR6-NEXT: vcmpequh v2, v2, v3
; PWR6-NEXT: vnot v2, v2
; PWR6-NEXT: vcmpgtuh v2, v2, v3
; PWR6-NEXT: blr
;
; PWR7-LABEL: ugt_1_v8i16:
Expand All @@ -1105,8 +1100,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
; PWR7-NEXT: vadduhm v3, v2, v3
; PWR7-NEXT: xxland v2, v2, v3
; PWR7-NEXT: xxlxor v3, v3, v3
; PWR7-NEXT: vcmpequh v2, v2, v3
; PWR7-NEXT: xxlnor v2, v2, v2
; PWR7-NEXT: vcmpgtuh v2, v2, v3
; PWR7-NEXT: blr
;
; PWR8-LABEL: ugt_1_v8i16:
Expand Down Expand Up @@ -4105,8 +4099,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
; PWR5-NEXT: vadduwm v3, v2, v3
; PWR5-NEXT: vand v2, v2, v3
; PWR5-NEXT: vxor v3, v3, v3
; PWR5-NEXT: vcmpequw v2, v2, v3
; PWR5-NEXT: vnot v2, v2
; PWR5-NEXT: vcmpgtuw v2, v2, v3
; PWR5-NEXT: blr
;
; PWR6-LABEL: ugt_1_v4i32:
Expand All @@ -4115,8 +4108,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
; PWR6-NEXT: vadduwm v3, v2, v3
; PWR6-NEXT: vand v2, v2, v3
; PWR6-NEXT: vxor v3, v3, v3
; PWR6-NEXT: vcmpequw v2, v2, v3
; PWR6-NEXT: vnot v2, v2
; PWR6-NEXT: vcmpgtuw v2, v2, v3
; PWR6-NEXT: blr
;
; PWR7-LABEL: ugt_1_v4i32:
Expand All @@ -4125,8 +4117,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
; PWR7-NEXT: vadduwm v3, v2, v3
; PWR7-NEXT: xxland v2, v2, v3
; PWR7-NEXT: xxlxor v3, v3, v3
; PWR7-NEXT: vcmpequw v2, v2, v3
; PWR7-NEXT: xxlnor v2, v2, v2
; PWR7-NEXT: vcmpgtuw v2, v2, v3
; PWR7-NEXT: blr
;
; PWR8-LABEL: ugt_1_v4i32:
Expand Down Expand Up @@ -11971,9 +11962,8 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
; PWR7-NEXT: addis r3, r2, .LCPI100_0@toc@ha
; PWR7-NEXT: addi r3, r3, .LCPI100_0@toc@l
; PWR7-NEXT: xxland v2, v2, vs0
; PWR7-NEXT: vcmpequw v2, v2, v3
; PWR7-NEXT: vcmpgtuw v2, v2, v3
; PWR7-NEXT: lxvw4x v3, 0, r3
; PWR7-NEXT: xxlnor v2, v2, v2
; PWR7-NEXT: vperm v3, v2, v2, v3
; PWR7-NEXT: xxlor v2, v3, v2
; PWR7-NEXT: blr
Expand Down