Skip to content

Commit 86f6cee

Browse files
author
himadhith
committed
[PowerPC] vector compare greater than support for Zero vector comparisons
vector compare greater than support for Zero vector comparisons review changes
1 parent 310811a commit 86f6cee

File tree

6 files changed

+34
-91
lines changed

6 files changed

+34
-91
lines changed

llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4570,6 +4570,17 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
45704570
if (Subtarget->hasSPE())
45714571
return false;
45724572

4573+
// Optimize 'Not equal to zero-vector' comparisons to 'Greater than or
4574+
// less than' operators.
4575+
// Example: for an unsigned integer element k, (k != 0) == (k > 0).
4576+
// * for k != 0, change SETNE to SETUGT (k > 0)
4577+
// * for 0 != k, change SETNE to SETULT (0 < k)
4578+
if (CC == ISD::SETNE) {
4579+
if (ISD::isBuildVectorAllZeros(RHS.getNode()))
4580+
CC = ISD::SETUGT;
4581+
else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
4582+
CC = ISD::SETULT;
4583+
}
45734584
EVT VecVT = LHS.getValueType();
45744585
bool Swap, Negate;
45754586
unsigned int VCmpInst =

llvm/test/CodeGen/PowerPC/check-zero-vector.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,15 @@
1010

1111
define i32 @test_Greater_than(ptr %colauths) {
1212
; This testcase is for the special case of zero-vector comparisons.
13-
; Currently the generated code does a comparison (vcmpequh) and then a negation (xxlnor).
14-
; This pattern is expected to be optimized in a future patch.
13+
; A `vcmpequh` against the zero vector followed by a negation (`xxlnor`) is optimized to a single `vcmpgtuh`.
1514
; POWERPC_64LE-LABEL: test_Greater_than:
1615
; POWERPC_64LE: # %bb.0: # %entry
1716
; POWERPC_64LE-NEXT: lfd f0, 0(r3)
1817
; POWERPC_64LE-NEXT: xxlxor v3, v3, v3
1918
; POWERPC_64LE-NEXT: li r4, 0
2019
; POWERPC_64LE-NEXT: li r3, 4
2120
; POWERPC_64LE-NEXT: xxswapd v2, f0
22-
; POWERPC_64LE-NEXT: vcmpequh v2, v2, v3
23-
; POWERPC_64LE-NEXT: xxlnor v2, v2, v2
21+
; POWERPC_64LE-NEXT: vcmpgtuh v2, v2, v3
2422
; POWERPC_64LE-NEXT: vmrglh v3, v2, v2
2523
; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
2624
; POWERPC_64LE-NEXT: vextuwrx r3, r3, v3
@@ -42,8 +40,7 @@ define i32 @test_Greater_than(ptr %colauths) {
4240
; POWERPC_64-NEXT: xxlxor v3, v3, v3
4341
; POWERPC_64-NEXT: li r4, 12
4442
; POWERPC_64-NEXT: li r3, 8
45-
; POWERPC_64-NEXT: vcmpequh v2, v2, v3
46-
; POWERPC_64-NEXT: xxlnor v2, v2, v2
43+
; POWERPC_64-NEXT: vcmpgtuh v2, v2, v3
4744
; POWERPC_64-NEXT: vmrghh v2, v2, v2
4845
; POWERPC_64-NEXT: vextuwlx r4, r4, v2
4946
; POWERPC_64-NEXT: vextuwlx r3, r3, v2
@@ -66,8 +63,7 @@ define i32 @test_Greater_than(ptr %colauths) {
6663
; POWERPC_32-NEXT: xxlxor v3, v3, v3
6764
; POWERPC_32-NEXT: lxvwsx vs0, r3, r4
6865
; POWERPC_32-NEXT: xxmrghw v2, vs1, vs0
69-
; POWERPC_32-NEXT: vcmpequh v2, v2, v3
70-
; POWERPC_32-NEXT: xxlnor v2, v2, v2
66+
; POWERPC_32-NEXT: vcmpgtuh v2, v2, v3
7167
; POWERPC_32-NEXT: vmrghh v2, v2, v2
7268
; POWERPC_32-NEXT: stxv v2, -32(r1)
7369
; POWERPC_32-NEXT: lwz r3, -20(r1)

llvm/test/CodeGen/PowerPC/pr61315.ll

Lines changed: 7 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,7 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
23
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
3-
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
4-
; CHECK: .LCPI0_0:
5-
; CHECK-NEXT: .byte 23 # 0x17
6-
; CHECK-NEXT: .byte 23 # 0x17
7-
; CHECK-NEXT: .byte 23 # 0x17
8-
; CHECK-NEXT: .byte 23 # 0x17
9-
; CHECK-NEXT: .byte 23 # 0x17
10-
; CHECK-NEXT: .byte 23 # 0x17
11-
; CHECK-NEXT: .byte 23 # 0x17
12-
; CHECK-NEXT: .byte 23 # 0x17
13-
; CHECK-NEXT: .byte 0 # 0x0
14-
; CHECK-NEXT: .byte 0 # 0x0
15-
; CHECK-NEXT: .byte 0 # 0x0
16-
; CHECK-NEXT: .byte 0 # 0x0
17-
; CHECK-NEXT: .byte 0 # 0x0
18-
; CHECK-NEXT: .byte 0 # 0x0
19-
; CHECK-NEXT: .byte 0 # 0x0
20-
; CHECK-NEXT: .byte 0 # 0x0
4+
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) {
215
; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
226
; CHECK: # %bb.0:
237
; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
@@ -29,8 +13,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
2913
; CHECK-NEXT: xxperm v2, v3, vs0
3014
; CHECK-NEXT: lxv vs0, 0(r3)
3115
; CHECK-NEXT: xxland v2, v2, vs0
32-
; CHECK-NEXT: vcmpequb v2, v2, v3
33-
; CHECK-NEXT: xxlnor v2, v2, v2
16+
; CHECK-NEXT: vcmpgtub v2, v2, v3
3417
; CHECK-NEXT: blr
3518
%a4 = extractelement <16 x i8> %0, i64 7
3619
%a5 = zext i8 %a4 to i16
@@ -43,24 +26,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
4326
ret <16 x i8> %a11
4427
}
4528

46-
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
47-
; CHECK: .LCPI1_0:
48-
; CHECK-NEXT: .byte 23 # 0x17
49-
; CHECK-NEXT: .byte 23 # 0x17
50-
; CHECK-NEXT: .byte 23 # 0x17
51-
; CHECK-NEXT: .byte 23 # 0x17
52-
; CHECK-NEXT: .byte 23 # 0x17
53-
; CHECK-NEXT: .byte 23 # 0x17
54-
; CHECK-NEXT: .byte 23 # 0x17
55-
; CHECK-NEXT: .byte 23 # 0x17
56-
; CHECK-NEXT: .byte 0 # 0x0
57-
; CHECK-NEXT: .byte 0 # 0x0
58-
; CHECK-NEXT: .byte 0 # 0x0
59-
; CHECK-NEXT: .byte 0 # 0x0
60-
; CHECK-NEXT: .byte 0 # 0x0
61-
; CHECK-NEXT: .byte 0 # 0x0
62-
; CHECK-NEXT: .byte 0 # 0x0
63-
; CHECK-NEXT: .byte 0 # 0x0
29+
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) {
6430
; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
6531
; CHECK: # %bb.0:
6632
; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
@@ -72,8 +38,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
7238
; CHECK-NEXT: xxperm v2, v3, vs0
7339
; CHECK-NEXT: lxv vs0, 0(r3)
7440
; CHECK-NEXT: xxland v2, v2, vs0
75-
; CHECK-NEXT: vcmpequb v2, v2, v3
76-
; CHECK-NEXT: xxlnor v2, v2, v2
41+
; CHECK-NEXT: vcmpgtub v2, v2, v3
7742
; CHECK-NEXT: blr
7843
%a4 = extractelement <16 x i8> %0, i64 7
7944
%a5 = zext i8 %a4 to i32
@@ -86,24 +51,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
8651
ret <16 x i8> %a11
8752
}
8853

89-
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
90-
; CHECK: .LCPI2_0:
91-
; CHECK-NEXT: .byte 22 # 0x16
92-
; CHECK-NEXT: .byte 23 # 0x17
93-
; CHECK-NEXT: .byte 22 # 0x16
94-
; CHECK-NEXT: .byte 23 # 0x17
95-
; CHECK-NEXT: .byte 22 # 0x16
96-
; CHECK-NEXT: .byte 23 # 0x17
97-
; CHECK-NEXT: .byte 22 # 0x16
98-
; CHECK-NEXT: .byte 23 # 0x17
99-
; CHECK-NEXT: .byte 0 # 0x0
100-
; CHECK-NEXT: .byte 0 # 0x0
101-
; CHECK-NEXT: .byte 0 # 0x0
102-
; CHECK-NEXT: .byte 0 # 0x0
103-
; CHECK-NEXT: .byte 0 # 0x0
104-
; CHECK-NEXT: .byte 0 # 0x0
105-
; CHECK-NEXT: .byte 0 # 0x0
106-
; CHECK-NEXT: .byte 0 # 0x0
54+
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) {
10755
; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
10856
; CHECK: # %bb.0:
10957
; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
@@ -115,8 +63,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0
11563
; CHECK-NEXT: xxperm v2, v3, vs0
11664
; CHECK-NEXT: lxv vs0, 0(r3)
11765
; CHECK-NEXT: xxland v2, v2, vs0
118-
; CHECK-NEXT: vcmpequb v2, v2, v3
119-
; CHECK-NEXT: xxlnor v2, v2, v2
66+
; CHECK-NEXT: vcmpgtub v2, v2, v3
12067
; CHECK-NEXT: blr
12168
%a4 = extractelement <8 x i16> %0, i64 3
12269
%a5 = zext i16 %a4 to i32

llvm/test/CodeGen/PowerPC/recipest.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1034,7 +1034,7 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
10341034
; CHECK-P7-NEXT: vmaddfp v5, v5, v0, v3
10351035
; CHECK-P7-NEXT: vmaddfp v3, v5, v4, v3
10361036
; CHECK-P7-NEXT: vxor v4, v4, v4
1037-
; CHECK-P7-NEXT: vcmpeqfp v2, v2, v4
1037+
; CHECK-P7-NEXT: vcmpgefp v2, v4, v2
10381038
; CHECK-P7-NEXT: vnot v2, v2
10391039
; CHECK-P7-NEXT: vand v2, v2, v3
10401040
; CHECK-P7-NEXT: blr

llvm/test/CodeGen/PowerPC/setcc-logic.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,8 +367,7 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
367367
; CHECK: # %bb.0:
368368
; CHECK-NEXT: xxlor v2, v2, v3
369369
; CHECK-NEXT: xxlxor v3, v3, v3
370-
; CHECK-NEXT: vcmpequw v2, v2, v3
371-
; CHECK-NEXT: xxlnor v2, v2, v2
370+
; CHECK-NEXT: vcmpgtuw v2, v2, v3
372371
; CHECK-NEXT: blr
373372
%a = icmp ne <4 x i32> %P, zeroinitializer
374373
%b = icmp ne <4 x i32> %Q, zeroinitializer

llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
1717
; PWR5-NEXT: vaddubm v3, v2, v3
1818
; PWR5-NEXT: vand v2, v2, v3
1919
; PWR5-NEXT: vxor v3, v3, v3
20-
; PWR5-NEXT: vcmpequb v2, v2, v3
21-
; PWR5-NEXT: vnot v2, v2
20+
; PWR5-NEXT: vcmpgtub v2, v2, v3
2221
; PWR5-NEXT: blr
2322
;
2423
; PWR6-LABEL: ugt_1_v16i8:
@@ -27,8 +26,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
2726
; PWR6-NEXT: vaddubm v3, v2, v3
2827
; PWR6-NEXT: vand v2, v2, v3
2928
; PWR6-NEXT: vxor v3, v3, v3
30-
; PWR6-NEXT: vcmpequb v2, v2, v3
31-
; PWR6-NEXT: vnot v2, v2
29+
; PWR6-NEXT: vcmpgtub v2, v2, v3
3230
; PWR6-NEXT: blr
3331
;
3432
; PWR7-LABEL: ugt_1_v16i8:
@@ -37,8 +35,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
3735
; PWR7-NEXT: vaddubm v3, v2, v3
3836
; PWR7-NEXT: xxland v2, v2, v3
3937
; PWR7-NEXT: xxlxor v3, v3, v3
40-
; PWR7-NEXT: vcmpequb v2, v2, v3
41-
; PWR7-NEXT: xxlnor v2, v2, v2
38+
; PWR7-NEXT: vcmpgtub v2, v2, v3
4239
; PWR7-NEXT: blr
4340
;
4441
; PWR8-LABEL: ugt_1_v16i8:
@@ -1085,8 +1082,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
10851082
; PWR5-NEXT: vadduhm v3, v2, v3
10861083
; PWR5-NEXT: vand v2, v2, v3
10871084
; PWR5-NEXT: vxor v3, v3, v3
1088-
; PWR5-NEXT: vcmpequh v2, v2, v3
1089-
; PWR5-NEXT: vnot v2, v2
1085+
; PWR5-NEXT: vcmpgtuh v2, v2, v3
10901086
; PWR5-NEXT: blr
10911087
;
10921088
; PWR6-LABEL: ugt_1_v8i16:
@@ -1095,8 +1091,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
10951091
; PWR6-NEXT: vadduhm v3, v2, v3
10961092
; PWR6-NEXT: vand v2, v2, v3
10971093
; PWR6-NEXT: vxor v3, v3, v3
1098-
; PWR6-NEXT: vcmpequh v2, v2, v3
1099-
; PWR6-NEXT: vnot v2, v2
1094+
; PWR6-NEXT: vcmpgtuh v2, v2, v3
11001095
; PWR6-NEXT: blr
11011096
;
11021097
; PWR7-LABEL: ugt_1_v8i16:
@@ -1105,8 +1100,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
11051100
; PWR7-NEXT: vadduhm v3, v2, v3
11061101
; PWR7-NEXT: xxland v2, v2, v3
11071102
; PWR7-NEXT: xxlxor v3, v3, v3
1108-
; PWR7-NEXT: vcmpequh v2, v2, v3
1109-
; PWR7-NEXT: xxlnor v2, v2, v2
1103+
; PWR7-NEXT: vcmpgtuh v2, v2, v3
11101104
; PWR7-NEXT: blr
11111105
;
11121106
; PWR8-LABEL: ugt_1_v8i16:
@@ -4105,8 +4099,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
41054099
; PWR5-NEXT: vadduwm v3, v2, v3
41064100
; PWR5-NEXT: vand v2, v2, v3
41074101
; PWR5-NEXT: vxor v3, v3, v3
4108-
; PWR5-NEXT: vcmpequw v2, v2, v3
4109-
; PWR5-NEXT: vnot v2, v2
4102+
; PWR5-NEXT: vcmpgtuw v2, v2, v3
41104103
; PWR5-NEXT: blr
41114104
;
41124105
; PWR6-LABEL: ugt_1_v4i32:
@@ -4115,8 +4108,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
41154108
; PWR6-NEXT: vadduwm v3, v2, v3
41164109
; PWR6-NEXT: vand v2, v2, v3
41174110
; PWR6-NEXT: vxor v3, v3, v3
4118-
; PWR6-NEXT: vcmpequw v2, v2, v3
4119-
; PWR6-NEXT: vnot v2, v2
4111+
; PWR6-NEXT: vcmpgtuw v2, v2, v3
41204112
; PWR6-NEXT: blr
41214113
;
41224114
; PWR7-LABEL: ugt_1_v4i32:
@@ -4125,8 +4117,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
41254117
; PWR7-NEXT: vadduwm v3, v2, v3
41264118
; PWR7-NEXT: xxland v2, v2, v3
41274119
; PWR7-NEXT: xxlxor v3, v3, v3
4128-
; PWR7-NEXT: vcmpequw v2, v2, v3
4129-
; PWR7-NEXT: xxlnor v2, v2, v2
4120+
; PWR7-NEXT: vcmpgtuw v2, v2, v3
41304121
; PWR7-NEXT: blr
41314122
;
41324123
; PWR8-LABEL: ugt_1_v4i32:
@@ -11971,9 +11962,8 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
1197111962
; PWR7-NEXT: addis r3, r2, .LCPI100_0@toc@ha
1197211963
; PWR7-NEXT: addi r3, r3, .LCPI100_0@toc@l
1197311964
; PWR7-NEXT: xxland v2, v2, vs0
11974-
; PWR7-NEXT: vcmpequw v2, v2, v3
11965+
; PWR7-NEXT: vcmpgtuw v2, v2, v3
1197511966
; PWR7-NEXT: lxvw4x v3, 0, r3
11976-
; PWR7-NEXT: xxlnor v2, v2, v2
1197711967
; PWR7-NEXT: vperm v3, v2, v2, v3
1197811968
; PWR7-NEXT: xxlor v2, v3, v2
1197911969
; PWR7-NEXT: blr

0 commit comments

Comments
 (0)