Skip to content

Commit 2e7ea9c

Browse files
tonykuttai and Tony Varghese
authored
[PowerPC] Exploit xxeval instruction for operations of the form ternary(A,X,B) and ternary(A,X,C). (#152956)
Adds support for ternary equivalent operations of the form `ternary(A, X, B)` and `ternary(A, X, C)` where `X=[and(B,C)| nor(B,C)| eqv(B,C)| nand(B,C)]`. The following are the patterns involved and the imm values:

| **Operation** | **Immediate Value** |
|----------------------------|---------------------|
| ternary(A, and(B,C), B) | 49 |
| ternary(A, nor(B,C), B) | 56 |
| ternary(A, eqv(B,C), B) | 57 |
| ternary(A, nand(B,C), B) | 62 |
| | |
| ternary(A, and(B,C), C) | 81 |
| ternary(A, nor(B,C), C) | 88 |
| ternary(A, eqv(B,C), C) | 89 |
| ternary(A, nand(B,C), C) | 94 |

e.g. `xxeval XT, XA, XB, XC, 49` - performs `XA ? and(XB, XC) : XB` and places the result in `XT`. This is the continuation of [[PowerPC] Exploit xxeval instruction for ternary patterns - ternary(A, X, and(B,C))](#141733 (comment)). --------- Co-authored-by: Tony Varghese <[email protected]>
1 parent 0b42e11 commit 2e7ea9c

File tree

3 files changed

+93
-66
lines changed

3 files changed

+93
-66
lines changed

llvm/lib/Target/PowerPC/PPCInstrP10.td

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2230,6 +2230,13 @@ def VEqv
22302230
(v4i32(bitconvert node:$a)),
22312231
(v4i32(bitconvert node:$b)))))]>;
22322232

2233+
// Vector NAND operation (not(and))
2234+
def VNand
2235+
: PatFrags<(ops node:$a, node:$b), [(vnot(and node:$a, node:$b)),
2236+
(bitconvert(vnot(and
2237+
(v4i32(bitconvert node:$a)),
2238+
(v4i32(bitconvert node:$b)))))]>;
2239+
22332240
// =============================================================================
22342241
// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectAnd
22352242
// This class matches the equivalent Ternary Operation: A ? f(B,C) : AND(B,C)
@@ -2265,6 +2272,56 @@ multiclass XXEvalTernarySelectAnd<ValueType Vt> {
22652272
Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VAnd Vt:$vB, Vt:$vC)), 28>;
22662273
}
22672274

2275+
// =============================================================================
2276+
// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectB
2277+
// This class matches the equivalent Ternary Operation: A ? f(B,C) : B
2278+
// and emits the corresponding xxeval instruction with the imm value.
2279+
//
2280+
// The patterns implement xxeval vector select operations where:
2281+
// - A is the selector vector
2282+
// - f(B,C) is the "true" case op on vectors B and C (AND, NOR, EQV, NAND)
2283+
// - B is the "false" case operand (vector B)
2284+
//
2285+
// Note: Patterns (A ? C : B) and (A ? not(C) : B) are not considered
2286+
// for the XXEVAL instruction (4 cycles), as the XXSEL instruction (3 cycles)
2287+
// performs better.
2288+
// =============================================================================
2289+
multiclass XXEvalTernarySelectB<ValueType Vt>{
2290+
// Pattern: (A ? AND(B,C) : B) XXEVAL immediate value: 49
2291+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), Vt:$vB), 49>;
2292+
// Pattern: (A ? NOR(B,C) : B) XXEVAL immediate value: 56
2293+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), Vt:$vB), 56>;
2294+
// Pattern: (A ? EQV(B,C) : B) XXEVAL immediate value: 57
2295+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), Vt:$vB), 57>;
2296+
// Pattern: (A ? NAND(B,C) : B) XXEVAL immediate value: 62
2297+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), Vt:$vB), 62>;
2298+
}
2299+
2300+
// =============================================================================
2301+
// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectC
2302+
// This class matches the equivalent Ternary Operation: A ? f(B,C) : C
2303+
// and emits the corresponding xxeval instruction with the imm value.
2304+
//
2305+
// The patterns implement xxeval vector select operations where:
2306+
// - A is the selector vector
2307+
// - f(B,C) is the "true" case op on vectors B and C (AND, NOR, EQV, NAND)
2308+
// - C is the "false" case operand (vector C)
2309+
//
2310+
// Note: Patterns (A ? B : C) and (A ? not(B) : C) are not considered
2310+
// for the XXEVAL instruction (4 cycles), as the XXSEL instruction (3 cycles)
2311+
// performs better.
2313+
// =============================================================================
2314+
multiclass XXEvalTernarySelectC<ValueType Vt>{
2315+
// Pattern: (A ? AND(B,C) : C) XXEVAL immediate value: 81
2316+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), Vt:$vC), 81>;
2317+
// Pattern: (A ? NOR(B,C) : C) XXEVAL immediate value: 88
2318+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), Vt:$vC), 88>;
2319+
// Pattern: (A ? EQV(B,C) : C) XXEVAL immediate value: 89
2320+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), Vt:$vC), 89>;
2321+
// Pattern: (A ? NAND(B,C) : C) XXEVAL immediate value: 94
2322+
def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), Vt:$vC), 94>;
2323+
}
2324+
22682325
let Predicates = [PrefixInstrs, HasP10Vector] in {
22692326
let AddedComplexity = 400 in {
22702327
def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
@@ -2376,6 +2433,8 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
23762433
// XXEval Patterns for ternary Operations.
23772434
foreach Ty = [v4i32, v2i64, v8i16, v16i8] in {
23782435
defm : XXEvalTernarySelectAnd<Ty>;
2436+
defm : XXEvalTernarySelectB<Ty>;
2437+
defm : XXEvalTernarySelectC<Ty>;
23792438
}
23802439

23812440
// Anonymous patterns to select prefixed VSX loads and stores.

llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; Test file to verify the emission of Vector Selection instructions when ternary operators are used.
2+
; Test file to verify the emission of Vector Evaluation instructions when ternary operators are used.
33

44
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
55
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_and_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
1515
; CHECK-LABEL: ternary_A_and_BC_B_4x32:
1616
; CHECK: # %bb.0: # %entry
1717
; CHECK-NEXT: xxleqv v5, v5, v5
18-
; CHECK-NEXT: xxland vs0, v3, v4
1918
; CHECK-NEXT: vslw v2, v2, v5
2019
; CHECK-NEXT: vsraw v2, v2, v5
21-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
20+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 49
2221
; CHECK-NEXT: blr
2322
entry:
2423
%and = and <4 x i32> %B, %C
@@ -31,11 +30,10 @@ define <2 x i64> @ternary_A_and_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
3130
; CHECK-LABEL: ternary_A_and_BC_B_2x64:
3231
; CHECK: # %bb.0: # %entry
3332
; CHECK-NEXT: xxlxor v5, v5, v5
34-
; CHECK-NEXT: xxland vs0, v3, v4
3533
; CHECK-NEXT: xxsplti32dx v5, 1, 63
3634
; CHECK-NEXT: vsld v2, v2, v5
3735
; CHECK-NEXT: vsrad v2, v2, v5
38-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
36+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 49
3937
; CHECK-NEXT: blr
4038
entry:
4139
%and = and <2 x i64> %B, %C
@@ -48,10 +46,9 @@ define <16 x i8> @ternary_A_and_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
4846
; CHECK-LABEL: ternary_A_and_BC_B_16x8:
4947
; CHECK: # %bb.0: # %entry
5048
; CHECK-NEXT: xxspltib v5, 7
51-
; CHECK-NEXT: xxland vs0, v3, v4
5249
; CHECK-NEXT: vslb v2, v2, v5
5350
; CHECK-NEXT: vsrab v2, v2, v5
54-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
51+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 49
5552
; CHECK-NEXT: blr
5653
entry:
5754
%and = and <16 x i8> %B, %C
@@ -64,10 +61,9 @@ define <8 x i16> @ternary_A_and_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
6461
; CHECK-LABEL: ternary_A_and_BC_B_8x16:
6562
; CHECK: # %bb.0: # %entry
6663
; CHECK-NEXT: xxspltiw v5, 983055
67-
; CHECK-NEXT: xxland vs0, v3, v4
6864
; CHECK-NEXT: vslh v2, v2, v5
6965
; CHECK-NEXT: vsrah v2, v2, v5
70-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
66+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 49
7167
; CHECK-NEXT: blr
7268
entry:
7369
%and = and <8 x i16> %B, %C
@@ -80,10 +76,9 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
8076
; CHECK-LABEL: ternary_A_nor_BC_B_4x32:
8177
; CHECK: # %bb.0: # %entry
8278
; CHECK-NEXT: xxleqv v5, v5, v5
83-
; CHECK-NEXT: xxlnor vs0, v3, v4
8479
; CHECK-NEXT: vslw v2, v2, v5
8580
; CHECK-NEXT: vsraw v2, v2, v5
86-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
81+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
8782
; CHECK-NEXT: blr
8883
entry:
8984
%or = or <4 x i32> %B, %C
@@ -97,11 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
9792
; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
9893
; CHECK: # %bb.0: # %entry
9994
; CHECK-NEXT: xxlxor v5, v5, v5
100-
; CHECK-NEXT: xxlnor vs0, v3, v4
10195
; CHECK-NEXT: xxsplti32dx v5, 1, 63
10296
; CHECK-NEXT: vsld v2, v2, v5
10397
; CHECK-NEXT: vsrad v2, v2, v5
104-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
98+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
10599
; CHECK-NEXT: blr
106100
entry:
107101
%or = or <2 x i64> %B, %C
@@ -115,10 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
115109
; CHECK-LABEL: ternary_A_nor_BC_B_16x8:
116110
; CHECK: # %bb.0: # %entry
117111
; CHECK-NEXT: xxspltib v5, 7
118-
; CHECK-NEXT: xxlnor vs0, v3, v4
119112
; CHECK-NEXT: vslb v2, v2, v5
120113
; CHECK-NEXT: vsrab v2, v2, v5
121-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
114+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
122115
; CHECK-NEXT: blr
123116
entry:
124117
%or = or <16 x i8> %B, %C
@@ -132,10 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
132125
; CHECK-LABEL: ternary_A_nor_BC_B_8x16:
133126
; CHECK: # %bb.0: # %entry
134127
; CHECK-NEXT: xxspltiw v5, 983055
135-
; CHECK-NEXT: xxlnor vs0, v3, v4
136128
; CHECK-NEXT: vslh v2, v2, v5
137129
; CHECK-NEXT: vsrah v2, v2, v5
138-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
130+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
139131
; CHECK-NEXT: blr
140132
entry:
141133
%or = or <8 x i16> %B, %C
@@ -149,10 +141,9 @@ define <4 x i32> @ternary_A_eqv_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
149141
; CHECK-LABEL: ternary_A_eqv_BC_B_4x32:
150142
; CHECK: # %bb.0: # %entry
151143
; CHECK-NEXT: xxleqv v5, v5, v5
152-
; CHECK-NEXT: xxleqv vs0, v3, v4
153144
; CHECK-NEXT: vslw v2, v2, v5
154145
; CHECK-NEXT: vsraw v2, v2, v5
155-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
146+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 57
156147
; CHECK-NEXT: blr
157148
entry:
158149
%xor = xor <4 x i32> %B, %C
@@ -166,11 +157,10 @@ define <2 x i64> @ternary_A_eqv_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
166157
; CHECK-LABEL: ternary_A_eqv_BC_B_2x64:
167158
; CHECK: # %bb.0: # %entry
168159
; CHECK-NEXT: xxlxor v5, v5, v5
169-
; CHECK-NEXT: xxleqv vs0, v3, v4
170160
; CHECK-NEXT: xxsplti32dx v5, 1, 63
171161
; CHECK-NEXT: vsld v2, v2, v5
172162
; CHECK-NEXT: vsrad v2, v2, v5
173-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
163+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 57
174164
; CHECK-NEXT: blr
175165
entry:
176166
%xor = xor <2 x i64> %B, %C
@@ -184,10 +174,9 @@ define <16 x i8> @ternary_A_eqv_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
184174
; CHECK-LABEL: ternary_A_eqv_BC_B_16x8:
185175
; CHECK: # %bb.0: # %entry
186176
; CHECK-NEXT: xxspltib v5, 7
187-
; CHECK-NEXT: xxleqv vs0, v3, v4
188177
; CHECK-NEXT: vslb v2, v2, v5
189178
; CHECK-NEXT: vsrab v2, v2, v5
190-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
179+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 57
191180
; CHECK-NEXT: blr
192181
entry:
193182
%xor = xor <16 x i8> %B, %C
@@ -201,10 +190,9 @@ define <8 x i16> @ternary_A_eqv_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %
201190
; CHECK-LABEL: ternary_A_eqv_BC_B_8x16:
202191
; CHECK: # %bb.0: # %entry
203192
; CHECK-NEXT: xxspltiw v5, 983055
204-
; CHECK-NEXT: xxleqv vs0, v3, v4
205193
; CHECK-NEXT: vslh v2, v2, v5
206194
; CHECK-NEXT: vsrah v2, v2, v5
207-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
195+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 57
208196
; CHECK-NEXT: blr
209197
entry:
210198
%xor = xor <8 x i16> %B, %C
@@ -218,10 +206,9 @@ define <4 x i32> @ternary_A_nand_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32>
218206
; CHECK-LABEL: ternary_A_nand_BC_B_4x32:
219207
; CHECK: # %bb.0: # %entry
220208
; CHECK-NEXT: xxleqv v5, v5, v5
221-
; CHECK-NEXT: xxlnand vs0, v3, v4
222209
; CHECK-NEXT: vslw v2, v2, v5
223210
; CHECK-NEXT: vsraw v2, v2, v5
224-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
211+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 62
225212
; CHECK-NEXT: blr
226213
entry:
227214
%and = and <4 x i32> %B, %C
@@ -235,11 +222,10 @@ define <2 x i64> @ternary_A_nand_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
235222
; CHECK-LABEL: ternary_A_nand_BC_B_2x64:
236223
; CHECK: # %bb.0: # %entry
237224
; CHECK-NEXT: xxlxor v5, v5, v5
238-
; CHECK-NEXT: xxlnand vs0, v3, v4
239225
; CHECK-NEXT: xxsplti32dx v5, 1, 63
240226
; CHECK-NEXT: vsld v2, v2, v5
241227
; CHECK-NEXT: vsrad v2, v2, v5
242-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
228+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 62
243229
; CHECK-NEXT: blr
244230
entry:
245231
%and = and <2 x i64> %B, %C
@@ -253,10 +239,9 @@ define <16 x i8> @ternary_A_nand_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8>
253239
; CHECK-LABEL: ternary_A_nand_BC_B_16x8:
254240
; CHECK: # %bb.0: # %entry
255241
; CHECK-NEXT: xxspltib v5, 7
256-
; CHECK-NEXT: xxlnand vs0, v3, v4
257242
; CHECK-NEXT: vslb v2, v2, v5
258243
; CHECK-NEXT: vsrab v2, v2, v5
259-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
244+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 62
260245
; CHECK-NEXT: blr
261246
entry:
262247
%and = and <16 x i8> %B, %C
@@ -270,10 +255,9 @@ define <8 x i16> @ternary_A_nand_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16>
270255
; CHECK-LABEL: ternary_A_nand_BC_B_8x16:
271256
; CHECK: # %bb.0: # %entry
272257
; CHECK-NEXT: xxspltiw v5, 983055
273-
; CHECK-NEXT: xxlnand vs0, v3, v4
274258
; CHECK-NEXT: vslh v2, v2, v5
275259
; CHECK-NEXT: vsrah v2, v2, v5
276-
; CHECK-NEXT: xxsel v2, v3, vs0, v2
260+
; CHECK-NEXT: xxeval v2, v2, v3, v4, 62
277261
; CHECK-NEXT: blr
278262
entry:
279263
%and = and <8 x i16> %B, %C

0 commit comments

Comments
 (0)