Skip to content

Commit 5de66e2

Browse files
author
himadhith
committed
[PowerPC] Replace vspltisw instruction with xxleqv as generation of vector of -1s is cheaper than vector of 1s
1 parent f026cb0 commit 5de66e2

File tree

2 files changed

+36
-0
lines changed

2 files changed

+36
-0
lines changed

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3627,6 +3627,10 @@ def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
36273627
immSExt5NonZero:$A, immSExt5NonZero:$A)),
36283628
(v4i32 (VSPLTISW imm:$A))>;
36293629

3630+
// Optimize addition of a vector of 1s: A + 1 == A - (-1), and a vector of -1s (all bits set) is materialized with xxleqv.
3631+
def : Pat<(add v4i32:$A, (build_vector (i32 1), (i32 1), (i32 1), (i32 1))),
3632+
(VSUBUWM $A, (v4i32 (COPY_TO_REGCLASS (XXLEQVOnes), VSRC)))>;
3633+
36303634
// Splat loads.
36313635
def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
36323636
(v8i16 (VSPLTHs 3, (MTVSRWZ (LHZX ForceXForm:$A))))>;
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
3+
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64LE
4+
5+
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \
6+
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64
7+
8+
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
9+
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_32
10+
11+
define dso_local noundef <4 x i32> @test1(<4 x i32> %a) {
12+
; POWERPC_64LE-LABEL: test1:
13+
; POWERPC_64LE: # %bb.0: # %entry
14+
; POWERPC_64LE-NEXT: xxleqv v3, v3, v3
15+
; POWERPC_64LE-NEXT: vsubuwm v2, v2, v3
16+
; POWERPC_64LE-NEXT: blr
17+
;
18+
; POWERPC_64-LABEL: test1:
19+
; POWERPC_64: # %bb.0: # %entry
20+
; POWERPC_64-NEXT: xxleqv v3, v3, v3
21+
; POWERPC_64-NEXT: vsubuwm v2, v2, v3
22+
; POWERPC_64-NEXT: blr
23+
;
24+
; POWERPC_32-LABEL: test1:
25+
; POWERPC_32: # %bb.0: # %entry
26+
; POWERPC_32-NEXT: xxleqv v3, v3, v3
27+
; POWERPC_32-NEXT: vsubuwm v2, v2, v3
28+
; POWERPC_32-NEXT: blr
29+
entry:
30+
%add = add <4 x i32> %a, splat (i32 1)
31+
ret <4 x i32> %add
32+
}

0 commit comments

Comments
 (0)