|
8 | 8 | ; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \ |
9 | 9 | ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s |
10 | 10 |
|
11 | | -; The addition of vector `A` with vector of 1s currently uses `vspltisw` to generate vector of 1s followed by add operation. |
| 11 | +; Optimized version, which uses `xxleqv` to generate a vector of -1s and `vsubu*m` to subtract it from `A`, leveraging the identity A - (-1) = A + 1. |
12 | 12 |
|
13 | 13 | ; Function for the vector type v2i64 `a + {1, 1}` |
14 | 14 | define <2 x i64> @test_v2i64(<2 x i64> %a) { |
15 | 15 | ; CHECK-LABEL: test_v2i64: |
16 | 16 | ; CHECK: # %bb.0: # %entry |
17 | | -; CHECK-NEXT: vspltisw v3, 1 |
18 | | -; CHECK-NEXT: vupklsw v3, v3 |
19 | | -; CHECK-NEXT: vaddudm v2, v2, v3 |
| 17 | +; CHECK-NEXT: xxleqv v3, v3, v3 |
| 18 | +; CHECK-NEXT: vsubudm v2, v2, v3 |
20 | 19 | ; CHECK-NEXT: blr |
21 | 20 | entry: |
22 | 21 | %add = add <2 x i64> %a, splat (i64 1) |
|
27 | 26 | define <4 x i32> @test_v4i32(<4 x i32> %a) { |
28 | 27 | ; CHECK-LABEL: test_v4i32: |
29 | 28 | ; CHECK: # %bb.0: # %entry |
30 | | -; CHECK-NEXT: vspltisw v3, 1 |
31 | | -; CHECK-NEXT: vadduwm v2, v2, v3 |
| 29 | +; CHECK-NEXT: xxleqv v3, v3, v3 |
| 30 | +; CHECK-NEXT: vsubuwm v2, v2, v3 |
32 | 31 | ; CHECK-NEXT: blr |
33 | 32 | entry: |
34 | 33 | %add = add <4 x i32> %a, splat (i32 1) |
|
39 | 38 | define <8 x i16> @test_v8i16(<8 x i16> %a) { |
40 | 39 | ; CHECK-LABEL: test_v8i16: |
41 | 40 | ; CHECK: # %bb.0: # %entry |
42 | | -; CHECK-NEXT: vspltish v3, 1 |
43 | | -; CHECK-NEXT: vadduhm v2, v2, v3 |
| 41 | +; CHECK-NEXT: xxleqv v3, v3, v3 |
| 42 | +; CHECK-NEXT: vsubuhm v2, v2, v3 |
44 | 43 | ; CHECK-NEXT: blr |
45 | 44 | entry: |
46 | 45 | %add = add <8 x i16> %a, splat (i16 1) |
|
51 | 50 | define <16 x i8> @test_16i8(<16 x i8> %a) { |
52 | 51 | ; CHECK-LABEL: test_16i8: |
53 | 52 | ; CHECK: # %bb.0: # %entry |
54 | | -; CHECK-NEXT: xxspltib v3, 1 |
55 | | -; CHECK-NEXT: vaddubm v2, v2, v3 |
| 53 | +; CHECK-NEXT: xxleqv v3, v3, v3 |
| 54 | +; CHECK-NEXT: vsububm v2, v2, v3 |
56 | 55 | ; CHECK-NEXT: blr |
57 | 56 | entry: |
58 | 57 | %add = add <16 x i8> %a, splat (i8 1) |
|
0 commit comments