|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | 2 | ; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s |
3 | 3 |
|
4 | | -define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) { |
5 | | -; CHECK-LABEL: test_avgceil_u: |
| 4 | +define <8 x i8> @avgceil_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) { |
| 5 | +; CHECK-LABEL: avgceil_u_i8_to_i16: |
6 | 6 | ; CHECK: // %bb.0: |
7 | | -; CHECK-NEXT: bic v0.8h, #255, lsl #8 |
8 | | -; CHECK-NEXT: bic v1.8h, #255, lsl #8 |
9 | | -; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h |
10 | | -; CHECK-NEXT: xtn v0.8b, v0.8h |
| 7 | +; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b |
11 | 8 | ; CHECK-NEXT: ret |
12 | | - %ta16 = and <8 x i16> %a, splat (i16 255) |
13 | | - %tb16 = and <8 x i16> %b, splat (i16 255) |
14 | | - %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16) |
15 | | - %res = trunc <8 x i16> %avg16 to <8 x i8> |
16 | | - ret <8 x i8> %res |
| 9 | + %a16 = zext <8 x i8> %a to <8 x i16> |
| 10 | + %b16 = zext <8 x i8> %b to <8 x i16> |
| 11 | + %avg16 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16) |
| 12 | + %r = trunc <8 x i16> %avg16 to <8 x i8> |
| 13 | + ret <8 x i8> %r |
17 | 14 | } |
18 | 15 |
|
19 | | -define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) { |
| 16 | + |
| 17 | +define <8 x i8> @test_avgceil_s(<8 x i8> %a, <8 x i8> %b) { |
20 | 18 | ; CHECK-LABEL: test_avgceil_s: |
21 | 19 | ; CHECK: // %bb.0: |
22 | | -; CHECK-NEXT: movi v2.8h, #127 |
23 | | -; CHECK-NEXT: mvni v3.8h, #127 |
24 | | -; CHECK-NEXT: smin v0.8h, v0.8h, v2.8h |
25 | | -; CHECK-NEXT: smin v1.8h, v1.8h, v2.8h |
26 | | -; CHECK-NEXT: smax v0.8h, v0.8h, v3.8h |
27 | | -; CHECK-NEXT: smax v1.8h, v1.8h, v3.8h |
28 | | -; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h |
29 | | -; CHECK-NEXT: xtn v0.8b, v0.8h |
| 20 | +; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b |
30 | 21 | ; CHECK-NEXT: ret |
31 | | - %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127)) |
32 | | - %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128)) |
33 | | - %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127)) |
34 | | - %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128)) |
35 | | - %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped) |
36 | | - %res = trunc <8 x i16> %avg16 to <8 x i8> |
| 22 | + %a16 = sext <8 x i8> %a to <8 x i16> |
| 23 | + %b16 = sext <8 x i8> %b to <8 x i16> |
| 24 | + %avg16 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16) |
| 25 | + %res = trunc <8 x i16> %avg16 to <8 x i8> |
37 | 26 | ret <8 x i8> %res |
38 | 27 | } |
39 | 28 |
|
40 | | -define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) { |
41 | | -; CHECK-LABEL: test_avgfloor_u: |
| 29 | +define <8 x i8> @avgfloor_u_from_intrin(<8 x i8> %a, <8 x i8> %b) { |
| 30 | +; CHECK-LABEL: avgfloor_u_from_intrin: |
42 | 31 | ; CHECK: // %bb.0: |
43 | | -; CHECK-NEXT: bic v0.8h, #255, lsl #8 |
44 | | -; CHECK-NEXT: bic v1.8h, #255, lsl #8 |
45 | | -; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h |
46 | | -; CHECK-NEXT: xtn v0.8b, v0.8h |
| 32 | +; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b |
47 | 33 | ; CHECK-NEXT: ret |
48 | | - %ta16 = and <8 x i16> %a, splat (i16 255) |
49 | | - %tb16 = and <8 x i16> %b, splat (i16 255) |
50 | | - %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16) |
| 34 | + %a16 = zext <8 x i8> %a to <8 x i16> |
| 35 | + %b16 = zext <8 x i8> %b to <8 x i16> |
| 36 | + %avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %a16, <8 x i16> %b16) |
51 | 37 | %res = trunc <8 x i16> %avg16 to <8 x i8> |
52 | 38 | ret <8 x i8> %res |
53 | 39 | } |
54 | 40 |
|
55 | | -define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) { |
| 41 | +define <8 x i8> @test_avgfloor_s(<8 x i8> %a, <8 x i8> %b) { |
56 | 42 | ; CHECK-LABEL: test_avgfloor_s: |
57 | 43 | ; CHECK: // %bb.0: |
58 | | -; CHECK-NEXT: movi v2.8h, #127 |
59 | | -; CHECK-NEXT: mvni v3.8h, #127 |
60 | | -; CHECK-NEXT: smin v0.8h, v0.8h, v2.8h |
61 | | -; CHECK-NEXT: smin v1.8h, v1.8h, v2.8h |
62 | | -; CHECK-NEXT: smax v0.8h, v0.8h, v3.8h |
63 | | -; CHECK-NEXT: smax v1.8h, v1.8h, v3.8h |
64 | | -; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h |
65 | | -; CHECK-NEXT: xtn v0.8b, v0.8h |
| 44 | +; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b |
66 | 45 | ; CHECK-NEXT: ret |
67 | | - %ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127)) |
68 | | - %ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128)) |
69 | | - %tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127)) |
70 | | - %tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128)) |
71 | | - %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped) |
72 | | - %res = trunc <8 x i16> %avg16 to <8 x i8> |
| 46 | + %a16 = sext <8 x i8> %a to <8 x i16> |
| 47 | + %b16 = sext <8 x i8> %b to <8 x i16> |
| 48 | + %avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %a16, <8 x i16> %b16) |
| 49 | + %res = trunc <8 x i16> %avg16 to <8 x i8> |
73 | 50 | ret <8 x i8> %res |
74 | 51 | } |
75 | 52 |
|
76 | | -declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) |
77 | | -declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) |
78 | | -declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) |
79 | | -declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) |
80 | 53 |
|
0 commit comments