11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2- ; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
2+ ; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK-SD
3+ ; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon -global-isel < %s | FileCheck %s --check-prefixes=CHECK-GI
34
; Feeds @llvm.aarch64.neon.urhadd.v8i16 with two <8 x i8> arguments that have
; been zero-extended to <8 x i16>; the function itself returns <8 x i8>.
; The SelectionDAG checks expect a single urhadd on .8b lanes. The GlobalISel
; checks expect the widened sequence: two ushll, urhadd on .8h lanes, then xtn.
45define <8 x i8 > @avgceil_u_i8_to_i16 (<8 x i8 > %a , <8 x i8 > %b ) {
5- ; CHECK-LABEL: avgceil_u_i8_to_i16:
6- ; CHECK: // %bb.0:
7- ; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
8- ; CHECK-NEXT: ret
6+ ; CHECK-SD-LABEL: avgceil_u_i8_to_i16:
7+ ; CHECK-SD: // %bb.0:
8+ ; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
9+ ; CHECK-SD-NEXT: ret
10+ ;
11+ ; CHECK-GI-LABEL: avgceil_u_i8_to_i16:
12+ ; CHECK-GI: // %bb.0:
13+ ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
14+ ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
15+ ; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
16+ ; CHECK-GI-NEXT: xtn v0.8b, v0.8h
17+ ; CHECK-GI-NEXT: ret
918 %a16 = zext <8 x i8 > %a to <8 x i16 >
1019 %b16 = zext <8 x i8 > %b to <8 x i16 >
1120 %avg16 = call <8 x i16 > @llvm.aarch64.neon.urhadd.v8i16 (<8 x i16 > %a16 , <8 x i16 > %b16 )
@@ -15,10 +24,18 @@ define <8 x i8> @avgceil_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
1524
1625
; Feeds @llvm.aarch64.neon.srhadd.v8i16 with two <8 x i8> arguments that have
; been sign-extended to <8 x i16>; the function itself returns <8 x i8>.
; The SelectionDAG checks expect a single srhadd on .8b lanes. The GlobalISel
; checks expect the widened sequence: two sshll, srhadd on .8h lanes, then xtn.
1726define <8 x i8 > @test_avgceil_s (<8 x i8 > %a , <8 x i8 > %b ) {
18- ; CHECK-LABEL: test_avgceil_s:
19- ; CHECK: // %bb.0:
20- ; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
21- ; CHECK-NEXT: ret
27+ ; CHECK-SD-LABEL: test_avgceil_s:
28+ ; CHECK-SD: // %bb.0:
29+ ; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
30+ ; CHECK-SD-NEXT: ret
31+ ;
32+ ; CHECK-GI-LABEL: test_avgceil_s:
33+ ; CHECK-GI: // %bb.0:
34+ ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
35+ ; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
36+ ; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
37+ ; CHECK-GI-NEXT: xtn v0.8b, v0.8h
38+ ; CHECK-GI-NEXT: ret
2239 %a16 = sext <8 x i8 > %a to <8 x i16 >
2340 %b16 = sext <8 x i8 > %b to <8 x i16 >
2441 %avg16 = call <8 x i16 > @llvm.aarch64.neon.srhadd.v8i16 (<8 x i16 > %a16 , <8 x i16 > %b16 )
@@ -27,10 +44,18 @@ define <8 x i8> @test_avgceil_s(<8 x i8> %a, <8 x i8> %b) {
2744}
2845
; Feeds @llvm.aarch64.neon.uhadd.v8i16 with two <8 x i8> arguments that have
; been zero-extended to <8 x i16>; the function itself returns <8 x i8>.
; The SelectionDAG checks expect a single uhadd on .8b lanes. The GlobalISel
; checks expect the widened sequence: two ushll, uhadd on .8h lanes, then xtn.
2946define <8 x i8 > @avgfloor_u_i8_to_i16 (<8 x i8 > %a , <8 x i8 > %b ) {
30- ; CHECK-LABEL: avgfloor_u_i8_to_i16:
31- ; CHECK: // %bb.0:
32- ; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
33- ; CHECK-NEXT: ret
47+ ; CHECK-SD-LABEL: avgfloor_u_i8_to_i16:
48+ ; CHECK-SD: // %bb.0:
49+ ; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
50+ ; CHECK-SD-NEXT: ret
51+ ;
52+ ; CHECK-GI-LABEL: avgfloor_u_i8_to_i16:
53+ ; CHECK-GI: // %bb.0:
54+ ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
55+ ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
56+ ; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
57+ ; CHECK-GI-NEXT: xtn v0.8b, v0.8h
58+ ; CHECK-GI-NEXT: ret
3459 %a16 = zext <8 x i8 > %a to <8 x i16 >
3560 %b16 = zext <8 x i8 > %b to <8 x i16 >
3661 %avg16 = call <8 x i16 > @llvm.aarch64.neon.uhadd.v8i16 (<8 x i16 > %a16 , <8 x i16 > %b16 )
@@ -39,15 +64,21 @@ define <8 x i8> @avgfloor_u_i8_to_i16(<8 x i8> %a, <8 x i8> %b) {
3964}
4065
; Sign-extends both <8 x i8> arguments to <8 x i16>, calls
; @llvm.aarch64.neon.shadd.v8i16 on them, truncates the result back to
; <8 x i8> and returns it.
; The SelectionDAG checks expect a single shadd on .8b lanes. The GlobalISel
; checks expect the widened sequence: two sshll, shadd on .8h lanes, then xtn.
4166define <8 x i8 > @test_avgfloor_s (<8 x i8 > %a , <8 x i8 > %b ) {
42- ; CHECK-LABEL: test_avgfloor_s:
43- ; CHECK: // %bb.0:
44- ; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
45- ; CHECK-NEXT: ret
67+ ; CHECK-SD-LABEL: test_avgfloor_s:
68+ ; CHECK-SD: // %bb.0:
69+ ; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
70+ ; CHECK-SD-NEXT: ret
71+ ;
72+ ; CHECK-GI-LABEL: test_avgfloor_s:
73+ ; CHECK-GI: // %bb.0:
74+ ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
75+ ; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
76+ ; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
77+ ; CHECK-GI-NEXT: xtn v0.8b, v0.8h
78+ ; CHECK-GI-NEXT: ret
4679 %a16 = sext <8 x i8 > %a to <8 x i16 >
4780 %b16 = sext <8 x i8 > %b to <8 x i16 >
4881 %avg16 = call <8 x i16 > @llvm.aarch64.neon.shadd.v8i16 (<8 x i16 > %a16 , <8 x i16 > %b16 )
4982 %res = trunc <8 x i16 > %avg16 to <8 x i8 >
5083 ret <8 x i8 > %res
5184}
52-
53-
0 commit comments