11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
33
4-
54define <8 x i8 > @test_avgceil_u (<8 x i16 > %a , <8 x i16 > %b ) {
65; CHECK-LABEL: test_avgceil_u:
76; CHECK: // %bb.0:
87; CHECK-NEXT: xtn v0.8b, v0.8h
98; CHECK-NEXT: xtn v1.8b, v1.8h
109; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
1110; CHECK-NEXT: ret
12-
1311 %mask = insertelement <8 x i16 > poison, i16 255 , i32 0
1412 %mask.splat = shufflevector <8 x i16 > %mask , <8 x i16 > poison, <8 x i32 > zeroinitializer
1513 %ta16 = and <8 x i16 > %a , %mask.splat
@@ -20,80 +18,67 @@ define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
2018 ret <8 x i8 > %res
2119}
2220
23-
2421define <8 x i8 > @test_avgceil_s (<8 x i16 > %a , <8 x i16 > %b ) {
2522; CHECK-LABEL: test_avgceil_s:
2623; CHECK: // %bb.0:
2724; CHECK-NEXT: sqxtn v0.8b, v0.8h
2825; CHECK-NEXT: sqxtn v1.8b, v1.8h
2926; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
3027; CHECK-NEXT: ret
31-
32- %smin = insertelement <8 x i16 > poison, i16 -128 , i32 0
33- %smax = insertelement <8 x i16 > poison, i16 127 , i32 0
34- %min = shufflevector <8 x i16 > %smin , <8 x i16 > poison, <8 x i32 > zeroinitializer
35- %max = shufflevector <8 x i16 > %smax , <8 x i16 > poison, <8 x i32 > zeroinitializer
36-
37- %ta16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %a , <8 x i16 > %max )
38- %ta16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %ta16 , <8 x i16 > %min )
39- %tb16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %b , <8 x i16 > %max )
40- %tb16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %tb16 , <8 x i16 > %min )
41-
28+ %min = insertelement <8 x i16 > poison, i16 -128 , i32 0
29+ %min.splat = shufflevector <8 x i16 > %min , <8 x i16 > poison, <8 x i32 > zeroinitializer
30+ %max = insertelement <8 x i16 > poison, i16 127 , i32 0
31+ %max.splat = shufflevector <8 x i16 > %max , <8 x i16 > poison, <8 x i32 > zeroinitializer
32+ %ta16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %a , <8 x i16 > %max.splat )
33+ %ta16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %ta16 , <8 x i16 > %min.splat )
34+ %tb16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %b , <8 x i16 > %max.splat )
35+ %tb16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %tb16 , <8 x i16 > %min.splat )
4236 %ta8 = trunc <8 x i16 > %ta16.clamped to <8 x i8 >
4337 %tb8 = trunc <8 x i16 > %tb16.clamped to <8 x i8 >
4438 %res = call <8 x i8 > @llvm.aarch64.neon.shadd.v8i8 (<8 x i8 > %ta8 , <8 x i8 > %tb8 )
4539 ret <8 x i8 > %res
4640}
4741
48-
4942define <8 x i8 > @test_avgfloor_u (<8 x i16 > %a , <8 x i16 > %b ) {
5043; CHECK-LABEL: test_avgfloor_u:
5144; CHECK: // %bb.0:
5245; CHECK-NEXT: xtn v0.8b, v0.8h
5346; CHECK-NEXT: xtn v1.8b, v1.8h
54- ; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
47+ ; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
5548; CHECK-NEXT: ret
56-
57- %mask = insertelement <8 x i16 > undef , i16 255 , i32 0
58- %mask.splat = shufflevector <8 x i16 > %mask , <8 x i16 > undef , <8 x i32 > zeroinitializer
49+ %mask = insertelement <8 x i16 > poison, i16 255 , i32 0
50+ %mask.splat = shufflevector <8 x i16 > %mask , <8 x i16 > poison, <8 x i32 > zeroinitializer
5951 %ta16 = and <8 x i16 > %a , %mask.splat
6052 %tb16 = and <8 x i16 > %b , %mask.splat
6153 %ta8 = trunc <8 x i16 > %ta16 to <8 x i8 >
6254 %tb8 = trunc <8 x i16 > %tb16 to <8 x i8 >
63- %res = call <8 x i8 > @llvm.aarch64.neon.urhadd .v8i8 (<8 x i8 > %ta8 , <8 x i8 > %tb8 )
55+ %res = call <8 x i8 > @llvm.aarch64.neon.uhadd .v8i8 (<8 x i8 > %ta8 , <8 x i8 > %tb8 )
6456 ret <8 x i8 > %res
6557}
6658
67-
6859define <8 x i8 > @test_avgfloor_s (<8 x i16 > %a , <8 x i16 > %b ) {
6960; CHECK-LABEL: test_avgfloor_s:
7061; CHECK: // %bb.0:
7162; CHECK-NEXT: sqxtn v0.8b, v0.8h
7263; CHECK-NEXT: sqxtn v1.8b, v1.8h
73- ; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
64+ ; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
7465; CHECK-NEXT: ret
75-
76- %smin = insertelement <8 x i16 > poison, i16 -128 , i32 0
77- %smax = insertelement <8 x i16 > poison, i16 127 , i32 0
78- %min = shufflevector <8 x i16 > %smin , <8 x i16 > poison, <8 x i32 > zeroinitializer
79- %max = shufflevector <8 x i16 > %smax , <8 x i16 > poison, <8 x i32 > zeroinitializer
80-
81- %ta16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %a , <8 x i16 > %max )
82- %ta16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %ta16 , <8 x i16 > %min )
83- %tb16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %b , <8 x i16 > %max )
84- %tb16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %tb16 , <8 x i16 > %min )
85-
66+ %min = insertelement <8 x i16 > poison, i16 -128 , i32 0
67+ %min.splat = shufflevector <8 x i16 > %min , <8 x i16 > poison, <8 x i32 > zeroinitializer
68+ %max = insertelement <8 x i16 > poison, i16 127 , i32 0
69+ %max.splat = shufflevector <8 x i16 > %max , <8 x i16 > poison, <8 x i32 > zeroinitializer
70+ %ta16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %a , <8 x i16 > %max.splat )
71+ %ta16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %ta16 , <8 x i16 > %min.splat )
72+ %tb16 = call <8 x i16 > @llvm.smin.v8i16 (<8 x i16 > %b , <8 x i16 > %max.splat )
73+ %tb16.clamped = call <8 x i16 > @llvm.smax.v8i16 (<8 x i16 > %tb16 , <8 x i16 > %min.splat )
8674 %ta8 = trunc <8 x i16 > %ta16.clamped to <8 x i8 >
8775 %tb8 = trunc <8 x i16 > %tb16.clamped to <8 x i8 >
88- %res = call <8 x i8 > @llvm.aarch64.neon.srhadd .v8i8 (<8 x i8 > %ta8 , <8 x i8 > %tb8 )
76+ %res = call <8 x i8 > @llvm.aarch64.neon.shadd .v8i8 (<8 x i8 > %ta8 , <8 x i8 > %tb8 )
8977 ret <8 x i8 > %res
9078}
9179
9280declare <8 x i8 > @llvm.aarch64.neon.uhadd.v8i8 (<8 x i8 >, <8 x i8 >)
9381declare <8 x i8 > @llvm.aarch64.neon.shadd.v8i8 (<8 x i8 >, <8 x i8 >)
94- declare <8 x i8 > @llvm.aarch64.neon.urhadd.v8i8 (<8 x i8 >, <8 x i8 >)
95- declare <8 x i8 > @llvm.aarch64.neon.srhadd.v8i8 (<8 x i8 >, <8 x i8 >)
96-
9782declare <8 x i16 > @llvm.smin.v8i16 (<8 x i16 >, <8 x i16 >)
9883declare <8 x i16 > @llvm.smax.v8i16 (<8 x i16 >, <8 x i16 >)
9984
0 commit comments