Skip to content

Commit 1115256

Browse files
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have sufficient leading zero/sign bits-4
1 parent c8cc2a9 commit 1115256

File tree

1 file changed

+22
-37
lines changed

1 file changed

+22
-37
lines changed
Lines changed: 22 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,13 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=aarch64-- -O2 -mattr=+neon < %s | FileCheck %s
33

4-
54
define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
65
; CHECK-LABEL: test_avgceil_u:
76
; CHECK: // %bb.0:
87
; CHECK-NEXT: xtn v0.8b, v0.8h
98
; CHECK-NEXT: xtn v1.8b, v1.8h
109
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
1110
; CHECK-NEXT: ret
12-
1311
%mask = insertelement <8 x i16> poison, i16 255, i32 0
1412
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
1513
%ta16 = and <8 x i16> %a, %mask.splat
@@ -20,80 +18,67 @@ define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
2018
ret <8 x i8> %res
2119
}
2220

23-
2421
; Signed case: both <8 x i16> inputs are clamped to [-128, 127] with
; smin/smax against splat constants, truncated to <8 x i8>, and passed to
; @llvm.aarch64.neon.shadd. The CHECK lines expect sqxtn, sqxtn, shadd.
; NOTE(review): the name says "avgceil" but the call below is the same shadd
; intrinsic used by test_avgfloor_s -- confirm whether srhadd was intended.
define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
2522
; CHECK-LABEL: test_avgceil_s:
2623
; CHECK: // %bb.0:
2724
; CHECK-NEXT: sqxtn v0.8b, v0.8h
2825
; CHECK-NEXT: sqxtn v1.8b, v1.8h
2926
; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
3027
; CHECK-NEXT: ret
31-
32-
%smin = insertelement <8 x i16> poison, i16 -128, i32 0
33-
%smax = insertelement <8 x i16> poison, i16 127, i32 0
34-
%min = shufflevector <8 x i16> %smin, <8 x i16> poison, <8 x i32> zeroinitializer
35-
%max = shufflevector <8 x i16> %smax, <8 x i16> poison, <8 x i32> zeroinitializer
36-
37-
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
38-
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
39-
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max)
40-
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min)
41-
28+
%min = insertelement <8 x i16> poison, i16 -128, i32 0
29+
%min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
30+
%max = insertelement <8 x i16> poison, i16 127, i32 0
31+
%max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
32+
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
33+
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
34+
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
35+
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
4236
%ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
4337
%tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
4438
%res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
4539
ret <8 x i8> %res
4640
}
4741

48-
4942
; Unsigned case: both <8 x i16> inputs are ANDed with a splat of 255,
; truncated to <8 x i8>, and passed to @llvm.aarch64.neon.uhadd.
; The CHECK lines expect xtn, xtn, uhadd.
; In this diff the intrinsic (and its CHECK line) changes from urhadd to
; uhadd, and the splat is built from poison instead of undef.
define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
5043
; CHECK-LABEL: test_avgfloor_u:
5144
; CHECK: // %bb.0:
5245
; CHECK-NEXT: xtn v0.8b, v0.8h
5346
; CHECK-NEXT: xtn v1.8b, v1.8h
54-
; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
47+
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
5548
; CHECK-NEXT: ret
56-
57-
%mask = insertelement <8 x i16> undef, i16 255, i32 0
58-
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> undef, <8 x i32> zeroinitializer
49+
%mask = insertelement <8 x i16> poison, i16 255, i32 0
50+
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
5951
%ta16 = and <8 x i16> %a, %mask.splat
6052
%tb16 = and <8 x i16> %b, %mask.splat
6153
%ta8 = trunc <8 x i16> %ta16 to <8 x i8>
6254
%tb8 = trunc <8 x i16> %tb16 to <8 x i8>
63-
%res = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
55+
%res = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
6456
ret <8 x i8> %res
6557
}
6658

67-
6859
; Signed case: both <8 x i16> inputs are clamped to [-128, 127] with
; smin/smax against splat constants, truncated to <8 x i8>, and passed to
; @llvm.aarch64.neon.shadd. The CHECK lines expect sqxtn, sqxtn, shadd.
; In this diff the intrinsic (and its CHECK line) changes from srhadd to
; shadd, and the splat values are renamed to %min.splat / %max.splat.
define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
6960
; CHECK-LABEL: test_avgfloor_s:
7061
; CHECK: // %bb.0:
7162
; CHECK-NEXT: sqxtn v0.8b, v0.8h
7263
; CHECK-NEXT: sqxtn v1.8b, v1.8h
73-
; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
64+
; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
7465
; CHECK-NEXT: ret
75-
76-
%smin = insertelement <8 x i16> poison, i16 -128, i32 0
77-
%smax = insertelement <8 x i16> poison, i16 127, i32 0
78-
%min = shufflevector <8 x i16> %smin, <8 x i16> poison, <8 x i32> zeroinitializer
79-
%max = shufflevector <8 x i16> %smax, <8 x i16> poison, <8 x i32> zeroinitializer
80-
81-
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max)
82-
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min)
83-
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max)
84-
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min)
85-
66+
%min = insertelement <8 x i16> poison, i16 -128, i32 0
67+
%min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
68+
%max = insertelement <8 x i16> poison, i16 127, i32 0
69+
%max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
70+
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
71+
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
72+
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
73+
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
8674
%ta8 = trunc <8 x i16> %ta16.clamped to <8 x i8>
8775
%tb8 = trunc <8 x i16> %tb16.clamped to <8 x i8>
88-
%res = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
76+
%res = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %ta8, <8 x i8> %tb8)
8977
ret <8 x i8> %res
9078
}
9179

9280
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
9381
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
94-
declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
95-
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
96-
9782
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
9883
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
9984

0 commit comments

Comments (0)