Skip to content

Commit 44609a3

Browse files
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have sufficient leading zero/sign bits-7
1 parent 728b37d commit 44609a3

File tree

1 file changed

+12
-24
lines changed

1 file changed

+12
-24
lines changed

llvm/test/CodeGen/AArch64/trunc-avg-fold.ll

Lines changed: 12 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,8 @@ define <8 x i8> @test_avgceil_u(<8 x i16> %a, <8 x i16> %b) {
9 9
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
10 10
; CHECK-NEXT: xtn v0.8b, v0.8h
11 11
; CHECK-NEXT: ret
12-
%mask = insertelement <8 x i16> poison, i16 255, i32 0
13-
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
14-
%ta16 = and <8 x i16> %a, %mask.splat
15-
%tb16 = and <8 x i16> %b, %mask.splat
12+
%ta16 = and <8 x i16> %a, splat (i16 255)
13+
%tb16 = and <8 x i16> %b, splat (i16 255)
16 14
%avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
17 15
%res = trunc <8 x i16> %avg16 to <8 x i8>
18 16
ret <8 x i8> %res
@@ -30,14 +28,10 @@ define <8 x i8> @test_avgceil_s(<8 x i16> %a, <8 x i16> %b) {
30 28
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
31 29
; CHECK-NEXT: xtn v0.8b, v0.8h
32 30
; CHECK-NEXT: ret
33-
%min = insertelement <8 x i16> poison, i16 -128, i32 0
34-
%min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
35-
%max = insertelement <8 x i16> poison, i16 127, i32 0
36-
%max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
37-
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
38-
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
39-
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
40-
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
31+
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127))
32+
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128))
33+
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127))
34+
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128))
41 35
%avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
42 36
%res = trunc <8 x i16> %avg16 to <8 x i8>
43 37
ret <8 x i8> %res
@@ -51,10 +45,8 @@ define <8 x i8> @test_avgfloor_u(<8 x i16> %a, <8 x i16> %b) {
51 45
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
52 46
; CHECK-NEXT: xtn v0.8b, v0.8h
53 47
; CHECK-NEXT: ret
54-
%mask = insertelement <8 x i16> poison, i16 255, i32 0
55-
%mask.splat = shufflevector <8 x i16> %mask, <8 x i16> poison, <8 x i32> zeroinitializer
56-
%ta16 = and <8 x i16> %a, %mask.splat
57-
%tb16 = and <8 x i16> %b, %mask.splat
48+
%ta16 = and <8 x i16> %a, splat (i16 255)
49+
%tb16 = and <8 x i16> %b, splat (i16 255)
58 50
%avg16 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %ta16, <8 x i16> %tb16)
59 51
%res = trunc <8 x i16> %avg16 to <8 x i8>
60 52
ret <8 x i8> %res
@@ -72,14 +64,10 @@ define <8 x i8> @test_avgfloor_s(<8 x i16> %a, <8 x i16> %b) {
72 64
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
73 65
; CHECK-NEXT: xtn v0.8b, v0.8h
74 66
; CHECK-NEXT: ret
75-
%min = insertelement <8 x i16> poison, i16 -128, i32 0
76-
%min.splat = shufflevector <8 x i16> %min, <8 x i16> poison, <8 x i32> zeroinitializer
77-
%max = insertelement <8 x i16> poison, i16 127, i32 0
78-
%max.splat = shufflevector <8 x i16> %max, <8 x i16> poison, <8 x i32> zeroinitializer
79-
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %max.splat)
80-
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> %min.splat)
81-
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> %max.splat)
82-
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> %min.splat)
67+
%ta16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> splat (i16 127))
68+
%ta16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %ta16, <8 x i16> splat (i16 -128))
69+
%tb16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %b, <8 x i16> splat (i16 127))
70+
%tb16.clamped = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %tb16, <8 x i16> splat (i16 -128))
83 71
%avg16 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %ta16.clamped, <8 x i16> %tb16.clamped)
84 72
%res = trunc <8 x i16> %avg16 to <8 x i8>
85 73
ret <8 x i8> %res

0 commit comments

Comments (0)