Skip to content

Commit 2e5a5fd

Browse files
authored
[X86] Add test showing failure to remove sign splats from PACKSS intrinsics (llvm#161518)
PACKSS intrinsic calls are only expanded to X86ISD::PACKSS nodes during legalisation, after which we fail to remove ASHR sign splats (by then lowered to X86ISD::VSRAI) that are unnecessary. Also add an example of FREEZE(PACKSS()), as that is an issue as well.
1 parent 8df0575 commit 2e5a5fd

File tree

1 file changed

+55
-0
lines changed

1 file changed

+55
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
5+
6+
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
7+
8+
; TODO: Failure to remove the unnecessary sign splat (ashr by 15) of the PACKSS result
9+
; Codegen test: packssdw of a sign-extended compare mask and an all-ones
; vector, followed by an arithmetic shift right by 15 of every i16 lane.
define <8 x i16> @combine_packss_v4i32_signsplat(<4 x i32> %a0, <4 x i32> %a1) {
10+
; SSE-LABEL: combine_packss_v4i32_signsplat:
11+
; SSE: # %bb.0:
12+
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
13+
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
14+
; SSE-NEXT: packssdw %xmm1, %xmm0
15+
; SSE-NEXT: psraw $15, %xmm0
16+
; SSE-NEXT: retq
17+
;
18+
; AVX-LABEL: combine_packss_v4i32_signsplat:
19+
; AVX: # %bb.0:
20+
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
21+
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
22+
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
23+
; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
24+
; AVX-NEXT: retq
25+
; Signed per-lane compare: each i1 lane is true where %a0 > %a1.
%cmp = icmp sgt <4 x i32> %a0, %a1
26+
; Sign-extend the i1 mask, so every i32 lane is either 0 or -1.
%ext = sext <4 x i1> %cmp to <4 x i32>
27+
; Signed-saturating pack of the mask and an all-ones vector into eight i16
; lanes; as every input lane is 0 or -1, every output lane is 0 or -1 too.
%pack = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %ext, <4 x i32> splat (i32 -1))
28+
; Shifting right arithmetically by 15 splats each lane's sign bit across the
; lane. The assertions above record that the shift (psraw/vpsraw $15) is
; currently still emitted after the pack.
%signsplat = ashr <8 x i16> %pack, splat (i16 15)
29+
ret <8 x i16> %signsplat
30+
}
31+
32+
; TODO: Failure to remove the unnecessary sign splat (ashr by 15) when the PACKSS result is frozen first
33+
; Codegen test: same pattern as a plain packssdw + ashr-by-15 sign splat, but
; with a freeze placed between the pack and the shift.
define <8 x i16> @combine_packss_v4i32_freeze_signsplat(<4 x i32> %a0, <4 x i32> %a1) {
34+
; SSE-LABEL: combine_packss_v4i32_freeze_signsplat:
35+
; SSE: # %bb.0:
36+
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
37+
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
38+
; SSE-NEXT: packssdw %xmm1, %xmm0
39+
; SSE-NEXT: psraw $15, %xmm0
40+
; SSE-NEXT: retq
41+
;
42+
; AVX-LABEL: combine_packss_v4i32_freeze_signsplat:
43+
; AVX: # %bb.0:
44+
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
45+
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
46+
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
47+
; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
48+
; AVX-NEXT: retq
49+
; Signed per-lane compare: each i1 lane is true where %a0 > %a1.
%cmp = icmp sgt <4 x i32> %a0, %a1
50+
; Sign-extend the i1 mask, so every i32 lane is either 0 or -1.
%ext = sext <4 x i1> %cmp to <4 x i32>
51+
; Signed-saturating pack of the mask and an all-ones vector into eight i16
; lanes; as every input lane is 0 or -1, every output lane is 0 or -1 too.
%pack = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %ext, <4 x i32> splat (i32 -1))
52+
; The freeze sits between the pack and the shift, so any fold of the shift
; has to look through it to reach the pack.
%freeze = freeze <8 x i16> %pack
53+
; Shifting right arithmetically by 15 splats each lane's sign bit across the
; lane. The assertions above record that the shift (psraw/vpsraw $15) is
; currently still emitted with the freeze in place.
%signsplat = ashr <8 x i16> %freeze, splat (i16 15)
54+
ret <8 x i16> %signsplat
55+
}

0 commit comments

Comments
 (0)