-
Notifications
You must be signed in to change notification settings - Fork 15.3k
release/20.x: [X86] Ignore NSW when DstSVT is i32 (#131755) #147034
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@RKSimon What do you think about merging this PR to the release branch? |
|
@llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes: Backport 3d63191. Requested by: @nikic. Full diff: https://github.com/llvm/llvm-project/pull/147034.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4413fbb77f415..12c40b501f627 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20889,7 +20889,8 @@ static SDValue matchTruncateWithPACK(unsigned &PackOpcode, EVT DstVT,
return SDValue();
unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;
- if (Flags.hasNoSignedWrap() || MinSignBits < NumSignBits) {
+ if ((Flags.hasNoSignedWrap() && DstSVT != MVT::i32) ||
+ MinSignBits < NumSignBits) {
PackOpcode = X86ISD::PACKSS;
return In;
}
diff --git a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll
index 2b8eedfbbdc9c..85cca4f6f9a57 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll
@@ -1592,3 +1592,89 @@ entry:
%1 = bitcast <8 x i8> %0 to i64
ret i64 %1
}
+
+define void @foo(<4 x i64> %a, <4 x i64> %b, ptr %p) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
+; SSE-LABEL: foo:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE-NEXT: movaps %xmm2, 16(%rdi)
+; SSE-NEXT: movaps %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: foo:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: foo:
+; AVX2-SLOW: # %bb.0: # %entry
+; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; AVX2-SLOW-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
+; AVX2-SLOW-NEXT: vmovaps %xmm1, 16(%rdi)
+; AVX2-SLOW-NEXT: vmovaps %xmm0, (%rdi)
+; AVX2-SLOW-NEXT: vzeroupper
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-ALL-LABEL: foo:
+; AVX2-FAST-ALL: # %bb.0: # %entry
+; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-FAST-ALL-NEXT: vmovaps %xmm1, 16(%rdi)
+; AVX2-FAST-ALL-NEXT: vmovaps %xmm0, (%rdi)
+; AVX2-FAST-ALL-NEXT: vzeroupper
+; AVX2-FAST-ALL-NEXT: retq
+;
+; AVX2-FAST-PERLANE-LABEL: foo:
+; AVX2-FAST-PERLANE: # %bb.0: # %entry
+; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
+; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm1, 16(%rdi)
+; AVX2-FAST-PERLANE-NEXT: vmovaps %xmm0, (%rdi)
+; AVX2-FAST-PERLANE-NEXT: vzeroupper
+; AVX2-FAST-PERLANE-NEXT: retq
+;
+; AVX512F-LABEL: foo:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpmovqd %zmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: foo:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovqd %ymm1, 16(%rdi)
+; AVX512VL-NEXT: vpmovqd %ymm0, (%rdi)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: foo:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovqd %zmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: foo:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovqd %ymm1, 16(%rdi)
+; AVX512BWVL-NEXT: vpmovqd %ymm0, (%rdi)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = trunc nsw <8 x i64> %0 to <8 x i32>
+ store <8 x i32> %1, ptr %p, align 16
+ ret void
+}
|
We don't have PACKSS for i64->i32, so the NSW flag must be ignored when the destination element type is i32. Fixes: https://godbolt.org/z/qb8nxnPbK, which was introduced by ddd2f57. (cherry picked from commit 3d63191)
|
@nikic (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport 3d63191
Requested by: @nikic