Skip to content

Commit 9c2bbfe

Browse files
authored
[X86] X86ISelDAGToDAG - don't let ADD/SUB(X,1) -> SUB/ADD(X,-1) constant fold (#168726)
This late into lowering, we don't have a good way to handle constant build_vector lowering. Fixes #168594.
1 parent f3d8a5c commit 9c2bbfe

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
10041004
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
10051005
N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
10061006
APInt SplatVal;
1007-
if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
1007+
if (!ISD::isBuildVectorOfConstantSDNodes(N->getOperand(0).getNode()) &&
1008+
X86::isConstantSplat(N->getOperand(1), SplatVal) &&
10081009
SplatVal.isOne()) {
10091010
SDLoc DL(N);
10101011

llvm/test/CodeGen/X86/pr168594.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
4+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX
5+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX
6+
7+
define <8 x i16> @PR168594() {
8+
; SSE-LABEL: PR168594:
9+
; SSE: # %bb.0:
10+
; SSE-NEXT: pxor %xmm0, %xmm0
11+
; SSE-NEXT: psubw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12+
; SSE-NEXT: retq
13+
;
14+
; AVX-LABEL: PR168594:
15+
; AVX: # %bb.0:
16+
; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
17+
; AVX-NEXT: vpsubw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18+
; AVX-NEXT: retq
19+
%call = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> splat (i16 1), <8 x i16> zeroinitializer)
20+
%sub = sub <8 x i16> zeroinitializer, %call
21+
ret <8 x i16> %sub
22+
}

0 commit comments

Comments
 (0)