Skip to content

Commit ead9659

Browse files
committed
[SelectionDAG] Add DoNotPoisonEltMask to SimplifyDemandedVectorEltsForTargetNode
Add DoNotPoisonEltMask to SimplifyDemandedVectorEltsForTargetNode and try to handle it for a number of X86 opcodes. In some situations we just fall back and assume that the DoNotPoisonEltMask elements are demanded. The goal is to reduce the number of regressions after the fix for #138513.
1 parent b327962 commit ead9659

File tree

79 files changed

+2538
-3054
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+2538
-3054
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4356,8 +4356,9 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
43564356
/// (used to simplify the caller). The KnownUndef/Zero elements may only be
43574357
/// accurate for those bits in the DemandedMask.
43584358
virtual bool SimplifyDemandedVectorEltsForTargetNode(
4359-
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
4360-
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
4359+
SDValue Op, const APInt &DemandedElts, const APInt &DoNotPoisonEltMask,
4360+
APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO,
4361+
unsigned Depth = 0) const;
43614362

43624363
/// Attempt to simplify any target nodes based on the demanded bits/elts,
43634364
/// returning true on success. Otherwise, analyze the

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3912,7 +3912,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
39123912
default: {
39133913
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
39143914
if (SimplifyDemandedVectorEltsForTargetNode(
3915-
Op, DemandedElts | DoNotPoisonEltMask, KnownUndef, KnownZero, TLO,
3915+
Op, DemandedElts, DoNotPoisonEltMask, KnownUndef, KnownZero, TLO,
39163916
Depth))
39173917
return true;
39183918
} else {
@@ -4000,8 +4000,9 @@ unsigned TargetLowering::computeNumSignBitsForTargetInstr(
40004000
}
40014001

40024002
bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4003-
SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
4004-
TargetLoweringOpt &TLO, unsigned Depth) const {
4003+
SDValue Op, const APInt &DemandedElts, const APInt &DoNotPoisonEltMask,
4004+
APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO,
4005+
unsigned Depth) const {
40054006
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
40064007
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
40074008
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 100 additions & 76 deletions
Large diffs are not rendered by default.

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,18 +1307,14 @@ namespace llvm {
13071307
const SelectionDAG &DAG,
13081308
unsigned Depth) const override;
13091309

1310-
bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1311-
const APInt &DemandedElts,
1312-
APInt &KnownUndef,
1313-
APInt &KnownZero,
1314-
TargetLoweringOpt &TLO,
1315-
unsigned Depth) const override;
1316-
1317-
bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1318-
const APInt &DemandedElts,
1319-
unsigned MaskIndex,
1320-
TargetLoweringOpt &TLO,
1321-
unsigned Depth) const;
1310+
bool SimplifyDemandedVectorEltsForTargetNode(
1311+
SDValue Op, const APInt &DemandedElts, const APInt &DoNotPoisonElts,
1312+
APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO,
1313+
unsigned Depth) const override;
1314+
1315+
bool SimplifyDemandedVectorEltsForTargetShuffle(
1316+
SDValue Op, const APInt &DemandedElts, const APInt &DoNotPoisonElts,
1317+
unsigned MaskIndex, TargetLoweringOpt &TLO, unsigned Depth) const;
13221318

13231319
bool SimplifyDemandedBitsForTargetNode(SDValue Op,
13241320
const APInt &DemandedBits,

llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6625,7 +6625,7 @@ define i64 @test_mm512_reduce_mul_epi64(<8 x i64> %__W) {
66256625
; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
66266626
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
66276627
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
6628-
; X64-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
6628+
; X64-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
66296629
; X64-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
66306630
; X64-NEXT: vpsrlq $32, %xmm0, %xmm3
66316631
; X64-NEXT: vpmuludq %xmm3, %xmm1, %xmm3
@@ -6833,7 +6833,7 @@ define i64 @test_mm512_mask_reduce_mul_epi64(i8 zeroext %__M, <8 x i64> %__W) {
68336833
; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
68346834
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
68356835
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
6836-
; X64-NEXT: vpsrldq {{.*#+}} xmm2 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
6836+
; X64-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
68376837
; X64-NEXT: vpmuludq %xmm0, %xmm2, %xmm2
68386838
; X64-NEXT: vpsrlq $32, %xmm0, %xmm3
68396839
; X64-NEXT: vpmuludq %xmm3, %xmm1, %xmm3

llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
195195
; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
196196
; SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
197197
; SSSE3-NEXT: pand %xmm0, %xmm2
198-
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
198+
; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1]
199199
; SSSE3-NEXT: movmskpd %xmm2, %eax
200200
; SSSE3-NEXT: # kill: def $al killed $al killed $eax
201201
; SSSE3-NEXT: retq
@@ -342,25 +342,21 @@ define i2 @v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
342342
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm1
343343
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm0
344344
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm5
345-
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
346-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
347-
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
348-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
349-
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm0
350-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
345+
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm5
346+
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
347+
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
348+
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
351349
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
352350
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
353351
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm2
354352
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm0
355-
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
356-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
357-
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm2
358-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
359-
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
360-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
361-
; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
362-
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
363-
; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
353+
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm0
354+
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
355+
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
356+
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm3
357+
; SSE2-SSSE3-NEXT: por %xmm2, %xmm3
358+
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm3
359+
; SSE2-SSSE3-NEXT: movmskpd %xmm3, %eax
364360
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
365361
; SSE2-SSSE3-NEXT: retq
366362
;

llvm/test/CodeGen/X86/bitcast-setcc-128.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
157157
; SSSE3-LABEL: v2i8:
158158
; SSSE3: # %bb.0:
159159
; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
160-
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
160+
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1]
161161
; SSSE3-NEXT: movmskpd %xmm0, %eax
162162
; SSSE3-NEXT: # kill: def $al killed $al killed $eax
163163
; SSSE3-NEXT: retq
@@ -272,12 +272,10 @@ define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
272272
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
273273
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
274274
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
275-
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
276-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
277-
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
278-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
279-
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
280-
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
275+
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
276+
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
277+
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
278+
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
281279
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
282280
; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
283281
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax

llvm/test/CodeGen/X86/bitcast-vector-bool.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -634,11 +634,7 @@ define i1 @trunc_v32i8_cmp(<32 x i8> %a0) nounwind {
634634
define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
635635
; SSE-LABEL: bitcast_v8i64_to_v2i4:
636636
; SSE: # %bb.0:
637-
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
638-
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
639637
; SSE-NEXT: packssdw %xmm3, %xmm2
640-
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
641-
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
642638
; SSE-NEXT: packssdw %xmm1, %xmm0
643639
; SSE-NEXT: packssdw %xmm2, %xmm0
644640
; SSE-NEXT: packsswb %xmm0, %xmm0

llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -264,18 +264,16 @@ define i64 @dot_ext_v2i8_v2i64(ptr %a, i64 %a_stride, ptr %b) nounwind {
264264
; SSE2-NEXT: movd %eax, %xmm1
265265
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
266266
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
267-
; SSE2-NEXT: pxor %xmm2, %xmm2
268-
; SSE2-NEXT: pxor %xmm3, %xmm3
269-
; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
270267
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
268+
; SSE2-NEXT: pxor %xmm2, %xmm2
269+
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
271270
; SSE2-NEXT: psrad $24, %xmm1
272271
; SSE2-NEXT: pmuludq %xmm0, %xmm1
273-
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
274-
; SSE2-NEXT: pmuludq %xmm0, %xmm3
275-
; SSE2-NEXT: psllq $32, %xmm3
276-
; SSE2-NEXT: paddq %xmm1, %xmm3
277-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
278-
; SSE2-NEXT: paddq %xmm3, %xmm0
272+
; SSE2-NEXT: pmuludq %xmm0, %xmm2
273+
; SSE2-NEXT: psllq $32, %xmm2
274+
; SSE2-NEXT: paddq %xmm1, %xmm2
275+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
276+
; SSE2-NEXT: paddq %xmm2, %xmm0
279277
; SSE2-NEXT: movq %xmm0, %rax
280278
; SSE2-NEXT: retq
281279
;
@@ -460,18 +458,16 @@ define i64 @dot_ext_v2i32_v2i64(ptr %a, i64 %a_stride, ptr %b) nounwind {
460458
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
461459
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
462460
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
463-
; SSE2-NEXT: pxor %xmm2, %xmm2
464-
; SSE2-NEXT: pxor %xmm3, %xmm3
465-
; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
466461
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
467-
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
468-
; SSE2-NEXT: pmuludq %xmm1, %xmm3
469-
; SSE2-NEXT: psllq $32, %xmm3
470-
; SSE2-NEXT: pmuludq %xmm1, %xmm0
471-
; SSE2-NEXT: paddq %xmm3, %xmm0
472-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
473-
; SSE2-NEXT: paddq %xmm0, %xmm1
474-
; SSE2-NEXT: movq %xmm1, %rax
462+
; SSE2-NEXT: pxor %xmm2, %xmm2
463+
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
464+
; SSE2-NEXT: pmuludq %xmm1, %xmm2
465+
; SSE2-NEXT: psllq $32, %xmm2
466+
; SSE2-NEXT: pmuludq %xmm0, %xmm1
467+
; SSE2-NEXT: paddq %xmm2, %xmm1
468+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
469+
; SSE2-NEXT: paddq %xmm1, %xmm0
470+
; SSE2-NEXT: movq %xmm0, %rax
475471
; SSE2-NEXT: retq
476472
;
477473
; SSE4-LABEL: dot_ext_v2i32_v2i64:

llvm/test/CodeGen/X86/combine-multiplies.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ define void @testCombineMultiplies_non_splat(<4 x i32> %v1) nounwind {
144144
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
145145
; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [22,33,44,55]
146146
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
147-
; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [33,33,55,55]
147+
; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [33,u,55,u]
148148
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
149149
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
150150
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [242,726,1452,2420]

0 commit comments

Comments
 (0)