Commit 5702dad
[DAG] Enable ISD::INSERT_SUBVECTOR SimplifyMultipleUseDemandedBits handling

This allows SimplifyDemandedBits to call SimplifyMultipleUseDemandedBits to create a simpler ISD::INSERT_SUBVECTOR, which is particularly useful for cases where we're splitting into subvectors anyhow.
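For context: SimplifyDemandedBits can only rewrite an operand in place when the node being simplified is that operand's sole user, whereas SimplifyMultipleUseDemandedBits looks through the operand and returns an already-existing value that supplies the same demanded bits and elements, leaving the multi-use node itself untouched. A hedged sketch of the helper's contract, with the signature assumed from the call sites in the diff below (the comment is a paraphrase, not quoted from the source):

// Assumed declaration on TargetLowering.
//
// If some existing value other than Op computes the same DemandedBits in
// the DemandedElts lanes, return it; otherwise return an empty SDValue.
// Op itself is never rewritten, so this is safe on multi-use operands:
// the caller substitutes the result only in its own context.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
                                        const APInt &DemandedElts,
                                        SelectionDAG &DAG,
                                        unsigned Depth) const;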
1 parent: 5df1ac7

4 files changed (+49, -36 lines)

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (16 additions, 0 deletions)

@@ -947,6 +947,22 @@ bool TargetLowering::SimplifyDemandedBits(
       Known.One &= KnownBase.One;
       Known.Zero &= KnownBase.Zero;
     }
+
+    // Attempt to avoid multi-use src if we don't need anything from it.
+    if (!DemandedBits.isAllOnesValue() || !SubElts.isAllOnesValue() ||
+        !BaseElts.isAllOnesValue()) {
+      SDValue NewSub = SimplifyMultipleUseDemandedBits(
+          Sub, DemandedBits, SubElts, TLO.DAG, Depth + 1);
+      SDValue NewBase = SimplifyMultipleUseDemandedBits(
+          Base, DemandedBits, BaseElts, TLO.DAG, Depth + 1);
+      if (NewSub || NewBase) {
+        NewSub = NewSub ? NewSub : Sub;
+        NewBase = NewBase ? NewBase : Base;
+        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewBase, NewSub,
+                                        Op.getOperand(2));
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
     break;
   }
   case ISD::EXTRACT_SUBVECTOR: {
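The SubElts and BaseElts masks tested above are derived earlier in the same case block from the incoming DemandedElts. A minimal sketch of that split, reconstructed from the surrounding INSERT_SUBVECTOR handling and not part of this diff (NumSubElts is assumed to be the subvector's element count):

// Reconstructed sketch: result lanes [Idx, Idx + NumSubElts) are supplied
// by Sub, all remaining demanded lanes by Base.
uint64_t Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
APInt SubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt BaseElts = DemandedElts;
BaseElts.insertBits(APInt::getNullValue(NumSubElts), Idx);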

llvm/test/CodeGen/X86/horizontal-reduce-smin.ll (12 additions, 12 deletions)

@@ -1118,14 +1118,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ;
 ; X86-AVX1-LABEL: test_reduce_v8i64:
 ; X86-AVX1:       ## %bb.0:
-; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
-; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm5
-; X86-AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm1, %xmm0
-; X86-AVX1-NEXT:    vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
 ; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
 ; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm1
+; X86-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
 ; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0

@@ -1236,14 +1236,14 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ;
 ; X64-AVX1-LABEL: test_reduce_v8i64:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
-; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm5
-; X64-AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm1, %xmm0
-; X64-AVX1-NEXT:    vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
 ; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
+; X64-AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
 ; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm1
+; X64-AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
 ; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0

llvm/test/CodeGen/X86/insertelement-ones.ll (0 additions, 3 deletions)

@@ -425,7 +425,6 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
 ; AVX1-NEXT:    movl $255, %eax
 ; AVX1-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
 ; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0

@@ -437,7 +436,6 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
 ; AVX2-NEXT:    movl $255, %eax
 ; AVX2-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
 ; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0

@@ -449,7 +447,6 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
 ; AVX512-NEXT:    movl $255, %eax
 ; AVX512-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm1
 ; AVX512-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX512-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
 ; AVX512-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0

llvm/test/CodeGen/X86/vector-reduce-smin.ll (21 additions, 21 deletions)

@@ -309,14 +309,14 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ;
 ; AVX1-LABEL: test_v8i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm5
-; AVX1-NEXT:    vblendvpd %xmm5, %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm4, %xmm2, %xmm3, %xmm1
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
+; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0

@@ -589,24 +589,24 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ;
 ; AVX1-LABEL: test_v16i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm8
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm9
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm11
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm7
-; AVX1-NEXT:    vpcmpgtq %xmm11, %xmm7, %xmm10
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vpcmpgtq %xmm5, %xmm4, %xmm6
-; AVX1-NEXT:    vblendvpd %xmm6, %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vblendvpd %xmm10, %xmm11, %xmm7, %xmm5
-; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm6
-; AVX1-NEXT:    vblendvpd %xmm9, %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm8, %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vblendvpd %xmm6, %xmm4, %xmm5, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm5
+; AVX1-NEXT:    vpcmpgtq %xmm11, %xmm5, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT:    vpcmpgtq %xmm7, %xmm6, %xmm9
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm10
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm4
+; AVX1-NEXT:    vblendvpd %xmm4, %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vblendvpd %xmm10, %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvpd %xmm9, %xmm7, %xmm6, %xmm3
+; AVX1-NEXT:    vblendvpd %xmm8, %xmm11, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm5
+; AVX1-NEXT:    vblendvpd %xmm5, %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm1
+; AVX1-NEXT:    vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
