Merged
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10009,6 +10009,14 @@ static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
case X86ISD::VBROADCAST_LOAD:
// TODO: Handle MaskSize != VT.getVectorNumElements()?
return (Op == ExpectedOp && (int)VT.getVectorNumElements() == MaskSize);
case X86ISD::SUBV_BROADCAST_LOAD:
// TODO: Handle MaskSize != VT.getVectorNumElements()?
if (Op == ExpectedOp && (int)VT.getVectorNumElements() == MaskSize) {
auto *MemOp = cast<MemSDNode>(Op);
unsigned NumMemElts = MemOp->getMemoryVT().getVectorNumElements();
return (Idx % NumMemElts) == (ExpectedIdx % NumMemElts);
}
break;
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
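Note on the hunk above: a `SUBV_BROADCAST_LOAD` repeats the loaded subvector's elements across the wider vector, so two lane indices read the same scalar exactly when they are congruent modulo the number of elements in the memory type. A minimal standalone sketch of that equivalence rule, using illustrative names rather than the LLVM API:

```cpp
#include <cassert>

// Illustrative helper (not LLVM API): a subvector broadcast load repeats
// NumMemElts elements across all lanes, so lane Idx and lane ExpectedIdx
// hold the same scalar iff they agree modulo NumMemElts.
static bool subvBroadcastLanesEqual(int Idx, int ExpectedIdx,
                                    unsigned NumMemElts) {
  return (Idx % NumMemElts) == (ExpectedIdx % NumMemElts);
}

int main() {
  // A v4i32 load broadcast to v8i32 holds mem[0..3] twice, so lanes 1 and 5
  // are interchangeable in a shuffle mask...
  assert(subvBroadcastLanesEqual(1, 5, 4));
  // ...while lanes 0 and 1 are not.
  assert(!subvBroadcastLanesEqual(0, 1, 4));
  return 0;
}
```

Treating congruent lanes as interchangeable is what allows the shuffle lowering in the regenerated tests below to replace the old `vbroadcasti128` + `vpshufd` pair with a single `vpbroadcastd` splat.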
@@ -1560,8 +1560,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3381,15 +3380,13 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
;
; AVX2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd (%rdi), %xmm0
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -1560,8 +1560,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
;
; AVX2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3723,53 +3722,49 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
;
; AVX2-SLOW-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %xmm0
; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm0
; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %ymm1
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX2-SLOW-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5],ymm0[6],ymm2[7]
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
; AVX2-FAST-PERLANE-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX2-FAST-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm1, %ymm1
; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa %ymm0, 32(%rdx)
; AVX2-FAST-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
; AVX2-FAST-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
@@ -5317,40 +5312,17 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX2-SLOW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX2-FAST-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
; AVX2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; AVX512F: # %bb.0:
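For context, the unified AVX2 sequence in the vec512 hunk above (broadcast the leading dword, zero the odd dwords, add the byte bias, store 64 bytes) corresponds roughly to the intrinsics rendering below. The function name, signature, and use of unaligned loads/stores are illustrative only and are not taken from the test file:

```cpp
#include <immintrin.h>
#include <cstdint>

// Rough C++ intrinsics rendering of the AVX2 check lines above
// (hypothetical wrapper; the tests themselves operate on raw vector IR).
void vec512_i32_widen_broadcast_add(const int32_t *in, const uint8_t *bias,
                                    uint8_t *out) {
  // vpbroadcastd (%rdi), %ymm0 : splat in[0] across all eight dwords.
  __m256i splat = _mm256_set1_epi32(in[0]);
  // vpblendd with zero : keep even dwords, zero odd dwords (i32 widened to i64).
  __m256i widened = _mm256_blend_epi32(splat, _mm256_setzero_si256(), 0xAA);
  // vpaddb (%rsi) / 32(%rsi) : add the byte bias to both 32-byte halves.
  __m256i lo = _mm256_add_epi8(
      widened, _mm256_loadu_si256(reinterpret_cast<const __m256i *>(bias)));
  __m256i hi = _mm256_add_epi8(
      widened, _mm256_loadu_si256(reinterpret_cast<const __m256i *>(bias + 32)));
  // vmovdqa %ymm0, (%rdx) / %ymm1, 32(%rdx) : store the 64-byte result.
  _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), lo);
  _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 32), hi);
}
```

With the broadcast recognized directly, the AVX2-SLOW, AVX2-FAST-PERLANE, and AVX2-FAST tuning variants now emit the same sequence for this test, which is why their separate check blocks collapse into a single AVX2 prefix.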