Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13184,22 +13184,22 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,

// select Cond, -1, x → or Cond, x
if (IsTAllOne) {
SDValue X = DAG.getBitcast(CondVT, FVal);
SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, Or);
}

// select Cond, x, 0 → and Cond, x
if (IsFAllZero) {
SDValue X = DAG.getBitcast(CondVT, TVal);
SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, And);
}

// select Cond, 0, x -> and not(Cond), x
if (IsTAllZero &&
(isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
SDValue X = DAG.getBitcast(CondVT, FVal);
SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue And =
DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
return DAG.getBitcast(VT, And);
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9132,11 +9132,17 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue SrcVec, IndicesVec;

auto PeekThroughFreeze = [](SDValue N) {
if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
return N->getOperand(0);
return N;
};
// Check for a match of the permute source vector and permute index elements.
// This is done by checking that the i-th build_vector operand is of the form:
// (extract_elt SrcVec, (extract_elt IndicesVec, i)).
for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
SDValue Op = V.getOperand(Idx);
SDValue Op = PeekThroughFreeze(V.getOperand(Idx));
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();

Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/vselect-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,12 @@ define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i1
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.8h v0, #128, lsl #8
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v0, v1
; CHECK-NEXT: cmhi.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v1, v2
; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: ushll.4s v2, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v2, v1
; CHECK-NEXT: cmhi.4s v1, v0, v1
; CHECK-NEXT: and.16b v1, v1, v0
; CHECK-NEXT: and.16b v0, v3, v2
; CHECK-NEXT: ret
%ext = zext <8 x i15> %a to <8 x i32>
%cmp = icmp ugt <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
Expand Down Expand Up @@ -289,12 +289,12 @@ define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13>
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.8h v0, #224, lsl #8
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v0, v1
; CHECK-NEXT: cmeq.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v1, v2
; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: ushll.4s v2, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v2, v1
; CHECK-NEXT: cmeq.4s v1, v0, v1
; CHECK-NEXT: and.16b v1, v1, v0
; CHECK-NEXT: and.16b v0, v3, v2
; CHECK-NEXT: ret
%ext = zext <8 x i13> %a to <8 x i32>
%cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
Expand Down Expand Up @@ -429,17 +429,17 @@ define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.4s v2, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v0, v0, #19
; CHECK-NEXT: shl.4s v2, v2, #19
; CHECK-NEXT: sshr.4s v0, v0, #19
; CHECK-NEXT: sshr.4s v2, v2, #19
; CHECK-NEXT: cmeq.4s v3, v2, v1
; CHECK-NEXT: cmeq.4s v1, v0, v1
; CHECK-NEXT: and.16b v1, v1, v0
; CHECK-NEXT: and.16b v0, v3, v2
; CHECK-NEXT: cmeq.4s v3, v0, v1
; CHECK-NEXT: cmeq.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v1, v2
; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: ret
%ext = sext <8 x i13> %a to <8 x i32>
%cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
Expand Down Expand Up @@ -493,17 +493,17 @@ entry:
define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.4s v2, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v0, v0, #17
; CHECK-NEXT: shl.4s v2, v2, #17
; CHECK-NEXT: sshr.4s v0, v0, #17
; CHECK-NEXT: sshr.4s v2, v2, #17
; CHECK-NEXT: cmge.4s v3, v2, v1
; CHECK-NEXT: cmge.4s v1, v0, v1
; CHECK-NEXT: and.16b v1, v1, v0
; CHECK-NEXT: and.16b v0, v3, v2
; CHECK-NEXT: cmge.4s v3, v0, v1
; CHECK-NEXT: cmge.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v1, v2
; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: ret
%ext = sext <8 x i15> %a to <8 x i32>
%cmp = icmp sge <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/avg-mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,11 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpavgb %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
; AVX512F-NEXT: kmovw %edi, %k4
Expand Down Expand Up @@ -364,11 +364,11 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpavgw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
Expand Down
36 changes: 24 additions & 12 deletions llvm/test/CodeGen/X86/avx512-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
Expand All @@ -21,7 +22,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
Expand All @@ -35,7 +37,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; KNL-NEXT: vpmovsxbw %xmm1, %xmm1
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
Expand All @@ -50,7 +53,8 @@ define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -208,8 +212,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vmovdqu (%rdi), %ymm2
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
Expand All @@ -231,8 +237,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
Expand All @@ -253,8 +261,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
; KNL-NEXT: vmovdqu (%rdi), %ymm2
; KNL-NEXT: vpmovsxbw %xmm2, %ymm3
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
Expand All @@ -276,8 +286,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm3
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm2
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr78897.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
; X86-SSE2-NEXT: movq {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,0,0,0,0,0,0,0,0]
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm2, %esi
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/sqrt-fastmath.ll
Original file line number Diff line number Diff line change
Expand Up @@ -410,34 +410,34 @@ define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
define <4 x float> @v4f32_estimate2(<4 x float> %x) #5 {
; SSE-LABEL: v4f32_estimate2:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm2
; SSE-NEXT: mulps %xmm0, %xmm2
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; SSE-NEXT: cmpleps %xmm0, %xmm1
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; SSE-NEXT: andps %xmm0, %xmm1
; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; SSE-NEXT: cmpleps %xmm1, %xmm2
; SSE-NEXT: rsqrtps %xmm0, %xmm1
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: v4f32_estimate2:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtps %xmm0, %xmm1
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vrsqrtps %xmm0, %xmm2
; AVX1-NEXT: vmulps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: v4f32_estimate2:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtps %xmm0, %xmm1
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vrsqrtps %xmm0, %xmm2
; AVX512-NEXT: vmulps %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vandps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%sqrt = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
ret <4 x float> %sqrt
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/ushl_sat_vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-AVX2-NEXT: vpsllvd %ymm1, %ymm2, %ymm2
; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; X64-AVX2-NEXT: vpsrlvd %ymm1, %ymm3, %ymm1
Expand Down
Loading