diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp
index d1c481483dfad..b0d8409d97ffc 100644
--- a/compiler-rt/lib/msan/tests/msan_test.cpp
+++ b/compiler-rt/lib/msan/tests/msan_test.cpp
@@ -4271,14 +4271,39 @@ TEST(VectorSadTest, sse2_psad_bw) {
 }
 
 TEST(VectorMaddTest, mmx_pmadd_wd) {
-  V4x16 a = {Poisoned<U2>(), 1, 2, 3};
+  V4x16 a = {Poisoned<U2>(0), 1, 2, 3};
   V4x16 b = {100, 101, 102, 103};
   V2x32 c = _mm_madd_pi16(a, b);
 
+  // Multiply step:
+  //     {Poison * 100, 1 * 101, 2 * 102, 3 * 103}
+  //  == {Poison,       1 * 101, 2 * 102, 3 * 103}
+  // Note that the concrete zero value of the poisoned element is ignored.
+  //
+  // Horizontal add step:
+  //     {Poison + 1 * 101, 2 * 102 + 3 * 103}
+  //  == {Poison,           2 * 102 + 3 * 103}
   EXPECT_POISONED(c[0]);
   EXPECT_NOT_POISONED(c[1]);
 
   EXPECT_EQ((unsigned)(2 * 102 + 3 * 103), c[1]);
+
+  V4x16 d = {Poisoned<U2>(0), 1, 0, 3};
+  V4x16 e = {100, 101, Poisoned<U2>(102), 103};
+  V2x32 f = _mm_madd_pi16(d, e);
+  // Multiply step:
+  //     {Poison * 100, 1 * 101, 0 * Poison, 3 * 103}
+  //  == {Poison,       1 * 101, 0,          3 * 103}
+  // Note that 0 * Poison == 0 because the zero is initialized.
+  //
+  // Horizontal add step:
+  //     {Poison + 1 * 101, 0 + 3 * 103}
+  //  == {Poison,           3 * 103}
+
+  EXPECT_POISONED(f[0]);
+  EXPECT_NOT_POISONED(f[1]);
+
+  EXPECT_EQ((unsigned)(3 * 103), f[1]);
 }
 
 TEST(VectorCmpTest, mm_cmpneq_ps) {
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 21bd4164385ab..3ecace5cfe6e4 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3641,9 +3641,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  // Get an MMX-sized vector type.
-  Type *getMMXVectorTy(unsigned EltSizeInBits) {
-    const unsigned X86_MMXSizeInBits = 64;
+  // Get an MMX-sized (64-bit) vector type or, optionally, a vector of
+  // another total size.
+  Type *getMMXVectorTy(unsigned EltSizeInBits,
+                       unsigned X86_MMXSizeInBits = 64) {
     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
            "Illegal MMX vector element size");
     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
@@ -3843,20 +3844,109 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  // Instrument multiply-add intrinsic.
-  void handleVectorPmaddIntrinsic(IntrinsicInst &I,
-                                  unsigned MMXEltSizeInBits = 0) {
-    Type *ResTy =
-        MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
+  // Instrument multiply-add intrinsics.
+  //
+  // e.g., Two operands:
+  //          <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
+  //
+  //       Two operands which require an EltSizeInBits override:
+  //          <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
+  //
+  //       Three operands are not implemented yet:
+  //          <4 x i32> @llvm.x86.avx512.vpdpbusd.128
+  //              (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
+  //          (the result of multiply-add'ing %a and %b is accumulated with %s)
+  void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
+                                  unsigned EltSizeInBits = 0) {
     IRBuilder<> IRB(&I);
-    auto *Shadow0 = getShadow(&I, 0);
-    auto *Shadow1 = getShadow(&I, 1);
-    Value *S = IRB.CreateOr(Shadow0, Shadow1);
-    S = IRB.CreateBitCast(S, ResTy);
-    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
-                       ResTy);
-    S = IRB.CreateBitCast(S, getShadowTy(&I));
-    setShadow(&I, S);
+
+    [[maybe_unused]] FixedVectorType *ReturnType =
+        cast<FixedVectorType>(I.getType());
+    assert(isa<FixedVectorType>(ReturnType));
+
+    assert(I.arg_size() == 2);
+
+    // Vectors A and B, and their shadows
+    Value *Va = I.getOperand(0);
+    Value *Vb = I.getOperand(1);
+
+    Value *Sa = getShadow(&I, 0);
+    Value *Sb = getShadow(&I, 1);
+
+    FixedVectorType *ParamType =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType());
+    assert(ParamType == I.getArgOperand(1)->getType());
+
+    assert(ParamType->getPrimitiveSizeInBits() ==
+           ReturnType->getPrimitiveSizeInBits());
+
+    FixedVectorType *ImplicitReturnType = ReturnType;
+    // Step 1: instrument multiplication of corresponding vector elements
+    if (EltSizeInBits) {
+      ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy(
+          EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits()));
+      ParamType = cast<FixedVectorType>(
+          getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
+
+      Va = IRB.CreateBitCast(Va, ParamType);
+      Vb = IRB.CreateBitCast(Vb, ParamType);
+
+      Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType));
+      Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType));
+    } else {
+      assert(ParamType->getNumElements() ==
+             ReturnType->getNumElements() * ReductionFactor);
+    }
+
+    // Multiplying an *initialized* zero by an uninitialized element results in
+    // an initialized zero element.
+    //
+    // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
+    // results in an unpoisoned value. We can therefore adapt the visitAnd()
+    // instrumentation:
+    //   OutShadow =   (SaNonZero & SbNonZero)
+    //               | (VaNonZero & SbNonZero)
+    //               | (SaNonZero & VbNonZero)
+    // where non-zero is checked on a per-element basis (not per bit).
+    Value *SZero = Constant::getNullValue(Sa->getType());
+    Value *VZero = Constant::getNullValue(Va->getType());
+    Value *SaNonZero = IRB.CreateICmpNE(Sa, SZero);
+    Value *SbNonZero = IRB.CreateICmpNE(Sb, SZero);
+    Value *VaNonZero = IRB.CreateICmpNE(Va, VZero);
+    Value *VbNonZero = IRB.CreateICmpNE(Vb, VZero);
+
+    Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero);
+    Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero);
+    Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero);
+
+    // Each element of the vector is represented by a single bit (poisoned or
+    // not), e.g., <8 x i1>.
+    Value *And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero});
+
+    // Extend <8 x i1> to <8 x i16>.
+    // (The real pmadd intrinsic would have computed intermediate values of
+    // <8 x i32>, but that is irrelevant for our shadow purposes because we
+    // consider each element to be either fully initialized or fully
+    // uninitialized.)
+    And = IRB.CreateSExt(And, Sa->getType());
+
+    // Step 2: instrument horizontal add
+    // We don't need bit-precise horizontalReduce because we only want to check
+    // if each pair of elements is fully zero.
+    // Cast to <4 x i32>.
+    Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType);
+
+    // Compute <4 x i1>, then extend back to <4 x i32>.
+    Value *OutShadow = IRB.CreateSExt(
+        IRB.CreateICmpNE(Horizontal,
+                         Constant::getNullValue(Horizontal->getType())),
+        ImplicitReturnType);
+
+    // For MMX, cast it back to the required fake return type (<1 x i64>).
+    if (EltSizeInBits)
+      OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
+
+    setShadow(&I, OutShadow);
     setOriginForNaryOp(I);
   }
 
@@ -5391,19 +5481,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorSadIntrinsic(I);
       break;
 
+    // Multiply and Add Packed Words
+    //   < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
+    //   < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
+    //
+    // Multiply and Add Packed Signed and Unsigned Bytes
+    //   < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
+    //   <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
     case Intrinsic::x86_sse2_pmadd_wd:
     case Intrinsic::x86_avx2_pmadd_wd:
     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
     case Intrinsic::x86_avx2_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
       break;
 
+    // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
     case Intrinsic::x86_ssse3_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I, 8);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSizeInBits=*/8);
       break;
 
+    // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
     case Intrinsic::x86_mmx_pmadd_wd:
-      handleVectorPmaddIntrinsic(I, 16);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSizeInBits=*/16);
       break;
 
     case Intrinsic::x86_sse_cmp_ss:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index f916130fe53e5..cc07958bd9f26 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -140,11 +140,20 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP14:%.*]] = and <16 x i1> [[TMP12]], [[TMP5]]
+; CHECK-NEXT:    [[TMP15:%.*]] = and <16 x i1> [[TMP4]], [[TMP13]]
+; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i1> [[TMP11]], [[TMP14]]
+; 
CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i16> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i16> [[TMP7]] to <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -677,11 +686,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = and <32 x i1> [[TMP12]], [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = and <32 x i1> [[TMP4]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP7]] to <16 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -706,11 +724,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = and <32 x i1> [[TMP17]], 
[[TMP10]] +; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP9]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <32 x i1> [[TMP16]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP22]] to <32 x i8> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <32 x i8> [[TMP12]] to <16 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <16 x i16> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP24]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index ac3bb56719038..99eafc13b2bf1 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -1687,16 +1687,30 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> -; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP35:%.*]] = and <4 x i1> [[TMP22]], [[TMP30]] +; CHECK-NEXT: [[TMP36:%.*]] = and <4 x i1> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP31]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP37]], [[TMP36]] +; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP38]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i16> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <2 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = sext <2 x i1> [[TMP25]] to <2 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <2 x i32> [[TMP27]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64> +; CHECK-NEXT: [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> 
[[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3315,16 +3329,30 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> -; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = and <8 x i1> [[TMP35]], [[TMP33]] +; CHECK-NEXT: [[TMP39:%.*]] = and <8 x i1> [[TMP32]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = or <8 x i1> [[TMP34]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = or <8 x i1> [[TMP40]], [[TMP39]] +; CHECK-NEXT: [[TMP16:%.*]] = sext <8 x i1> [[TMP41]] to <8 x i8> +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i8> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <4 x i16> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP25]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i16> [[TMP29]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64> +; CHECK-NEXT: [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll 
b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index 8f915a59db8e5..7048050180792 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -762,11 +762,20 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i1> [[TMP12]], [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i1> [[TMP4]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i1> [[TMP11]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i16> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i16> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll index 5cc56baf0e0de..9960b80f2856b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll @@ -149,11 +149,20 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: 
[[TMP13:%.*]] = and <16 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i1> [[TMP14]], [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i1> [[TMP5]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i1> [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i16> [[TMP8]] to <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i32> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -714,11 +723,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP14]], [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = and <32 x i1> [[TMP5]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i1> [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP19]] to <32 x i8> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <16 x i16> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP21]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -734,7 +752,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -744,11 +762,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], -2147483649 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 
32 -; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <32 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <32 x i1> [[TMP10]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <32 x i1> [[TMP12]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <32 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP23]] to <32 x i8> +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <32 x i8> [[TMP13]] to <16 x i16> +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i16> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll index 0a3efaaea149f..74822de4962b2 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -1730,16 +1730,30 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> -; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <4 x i16> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = and <4 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = and <4 x i1> [[TMP23]], [[TMP31]] +; CHECK-NEXT: [[TMP37:%.*]] = and <4 x i1> [[TMP30]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = or <4 x i1> 
[[TMP32]], [[TMP36]] +; CHECK-NEXT: [[TMP39:%.*]] = or <4 x i1> [[TMP38]], [[TMP37]] +; CHECK-NEXT: [[TMP24:%.*]] = sext <4 x i1> [[TMP39]] to <4 x i16> +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <4 x i16> [[TMP24]] to <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <2 x i32> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = sext <2 x i1> [[TMP26]] to <2 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <2 x i32> [[TMP28]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP29]] to <1 x i64> +; CHECK-NEXT: [[TMP34:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <1 x i64> [[TMP34]] to <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP35]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3401,16 +3415,30 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> -; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <8 x i8> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = icmp ne <8 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = and <8 x i1> [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP38:%.*]] = and <8 x i1> [[TMP26]], [[TMP34]] +; CHECK-NEXT: [[TMP39:%.*]] = and <8 x i1> [[TMP33]], [[TMP36]] +; CHECK-NEXT: [[TMP40:%.*]] = or <8 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = or <8 x i1> [[TMP40]], [[TMP39]] +; CHECK-NEXT: [[TMP16:%.*]] = sext <8 x i1> [[TMP41]] to <8 x i8> +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <8 x i8> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne <4 x i16> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[TMP42:%.*]] = sext <4 x i1> [[TMP28]] to <4 x i16> +; CHECK-NEXT: [[TMP30:%.*]] = bitcast <4 x 
i16> [[TMP42]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i64 [[TMP30]] to <1 x i64> +; CHECK-NEXT: [[TMP31:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <1 x i64> [[TMP32]] to <8 x i8> +; CHECK-NEXT: [[TMP37:%.*]] = bitcast <1 x i64> [[TMP31]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP25]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP37]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll index e771e60e2f294..3a37eafd78ecb 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll @@ -800,11 +800,20 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i1> [[TMP14]], [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i1> [[TMP5]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i1> [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i16> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <4 x i32> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP21]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll index d614bb85d8584..d1060fb33e1bc 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -17,10 +17,19 @@ define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; 
CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i16> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[B]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i1> [[TMP2]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i1> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i1> [[TMP6]], [[TMP13]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i1> [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <4 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP17]] to <4 x i32> ; CHECK-NEXT: [[C:%.*]] = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[C]] @@ -39,13 +48,27 @@ define <1 x i64> @Test_ssse3_pmadd_ub_sw(<1 x i64> %a, <1 x i64> %b) sanitize_me ; CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = or <1 x i64> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i16> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <8 x i8> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i8> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i8> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i1> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP17]], [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = and <8 x i1> [[TMP14]], [[TMP21]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i1> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i1> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP7:%.*]] = sext <8 x i1> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <4 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP24]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i16> [[TMP23]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64 [[TMP19]] to <1 x i64> ; CHECK-NEXT: [[C:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[A]], <1 x i64> [[B]]) #[[ATTR2]] -; 
CHECK-NEXT: store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <1 x i64> [[TMP20]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[C]] ; entry:
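
Editor's note: the following is a minimal standalone C++ sketch of the shadow semantics that handleVectorPmaddIntrinsic instruments above, modeling one bool of shadow per element. It is not part of the patch; the Lane struct and function names are illustrative assumptions, and the real pass emits the same logic as vector IR (icmp ne, and, or, sext, plus a bitcast for the pairwise reduction).

// Scalar model of the pmadd shadow propagation (ReductionFactor == 2).
#include <cstdint>
#include <cstdio>

struct Lane {
  int32_t V;     // concrete value of the element
  bool Poisoned; // shadow: true if the element is uninitialized
};

// Mirrors OutShadow = (SaNonZero & SbNonZero) | (VaNonZero & SbNonZero)
//                   | (SaNonZero & VbNonZero):
// a product is clean iff both operands are clean, or either operand is an
// *initialized* zero.
static bool productPoisoned(const Lane &A, const Lane &B) {
  bool SaNZ = A.Poisoned, SbNZ = B.Poisoned;
  bool VaNZ = A.V != 0, VbNZ = B.V != 0;
  return (SaNZ && SbNZ) || (VaNZ && SbNZ) || (SaNZ && VbNZ);
}

// Horizontal add step: an output element is poisoned iff either of its two
// intermediate products is poisoned.
static bool pmaddLanePoisoned(const Lane &A0, const Lane &B0, const Lane &A1,
                              const Lane &B1) {
  return productPoisoned(A0, B0) || productPoisoned(A1, B1);
}

int main() {
  // f[0] from the msan_test.cpp case: {Poisoned(0), 1} * {100, 101}.
  Lane D0{0, true}, E0{100, false}, D1{1, false}, E1{101, false};
  std::printf("f[0] poisoned: %d\n", pmaddLanePoisoned(D0, E0, D1, E1)); // 1
  // f[1]: {0, 3} * {Poisoned(102), 103}; the initialized zero wins.
  Lane D2{0, false}, E2{102, true}, D3{3, false}, E3{103, false};
  std::printf("f[1] poisoned: %d\n", pmaddLanePoisoned(D2, E2, D3, E3)); // 0
  return 0;
}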