@@ -4592,6 +4592,90 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
45924592 ConstantInt::get (IRB.getInt32Ty (), 0 ));
45934593 }
45944594
4595+ // Handle llvm.x86.avx512.mask.pmov{,s,us}.*.512
4596+ //
4597+ // e.g., call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512
4598+ // (<8 x i64>, <16 x i8>, i8)
4599+ // A WriteThru Mask
4600+ //
4601+ // call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512
4602+ // (<16 x i32>, <16 x i8>, i16)
4603+ //
4604+ // Dst[i] = Mask[i] ? truncate_or_saturate(A[i]) : WriteThru[i]
4605+ // Dst_shadow[i] = Mask[i] ? truncate(A_shadow[i]) : WriteThru_shadow[i]
4606+ //
4607+ // If Dst has more elements than A, the excess elements are zeroed (and the
4608+ // corresponding shadow is initialized).
4609+ //
4610+ // Note: for PMOV (truncation), handleIntrinsicByApplyingToShadow is precise
4611+ // and is much faster than this handler.
4612+ void handleAVX512VectorDownConvert (IntrinsicInst &I) {
4613+ IRBuilder<> IRB (&I);
4614+
4615+ assert (I.arg_size () == 3 );
4616+ Value *A = I.getOperand (0 );
4617+ Value *WriteThrough = I.getOperand (1 );
4618+ Value *Mask = I.getOperand (2 );
4619+
4620+ assert (isa<FixedVectorType>(A->getType ()));
4621+ assert (A->getType ()->isIntOrIntVectorTy ());
4622+
4623+ assert (isa<FixedVectorType>(WriteThrough->getType ()));
4624+ assert (WriteThrough->getType ()->isIntOrIntVectorTy ());
4625+
4626+ unsigned ANumElements =
4627+ cast<FixedVectorType>(A->getType ())->getNumElements ();
4628+ unsigned OutputNumElements =
4629+ cast<FixedVectorType>(WriteThrough->getType ())->getNumElements ();
4630+ assert (ANumElements == OutputNumElements ||
4631+ ANumElements * 2 == OutputNumElements);
4632+
4633+ assert (Mask->getType ()->isIntegerTy ());
4634+ assert (Mask->getType ()->getScalarSizeInBits () == ANumElements);
4635+ insertCheckShadowOf (Mask, &I);
4636+
4637+ assert (I.getType () == WriteThrough->getType ());
4638+
4639+ // Widen the mask, if necessary, to have one bit per element of the output
4640+ // vector.
4641+ // We want the extra bits to have '1's, so that the CreateSelect will
4642+ // select the values from AShadow instead of WriteThroughShadow ("maskless"
4643+ // versions of the intrinsics are sometimes implemented using an all-1's
4644+ // mask and an undefined value for WriteThroughShadow). We accomplish this
4645+ // by using bitwise NOT before and after the ZExt.
4646+ if (ANumElements != OutputNumElements) {
4647+ Mask = IRB.CreateNot (Mask);
4648+ Mask = IRB.CreateZExt (Mask, Type::getIntNTy (*MS.C , OutputNumElements),
4649+ " _ms_widen_mask" );
4650+ Mask = IRB.CreateNot (Mask);
4651+ }
4652+ Mask = IRB.CreateBitCast (
4653+ Mask, FixedVectorType::get (IRB.getInt1Ty (), OutputNumElements));
4654+
4655+ Value *AShadow = getShadow (A);
4656+
4657+ // The return type might have more elements than the input.
4658+ // Temporarily shrink the return type's number of elements.
4659+ VectorType *ShadowType = maybeShrinkVectorShadowType (A, I);
4660+
4661+ // PMOV truncates; PMOVS/PMOVUS uses signed/unsigned saturation.
4662+ // This handler treats them all as truncation, which leads to some rare
4663+ // false positives in the cases where the truncated bytes could
4664+ // unambiguously saturate the value e.g., if A = ??????10 ????????
4665+ // (big-endian), the unsigned saturated byte conversion is 11111111 i.e.,
4666+ // fully defined, but the truncated byte is ????????.
4667+ //
4668+ // TODO: use GetMinMaxUnsigned() to handle saturation precisely.
4669+ AShadow = IRB.CreateTrunc (AShadow, ShadowType, " _ms_trunc_shadow" );
4670+ AShadow = maybeExtendVectorShadowWithZeros (AShadow, I);
4671+
4672+ Value *WriteThroughShadow = getShadow (WriteThrough);
4673+
4674+ Value *Shadow = IRB.CreateSelect (Mask, AShadow, WriteThroughShadow);
4675+ setShadow (&I, Shadow);
4676+ setOriginForNaryOp (I);
4677+ }
4678+
45954679 // For sh.* compiler intrinsics:
45964680 // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
45974681 // (<8 x half>, <8 x half>, <8 x half>, i8, i32)
@@ -5412,6 +5496,66 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
54125496 break ;
54135497 }
54145498
5499+ // AVX512 PMOV: Packed MOV, with truncation
5500+ // Precisely handled by applying the same intrinsic to the shadow
5501+ case Intrinsic::x86_avx512_mask_pmov_dw_512:
5502+ case Intrinsic::x86_avx512_mask_pmov_db_512:
5503+ case Intrinsic::x86_avx512_mask_pmov_qb_512:
5504+ case Intrinsic::x86_avx512_mask_pmov_qw_512: {
5505+ // Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 were removed in
5506+ // f608dc1f5775ee880e8ea30e2d06ab5a4a935c22
5507+ handleIntrinsicByApplyingToShadow (I, I.getIntrinsicID (),
5508+ /* trailingVerbatimArgs=*/ 1 );
5509+ break ;
5510+ }
5511+
5512+ // AVX512 PMOV{S,US}: Packed MOV, with signed/unsigned saturation
5513+ // Approximately handled using the corresponding truncation intrinsic
5514+ // TODO: improve handleAVX512VectorDownConvert to precisely model saturation
5515+ case Intrinsic::x86_avx512_mask_pmovs_dw_512:
5516+ case Intrinsic::x86_avx512_mask_pmovus_dw_512: {
5517+ handleIntrinsicByApplyingToShadow (I,
5518+ Intrinsic::x86_avx512_mask_pmov_dw_512,
5519+ /* trailingVerbatimArgs=*/ 1 );
5520+ break ;
5521+ }
5522+
5523+ case Intrinsic::x86_avx512_mask_pmovs_db_512:
5524+ case Intrinsic::x86_avx512_mask_pmovus_db_512: {
5525+ handleIntrinsicByApplyingToShadow (I,
5526+ Intrinsic::x86_avx512_mask_pmov_db_512,
5527+ /* trailingVerbatimArgs=*/ 1 );
5528+ break ;
5529+ }
5530+
5531+ case Intrinsic::x86_avx512_mask_pmovs_qb_512:
5532+ case Intrinsic::x86_avx512_mask_pmovus_qb_512: {
5533+ handleIntrinsicByApplyingToShadow (I,
5534+ Intrinsic::x86_avx512_mask_pmov_qb_512,
5535+ /* trailingVerbatimArgs=*/ 1 );
5536+ break ;
5537+ }
5538+
5539+ case Intrinsic::x86_avx512_mask_pmovs_qw_512:
5540+ case Intrinsic::x86_avx512_mask_pmovus_qw_512: {
5541+ handleIntrinsicByApplyingToShadow (I,
5542+ Intrinsic::x86_avx512_mask_pmov_qw_512,
5543+ /* trailingVerbatimArgs=*/ 1 );
5544+ break ;
5545+ }
5546+
5547+ case Intrinsic::x86_avx512_mask_pmovs_qd_512:
5548+ case Intrinsic::x86_avx512_mask_pmovus_qd_512:
5549+ case Intrinsic::x86_avx512_mask_pmovs_wb_512:
5550+ case Intrinsic::x86_avx512_mask_pmovus_wb_512: {
5551+ // Since Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 do not exist, we
5552+ // cannot use handleIntrinsicByApplyingToShadow. Instead, we call the
5553+ // slow-path handler.
5554+ handleAVX512VectorDownConvert (I);
5555+ break ;
5556+ }
5557+
5558+ // AVX512 FP16 Arithmetic
54155559 case Intrinsic::x86_avx512fp16_mask_add_sh_round:
54165560 case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
54175561 case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
0 commit comments