@@ -3046,7 +3046,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (maybeHandleSimpleNomemIntrinsic(I))
       return true;
 
-    // FIXME: detect and handle SSE maskstore/maskload
+    // FIXME: detect and handle SSE maskstore/maskload?
+    // Some cases are now handled in handleAVXMasked{Load,Store}.
     return false;
   }
 
@@ -3683,6 +3684,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // TODO: Store origin.
   }
 
+  // Intrinsic::masked_store
+  //
+  // Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
+  // stores are lowered to Intrinsic::masked_store.
   void handleMaskedStore(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *V = I.getArgOperand(0);
@@ -3713,6 +3718,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                 std::max(Alignment, kMinOriginAlignment));
   }
 
+  // Intrinsic::masked_load
+  //
+  // Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
+  // loads are lowered to Intrinsic::masked_load.
   void handleMaskedLoad(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *Ptr = I.getArgOperand(0);
@@ -3754,6 +3763,125 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, Origin);
   }
 
+  // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
+  //                                           dst  mask       src
+  //
+  // AVX512 masked stores are lowered to Intrinsic::masked_store and are handled
+  // by handleMaskedStore.
+  //
+  // This function handles AVX and AVX2 masked stores; these use the MSBs of a
+  // vector of integers, unlike the LLVM masked intrinsics, which require a
+  // vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
+  // mentions that the x86 backend does not know how to efficiently convert
+  // from a vector of booleans back into the AVX mask format; therefore, they
+  // (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
+  // intrinsics.
+  void handleAVXMaskedStore(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Dst = I.getArgOperand(0);
+    assert(Dst->getType()->isPointerTy() && "Destination is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    Value *Src = I.getArgOperand(2);
+    assert(isa<VectorType>(Src->getType()) && "Source is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    Value *SrcShadow = getShadow(Src);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Dst, &I);
+      insertShadowCheck(Mask, &I);
+    }
+
+    Value *DstShadowPtr;
+    Value *DstOriginPtr;
+    std::tie(DstShadowPtr, DstOriginPtr) = getShadowOriginPtr(
+        Dst, IRB, SrcShadow->getType(), Alignment, /*isStore*/ true);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, DstShadowPtr);
+    ShadowArgs.append(1, Mask);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    ShadowArgs.append(1, IRB.CreateBitCast(SrcShadow, Src->getType()));
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
+    setShadow(&I, CI);
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // Approximation only
+    auto &DL = F.getDataLayout();
+    paintOrigin(IRB, getOrigin(Src), DstOriginPtr,
+                DL.getTypeStoreSize(SrcShadow->getType()),
+                std::max(Alignment, kMinOriginAlignment));
+  }
+
+  // e.g., <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
+  //       return                                    src  mask
+  //
+  // Masked-off values are replaced with 0, which conveniently also represents
+  // initialized memory.
+  //
+  // AVX512 masked loads are lowered to Intrinsic::masked_load and are handled
+  // by handleMaskedLoad.
+  //
+  // We do not combine this with handleMaskedLoad; see comment in
+  // handleAVXMaskedStore for the rationale.
+  //
+  // This is subtly different from handleIntrinsicByApplyingToShadow(I, 1)
+  // because we need to apply getShadowOriginPtr, not getShadow, to the first
+  // parameter.
+  void handleAVXMaskedLoad(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    Value *Src = I.getArgOperand(0);
+    assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+    Value *Mask = I.getArgOperand(1);
+    assert(isa<VectorType>(Mask->getType()) && "Mask is not a vector!");
+
+    const Align Alignment = Align(1);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Mask, &I);
+    }
+
+    Type *SrcShadowTy = getShadowTy(Src);
+    Value *SrcShadowPtr, *SrcOriginPtr;
+    std::tie(SrcShadowPtr, SrcOriginPtr) =
+        getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+
+    SmallVector<Value *, 2> ShadowArgs;
+    ShadowArgs.append(1, SrcShadowPtr);
+    ShadowArgs.append(1, Mask);
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
+    // The intrinsic may require floating-point but shadows can be arbitrary
+    // bit patterns, of which some would be interpreted as "invalid"
+    // floating-point values (NaN etc.); we assume the intrinsic will happily
+    // copy them.
+    setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
+
+    if (!MS.TrackOrigins)
+      return;
+
+    // The "pass-through" value is always zero (initialized). To the extent
+    // that that results in initialized aligned 4-byte chunks, the origin value
+    // is ignored. It is therefore correct to simply copy the origin from src.
+    Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+    setOrigin(&I, PtrSrcOrigin);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
   // All of these intrinsics are Z = I(X, Y)
   // where the types of all operands and the result match, and are either i32 or
@@ -4466,6 +4594,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    case Intrinsic::x86_avx_maskstore_ps:
+    case Intrinsic::x86_avx_maskstore_pd:
+    case Intrinsic::x86_avx_maskstore_ps_256:
+    case Intrinsic::x86_avx_maskstore_pd_256:
+    case Intrinsic::x86_avx2_maskstore_d:
+    case Intrinsic::x86_avx2_maskstore_q:
+    case Intrinsic::x86_avx2_maskstore_d_256:
+    case Intrinsic::x86_avx2_maskstore_q_256: {
+      handleAVXMaskedStore(I);
+      break;
+    }
+
+    case Intrinsic::x86_avx_maskload_ps:
+    case Intrinsic::x86_avx_maskload_pd:
+    case Intrinsic::x86_avx_maskload_ps_256:
+    case Intrinsic::x86_avx_maskload_pd_256:
+    case Intrinsic::x86_avx2_maskload_d:
+    case Intrinsic::x86_avx2_maskload_q:
+    case Intrinsic::x86_avx2_maskload_d_256:
+    case Intrinsic::x86_avx2_maskload_q_256: {
+      handleAVXMaskedLoad(I);
+      break;
+    }
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);
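
For reference, the mask convention that the new comments describe can be exercised directly with the corresponding Intel intrinsics. The program below is an illustration only, not part of the patch: AVX/AVX2 maskstore/maskload take a vector of integers and honor only the most significant bit of each lane (unlike llvm.masked.store/load, which take a vector of i1), and masked-off lanes of a maskload read back as zero, which is why the pass-through value can be treated as initialized. Build with a compiler flag that enables AVX (e.g. -mavx).

// Standalone sketch of AVX maskstore/maskload MSB-mask semantics.
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  alignas(32) float src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  alignas(32) float dst[8] = {0};

  // Only lanes whose mask element has the MSB set are stored (lanes 0, 2, 7).
  __m256i mask = _mm256_setr_epi32(INT32_MIN, 0, INT32_MIN, 0, 0, 0, 0, INT32_MIN);
  _mm256_maskstore_ps(dst, mask, _mm256_load_ps(src));
  for (float f : dst)
    std::printf("%g ", f); // prints: 1 0 3 0 0 0 0 8
  std::printf("\n");

  // maskload: masked-off lanes read as 0, matching the "pass-through is zero"
  // note in handleAVXMaskedLoad.
  alignas(32) float out[8];
  _mm256_store_ps(out, _mm256_maskload_ps(src, mask));
  for (float f : out)
    std::printf("%g ", f); // prints: 1 0 3 0 0 0 0 8
  std::printf("\n");
  return 0;
}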