@@ -4132,10 +4132,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     CallInst *CI =
         IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
-    // The intrinsic may require floating-point but shadows can be arbitrary
-    // bit patterns, of which some would be interpreted as "invalid"
-    // floating-point values (NaN etc.); we assume the intrinsic will happily
-    // copy them.
+    // The AVX masked load intrinsics do not have integer variants. We use the
+    // floating-point variants, which will happily copy the shadows even if
+    // they are interpreted as "invalid" floating-point values (NaN etc.).
     setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
 
     if (!MS.TrackOrigins)
@@ -4301,6 +4300,78 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  // Handle Arm NEON vector load intrinsics (vld*).
+  //
+  // The WithLane instructions (ld[234]lane) are similar to:
+  //   call {<4 x i32>, <4 x i32>, <4 x i32>}
+  //       @llvm.aarch64.neon.ld3lane.v4i32.p0
+  //           (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane,
+  //            ptr %A)
+  //
+  // The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
+  // to:
+  //   call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
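+  //
+  // A rough sketch of the instrumentation (value names are illustrative):
+  // the shadow of
+  //   %ret = call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
+  // is computed by applying the same intrinsic to the shadow address of %A:
+  //   %_msld = call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %sA)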
+  void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
+    unsigned int numArgs = I.arg_size();
+
+    // Return type is a struct of vectors of integers or floating-point
+    assert(I.getType()->isStructTy());
+    [[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
+    assert(RetTy->getNumElements() > 0);
+    assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
+           RetTy->getElementType(0)->isFPOrFPVectorTy());
+    for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+      assert(RetTy->getElementType(i) == RetTy->getElementType(0));
+
+    if (WithLane) {
+      // 2, 3 or 4 vectors, plus lane number, plus input pointer
+      assert(4 <= numArgs && numArgs <= 6);
+
+      // Return type is a struct of the input vectors
+      assert(RetTy->getNumElements() + 2 == numArgs);
+      for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+        assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
+    } else {
+      assert(numArgs == 1);
+    }
+
+    IRBuilder<> IRB(&I);
+
+    SmallVector<Value *, 6> ShadowArgs;
+    if (WithLane) {
+      for (unsigned int i = 0; i < numArgs - 2; i++)
+        ShadowArgs.push_back(getShadow(I.getArgOperand(i)));
+
+      // Lane number, passed verbatim
+      Value *LaneNumber = I.getArgOperand(numArgs - 2);
+      ShadowArgs.push_back(LaneNumber);
+
+      // TODO: blend shadow of lane number into output shadow?
+      insertShadowCheck(LaneNumber, &I);
+    }
+
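+    // The last argument is always the pointer to the source memory.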
+    Value *Src = I.getArgOperand(numArgs - 1);
+    assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+    Type *SrcShadowTy = getShadowTy(Src);
+    auto [SrcShadowPtr, SrcOriginPtr] =
+        getShadowOriginPtr(Src, IRB, SrcShadowTy, Align(1), /*isStore*/ false);
+    ShadowArgs.push_back(SrcShadowPtr);
+
+    // The NEON vector load instructions handled by this function all have
+    // integer variants. It is easier to use those rather than trying to cast
+    // a struct of vectors of floats into a struct of vectors of integers.
+    CallInst *CI =
+        IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
+    setShadow(&I, CI);
+
+    if (!MS.TrackOrigins)
+      return;
+
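+    // Approximation: use the origin of the first slot of the source memory
+    // for the entire loaded value.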
+    Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+    setOrigin(&I, PtrSrcOrigin);
+  }
+
   /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
   /// and vst{2,3,4}lane).
   ///
@@ -5011,6 +5082,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
       break;
 
+    case Intrinsic::aarch64_neon_ld1x2:
+    case Intrinsic::aarch64_neon_ld1x3:
+    case Intrinsic::aarch64_neon_ld1x4:
+    case Intrinsic::aarch64_neon_ld2:
+    case Intrinsic::aarch64_neon_ld3:
+    case Intrinsic::aarch64_neon_ld4:
+    case Intrinsic::aarch64_neon_ld2r:
+    case Intrinsic::aarch64_neon_ld3r:
+    case Intrinsic::aarch64_neon_ld4r: {
+      handleNEONVectorLoad(I, /*WithLane=*/false);
+      break;
+    }
+
+    case Intrinsic::aarch64_neon_ld2lane:
+    case Intrinsic::aarch64_neon_ld3lane:
+    case Intrinsic::aarch64_neon_ld4lane: {
+      handleNEONVectorLoad(I, /*WithLane=*/true);
+      break;
+    }
+
     // Saturating extract narrow
     case Intrinsic::aarch64_neon_sqxtn:
     case Intrinsic::aarch64_neon_sqxtun: