@@ -3873,38 +3873,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
38733873 setOriginForNaryOp (I);
38743874 }
38753875
3876- // / Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
3876+ // / Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
3877+ // / and vst{2,3,4}lane).
38773878 // /
38783879 // / Arm NEON vector store intrinsics have the output address (pointer) as the
3879- // / last argument, with the initial arguments being the inputs. They return
3880- // / void.
3880+ // / last argument, with the initial arguments being the inputs (and lane
3881+ // / number for vst{2,3,4}lane). They return void.
38813882 // /
38823883 // / - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
38833884 // / abcdabcdabcdabcd... into *outP
38843885 // / - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
38853886 // / writes aaaa...bbbb...cccc...dddd... into *outP
3887+ // / - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
38863888 // / These instructions can all be instrumented with essentially the same
38873889 // / MSan logic, simply by applying the corresponding intrinsic to the shadow.
3888- void handleNEONVectorStoreIntrinsic (IntrinsicInst &I) {
3890+ void handleNEONVectorStoreIntrinsic (IntrinsicInst &I, bool useLane ) {
38893891 IRBuilder<> IRB (&I);
38903892
38913893 // Don't use getNumOperands() because it includes the callee
38923894 int numArgOperands = I.arg_size ();
3893- assert (numArgOperands >= 1 );
38943895
3895- // The last arg operand is the output
3896+ // The last arg operand is the output (pointer)
3897+ assert (numArgOperands >= 1 );
38963898 Value *Addr = I.getArgOperand (numArgOperands - 1 );
38973899 assert (Addr->getType ()->isPointerTy ());
3900+ int skipTrailingOperands = 1 ;
38983901
38993902 if (ClCheckAccessAddress)
39003903 insertShadowCheck (Addr, &I);
39013904
3902- SmallVector<Value *, 8 > Shadows;
3903- // Every arg operand, other than the last one, is an input vector
3904- for (int i = 0 ; i < numArgOperands - 1 ; i++) {
3905+ // Second-last operand is the lane number (for vst{2,3,4}lane)
3906+ if (useLane) {
3907+ skipTrailingOperands++;
3908+ assert (numArgOperands >= static_cast <int >(skipTrailingOperands));
3909+ assert (isa<IntegerType>(
3910+ I.getArgOperand (numArgOperands - skipTrailingOperands)->getType ()));
3911+ }
3912+
3913+ SmallVector<Value *, 8 > ShadowArgs;
3914+ // All the initial operands are the inputs
3915+ for (int i = 0 ; i < numArgOperands - skipTrailingOperands; i++) {
39053916 assert (isa<FixedVectorType>(I.getArgOperand (i)->getType ()));
39063917 Value *Shadow = getShadow (&I, i);
3907- Shadows .append (1 , Shadow);
3918+ ShadowArgs .append (1 , Shadow);
39083919 }
39093920
39103921 // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,29 +3932,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
39213932 FixedVectorType *OutputVectorTy = FixedVectorType::get (
39223933 cast<FixedVectorType>(I.getArgOperand (0 )->getType ())->getElementType (),
39233934 cast<FixedVectorType>(I.getArgOperand (0 )->getType ())->getNumElements () *
3924- (numArgOperands - 1 ));
3935+ (numArgOperands - skipTrailingOperands ));
39253936 Type *OutputShadowTy = getShadowTy (OutputVectorTy);
39263937
3938+ if (useLane)
3939+ ShadowArgs.append (1 ,
3940+ I.getArgOperand (numArgOperands - skipTrailingOperands));
3941+
39273942 Value *OutputShadowPtr, *OutputOriginPtr;
39283943 // AArch64 NEON does not need alignment (unless OS requires it)
39293944 std::tie (OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr (
39303945 Addr, IRB, OutputShadowTy, Align (1 ), /* isStore*/ true );
3931- Shadows .append (1 , OutputShadowPtr);
3946+ ShadowArgs .append (1 , OutputShadowPtr);
39323947
3933- // CreateIntrinsic will select the correct (integer) type for the
3934- // intrinsic; the original instruction I may have either integer- or
3935- // float-type inputs.
39363948 CallInst *CI =
3937- IRB.CreateIntrinsic (IRB.getVoidTy (), I.getIntrinsicID (), Shadows );
3949+ IRB.CreateIntrinsic (IRB.getVoidTy (), I.getIntrinsicID (), ShadowArgs );
39383950 setShadow (&I, CI);
39393951
39403952 if (MS.TrackOrigins ) {
39413953 // TODO: if we modelled the vst* instruction more precisely, we could
39423954 // more accurately track the origins (e.g., if both inputs are
39433955 // uninitialized for vst2, we currently blame the second input, even
39443956 // though part of the output depends only on the first input).
3957+ //
3958+ // This is particularly imprecise for vst{2,3,4}lane, since only one
3959+ // lane of each input is actually copied to the output.
39453960 OriginCombiner OC (this , IRB);
3946- for (int i = 0 ; i < numArgOperands - 1 ; i++)
3961+ for (int i = 0 ; i < numArgOperands - skipTrailingOperands ; i++)
39473962 OC.Add (I.getArgOperand (i));
39483963
39493964 const DataLayout &DL = F.getDataLayout ();
@@ -4316,7 +4331,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
43164331 case Intrinsic::aarch64_neon_st2:
43174332 case Intrinsic::aarch64_neon_st3:
43184333 case Intrinsic::aarch64_neon_st4: {
4319- handleNEONVectorStoreIntrinsic (I);
4334+ handleNEONVectorStoreIntrinsic (I, false );
4335+ break ;
4336+ }
4337+
4338+ case Intrinsic::aarch64_neon_st2lane:
4339+ case Intrinsic::aarch64_neon_st3lane:
4340+ case Intrinsic::aarch64_neon_st4lane: {
4341+ handleNEONVectorStoreIntrinsic (I, true );
43204342 break ;
43214343 }
43224344
0 commit comments