@@ -2607,8 +2607,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
26072607 ///
26082608 /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
26092609 ///       <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
2610- ///
2611- /// TODO: adapt this function to handle horizontal add/sub?
26122610 void handlePairwiseShadowOrIntrinsic (IntrinsicInst &I) {
26132611 assert (I.arg_size () == 1 || I.arg_size () == 2 );
26142612
@@ -2617,8 +2615,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
26172615
26182616 FixedVectorType *ParamType =
26192617 cast<FixedVectorType>(I.getArgOperand (0 )->getType ());
2620- if ( I.arg_size () == 2 )
2621- assert (ParamType == cast<FixedVectorType>(I.getArgOperand (1 )->getType ()));
2618+ assert (( I.arg_size () != 2 ) ||
2619+ (ParamType == cast<FixedVectorType>(I.getArgOperand (1 )->getType () )));
26222620 [[maybe_unused]] FixedVectorType *ReturnType =
26232621 cast<FixedVectorType>(I.getType ());
26242622 assert (ParamType->getNumElements () * I.arg_size () ==
@@ -2656,6 +2654,82 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
26562654 setOriginForNaryOp (I);
26572655 }
26582656
2657+ // / Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
2658+ // / fields, with the parameters reinterpreted to have elements of a specified
2659+ // / width. For example:
2660+ // / @llvm.x86.ssse3.phadd.w(<1 x i64> [[VAR1]], <1 x i64> [[VAR2]])
2661+ // / conceptually operates on
2662+ // / (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]])
2663+ // / and can be handled with ReinterpretElemWidth == 16.
2664+ void handlePairwiseShadowOrIntrinsic (IntrinsicInst &I,
2665+ int ReinterpretElemWidth) {
2666+ assert (I.arg_size () == 1 || I.arg_size () == 2 );
2667+
2668+ assert (I.getType ()->isVectorTy ());
2669+ assert (I.getArgOperand (0 )->getType ()->isVectorTy ());
2670+
2671+ FixedVectorType *ParamType =
2672+ cast<FixedVectorType>(I.getArgOperand (0 )->getType ());
2673+ assert ((I.arg_size () != 2 ) ||
2674+ (ParamType == cast<FixedVectorType>(I.getArgOperand (1 )->getType ())));
2675+
2676+ [[maybe_unused]] FixedVectorType *ReturnType =
2677+ cast<FixedVectorType>(I.getType ());
2678+ assert (ParamType->getNumElements () * I.arg_size () ==
2679+ 2 * ReturnType->getNumElements ());
2680+
2681+ IRBuilder<> IRB (&I);
2682+
2683+ unsigned TotalNumElems = ParamType->getNumElements () * I.arg_size ();
2684+ FixedVectorType *ReinterpretShadowTy = nullptr ;
2685+ assert (isAligned (Align (ReinterpretElemWidth),
2686+ ParamType->getPrimitiveSizeInBits ()));
2687+ ReinterpretShadowTy = FixedVectorType::get (
2688+ IRB.getIntNTy (ReinterpretElemWidth),
2689+ ParamType->getPrimitiveSizeInBits () / ReinterpretElemWidth);
2690+ TotalNumElems = ReinterpretShadowTy->getNumElements () * I.arg_size ();
2691+
2692+ // Horizontal OR of shadow
2693+ SmallVector<int , 8 > EvenMask;
2694+ SmallVector<int , 8 > OddMask;
2695+ for (unsigned X = 0 ; X < TotalNumElems - 1 ; X += 2 ) {
2696+ EvenMask.push_back (X);
2697+ OddMask.push_back (X + 1 );
2698+ }
2699+
2700+ Value *FirstArgShadow = getShadow (&I, 0 );
2701+ FirstArgShadow = IRB.CreateBitCast (FirstArgShadow, ReinterpretShadowTy);
2702+
2703+ // If we had two parameters each with an odd number of elements, the total
2704+ // number of elements is even, but we have never seen this in extant
2705+ // instruction sets, so we enforce that each parameter must have an even
2706+ // number of elements.
2707+ assert (isAligned (
2708+ Align (2 ),
2709+ cast<FixedVectorType>(FirstArgShadow->getType ())->getNumElements ()));
2710+
2711+ Value *EvenShadow;
2712+ Value *OddShadow;
2713+ if (I.arg_size () == 2 ) {
2714+ Value *SecondArgShadow = getShadow (&I, 1 );
2715+ SecondArgShadow = IRB.CreateBitCast (SecondArgShadow, ReinterpretShadowTy);
2716+
2717+ EvenShadow =
2718+ IRB.CreateShuffleVector (FirstArgShadow, SecondArgShadow, EvenMask);
2719+ OddShadow =
2720+ IRB.CreateShuffleVector (FirstArgShadow, SecondArgShadow, OddMask);
2721+ } else {
2722+ EvenShadow = IRB.CreateShuffleVector (FirstArgShadow, EvenMask);
2723+ OddShadow = IRB.CreateShuffleVector (FirstArgShadow, OddMask);
2724+ }
2725+
2726+ Value *OrShadow = IRB.CreateOr (EvenShadow, OddShadow);
2727+ OrShadow = CreateShadowCast (IRB, OrShadow, getShadowTy (&I));
2728+
2729+ setShadow (&I, OrShadow);
2730+ setOriginForNaryOp (I);
2731+ }
2732+
26592733 void visitFNeg (UnaryOperator &I) { handleShadowOr (I); }
26602734
26612735 // Handle multiplication by constant.
@@ -4156,87 +4230,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
41564230 setOriginForNaryOp (I);
41574231 }
41584232
4159- void handleAVXHorizontalAddSubIntrinsic (IntrinsicInst &I) {
4160- // Approximation only:
4161- // output = horizontal_add/sub(A, B)
4162- // => shadow[output] = horizontal_add(shadow[A], shadow[B])
4163- //
4164- // We always use horizontal add instead of subtract, because subtracting
4165- // a fully uninitialized shadow would result in a fully initialized shadow.
4166- //
4167- // - If we add two adjacent zero (initialized) shadow values, the
4168- // result will always be zero, i.e., no false positives.
4169- // - If we add two shadows, one of which is uninitialized, the
4170- // result will always be non-zero, i.e., no false negatives.
4171- // - However, we can have false negatives if we do an addition that wraps
4172- // to zero; we consider this an acceptable tradeoff for performance.
4173- //
4174- // To make shadow propagation precise, we want the equivalent of
4175- // "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
4176-
4177- Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID ();
4178-
4179- switch (I.getIntrinsicID ()) {
4180- case Intrinsic::x86_sse3_hsub_ps:
4181- shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
4182- break ;
4183-
4184- case Intrinsic::x86_sse3_hsub_pd:
4185- shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
4186- break ;
4187-
4188- case Intrinsic::x86_ssse3_phsub_d:
4189- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
4190- break ;
4191-
4192- case Intrinsic::x86_ssse3_phsub_d_128:
4193- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
4194- break ;
4195-
4196- case Intrinsic::x86_ssse3_phsub_w:
4197- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
4198- break ;
4199-
4200- case Intrinsic::x86_ssse3_phsub_w_128:
4201- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
4202- break ;
4203-
4204- case Intrinsic::x86_ssse3_phsub_sw:
4205- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
4206- break ;
4207-
4208- case Intrinsic::x86_ssse3_phsub_sw_128:
4209- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
4210- break ;
4211-
4212- case Intrinsic::x86_avx_hsub_pd_256:
4213- shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
4214- break ;
4215-
4216- case Intrinsic::x86_avx_hsub_ps_256:
4217- shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
4218- break ;
4219-
4220- case Intrinsic::x86_avx2_phsub_d:
4221- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
4222- break ;
4223-
4224- case Intrinsic::x86_avx2_phsub_w:
4225- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
4226- break ;
4227-
4228- case Intrinsic::x86_avx2_phsub_sw:
4229- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
4230- break ;
4231-
4232- default :
4233- break ;
4234- }
4235-
4236- return handleIntrinsicByApplyingToShadow (I, shadowIntrinsicID,
4237- /* trailingVerbatimArgs*/ 0 );
4238- }
4239-
42404233 // / Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
42414234 // / and vst{2,3,4}lane).
42424235 // /
@@ -4783,33 +4776,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
47834776 handleVtestIntrinsic (I);
47844777 break ;
47854778
4786- case Intrinsic::x86_sse3_hadd_ps:
4787- case Intrinsic::x86_sse3_hadd_pd:
4788- case Intrinsic::x86_ssse3_phadd_d:
4789- case Intrinsic::x86_ssse3_phadd_d_128:
4779+ // Packed Horizontal Add/Subtract
47904780 case Intrinsic::x86_ssse3_phadd_w:
47914781 case Intrinsic::x86_ssse3_phadd_w_128:
4782+ case Intrinsic::x86_avx2_phadd_w:
4783+ case Intrinsic::x86_ssse3_phsub_w:
4784+ case Intrinsic::x86_ssse3_phsub_w_128:
4785+ case Intrinsic::x86_avx2_phsub_w: {
4786+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 16 );
4787+ break ;
4788+ }
4789+
4790+ // Packed Horizontal Add/Subtract
4791+ case Intrinsic::x86_ssse3_phadd_d:
4792+ case Intrinsic::x86_ssse3_phadd_d_128:
4793+ case Intrinsic::x86_avx2_phadd_d:
4794+ case Intrinsic::x86_ssse3_phsub_d:
4795+ case Intrinsic::x86_ssse3_phsub_d_128:
4796+ case Intrinsic::x86_avx2_phsub_d: {
4797+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 32 );
4798+ break ;
4799+ }
4800+
4801+ // Packed Horizontal Add/Subtract and Saturate
47924802 case Intrinsic::x86_ssse3_phadd_sw:
47934803 case Intrinsic::x86_ssse3_phadd_sw_128:
4804+ case Intrinsic::x86_avx2_phadd_sw:
4805+ case Intrinsic::x86_ssse3_phsub_sw:
4806+ case Intrinsic::x86_ssse3_phsub_sw_128:
4807+ case Intrinsic::x86_avx2_phsub_sw: {
4808+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 16 );
4809+ break ;
4810+ }
4811+
4812+ // Packed Single/Double Precision Floating-Point Horizontal Add
4813+ case Intrinsic::x86_sse3_hadd_ps:
4814+ case Intrinsic::x86_sse3_hadd_pd:
47944815 case Intrinsic::x86_avx_hadd_pd_256:
47954816 case Intrinsic::x86_avx_hadd_ps_256:
4796- case Intrinsic::x86_avx2_phadd_d:
4797- case Intrinsic::x86_avx2_phadd_w:
4798- case Intrinsic::x86_avx2_phadd_sw:
47994817 case Intrinsic::x86_sse3_hsub_ps:
48004818 case Intrinsic::x86_sse3_hsub_pd:
4801- case Intrinsic::x86_ssse3_phsub_d:
4802- case Intrinsic::x86_ssse3_phsub_d_128:
4803- case Intrinsic::x86_ssse3_phsub_w:
4804- case Intrinsic::x86_ssse3_phsub_w_128:
4805- case Intrinsic::x86_ssse3_phsub_sw:
4806- case Intrinsic::x86_ssse3_phsub_sw_128:
48074819 case Intrinsic::x86_avx_hsub_pd_256:
4808- case Intrinsic::x86_avx_hsub_ps_256:
4809- case Intrinsic::x86_avx2_phsub_d:
4810- case Intrinsic::x86_avx2_phsub_w:
4811- case Intrinsic::x86_avx2_phsub_sw: {
4812- handleAVXHorizontalAddSubIntrinsic (I);
4820+ case Intrinsic::x86_avx_hsub_ps_256: {
4821+ handlePairwiseShadowOrIntrinsic (I);
48134822 break ;
48144823 }
48154824
0 commit comments