@@ -2608,38 +2608,79 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
26082608 /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
26092609 /// <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
26102610 ///
2611- /// TODO: adapt this function to handle horizontal add/sub?
2612- void handlePairwiseShadowOrIntrinsic (IntrinsicInst &I) {
2611+ /// Optionally, reinterpret the parameters to have elements of a specified
2612+ /// width. For example:
2613+ /// @llvm.x86.ssse3.phadd.w(<1 x i64> [[VAR1]], <1 x i64> [[VAR2]])
2614+ /// conceptually operates on
2615+ /// (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]])
2616+ /// and can be handled with ReinterpretElemWidth == 16.
2617+ void
2618+ handlePairwiseShadowOrIntrinsic (IntrinsicInst &I,
2619+ std::optional<int > ReinterpretElemWidth) {
26132620 assert (I.arg_size () == 1 || I.arg_size () == 2 );
26142621
26152622 assert (I.getType ()->isVectorTy ());
26162623 assert (I.getArgOperand (0 )->getType ()->isVectorTy ());
26172624
26182625 FixedVectorType *ParamType =
26192626 cast<FixedVectorType>(I.getArgOperand (0 )->getType ());
2620- if (I.arg_size () == 2 )
2627+ if (I.arg_size () == 2 ) {
2628+ assert (I.getArgOperand (1 )->getType ()->isVectorTy ());
26212629 assert (ParamType == cast<FixedVectorType>(I.getArgOperand (1 )->getType ()));
2630+ }
2631+
26222632 [[maybe_unused]] FixedVectorType *ReturnType =
26232633 cast<FixedVectorType>(I.getType ());
26242634 assert (ParamType->getNumElements () * I.arg_size () ==
26252635 2 * ReturnType->getNumElements ());
26262636
26272637 IRBuilder<> IRB (&I);
2628- unsigned Width = ParamType->getNumElements () * I.arg_size ();
2638+
2639+ unsigned TotalNumElems = ParamType->getNumElements () * I.arg_size ();
2640+ FixedVectorType *ReinterpretShadowTy = nullptr ;
2641+ if (ReinterpretElemWidth.has_value ()) {
2642+ assert (ParamType->getPrimitiveSizeInBits () %
2643+ ReinterpretElemWidth.value () ==
2644+ 0 );
2645+ ReinterpretShadowTy = FixedVectorType::get (
2646+ IRB.getIntNTy (ReinterpretElemWidth.value ()),
2647+ ParamType->getPrimitiveSizeInBits () / ReinterpretElemWidth.value ());
2648+ TotalNumElems = ReinterpretShadowTy->getNumElements () * I.arg_size ();
2649+ }
26292650
26302651 // Horizontal OR of shadow
26312652 SmallVector<int , 8 > EvenMask;
26322653 SmallVector<int , 8 > OddMask;
2633- for (unsigned X = 0 ; X < Width ; X += 2 ) {
2654+ for (unsigned X = 0 ; X + 1 < TotalNumElems ; X += 2 ) {
26342655 EvenMask.push_back (X);
26352656 OddMask.push_back (X + 1 );
26362657 }
26372658
26382659 Value *FirstArgShadow = getShadow (&I, 0 );
2660+ if (ReinterpretShadowTy)
2661+ FirstArgShadow = IRB.CreateBitCast (FirstArgShadow, ReinterpretShadowTy);
2662+
2663+ // If we had two parameters each with an odd number of elements, the total
2664+ // number of elements is even, but we have never seen this in extant
2665+ // instruction sets, so we enforce that each parameter must have an even
2666+ // number of elements.
2667+ assert (
2668+ (cast<FixedVectorType>(FirstArgShadow->getType ())->getNumElements ()) %
2669+ 2 ==
2670+ 0 );
2671+
26392672 Value *EvenShadow;
26402673 Value *OddShadow;
26412674 if (I.arg_size () == 2 ) {
26422675 Value *SecondArgShadow = getShadow (&I, 1 );
2676+ if (ReinterpretShadowTy)
2677+ SecondArgShadow =
2678+ IRB.CreateBitCast (SecondArgShadow, ReinterpretShadowTy);
2679+ assert ((cast<FixedVectorType>(SecondArgShadow->getType ())
2680+ ->getNumElements ()) %
2681+ 2 ==
2682+ 0 );
2683+
26432684 EvenShadow =
26442685 IRB.CreateShuffleVector (FirstArgShadow, SecondArgShadow, EvenMask);
26452686 OddShadow =
@@ -2653,6 +2694,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
26532694 OrShadow = CreateShadowCast (IRB, OrShadow, getShadowTy (&I));
26542695
26552696 setShadow (&I, OrShadow);
2697+
26562698 setOriginForNaryOp (I);
26572699 }
26582700
@@ -4156,87 +4198,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
41564198 setOriginForNaryOp (I);
41574199 }
41584200
4159- void handleAVXHorizontalAddSubIntrinsic (IntrinsicInst &I) {
4160- // Approximation only:
4161- // output = horizontal_add/sub(A, B)
4162- // => shadow[output] = horizontal_add(shadow[A], shadow[B])
4163- //
4164- // We always use horizontal add instead of subtract, because subtracting
4165- // a fully uninitialized shadow would result in a fully initialized shadow.
4166- //
4167- // - If we add two adjacent zero (initialized) shadow values, the
4168- // result always be zero i.e., no false positives.
4169- // - If we add two shadows, one of which is uninitialized, the
4170- // result will always be non-zero i.e., no false negatives.
4171- // - However, we can have false negatives if we do an addition that wraps
4172- // to zero; we consider this an acceptable tradeoff for performance.
4173- //
4174- // To make shadow propagation precise, we want the equivalent of
4175- // "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
4176-
4177- Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID ();
4178-
4179- switch (I.getIntrinsicID ()) {
4180- case Intrinsic::x86_sse3_hsub_ps:
4181- shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
4182- break ;
4183-
4184- case Intrinsic::x86_sse3_hsub_pd:
4185- shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
4186- break ;
4187-
4188- case Intrinsic::x86_ssse3_phsub_d:
4189- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
4190- break ;
4191-
4192- case Intrinsic::x86_ssse3_phsub_d_128:
4193- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
4194- break ;
4195-
4196- case Intrinsic::x86_ssse3_phsub_w:
4197- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
4198- break ;
4199-
4200- case Intrinsic::x86_ssse3_phsub_w_128:
4201- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
4202- break ;
4203-
4204- case Intrinsic::x86_ssse3_phsub_sw:
4205- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
4206- break ;
4207-
4208- case Intrinsic::x86_ssse3_phsub_sw_128:
4209- shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
4210- break ;
4211-
4212- case Intrinsic::x86_avx_hsub_pd_256:
4213- shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
4214- break ;
4215-
4216- case Intrinsic::x86_avx_hsub_ps_256:
4217- shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
4218- break ;
4219-
4220- case Intrinsic::x86_avx2_phsub_d:
4221- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
4222- break ;
4223-
4224- case Intrinsic::x86_avx2_phsub_w:
4225- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
4226- break ;
4227-
4228- case Intrinsic::x86_avx2_phsub_sw:
4229- shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
4230- break ;
4231-
4232- default :
4233- break ;
4234- }
4235-
4236- return handleIntrinsicByApplyingToShadow (I, shadowIntrinsicID,
4237- /* trailingVerbatimArgs*/ 0 );
4238- }
4239-
42404201 /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
42414202 /// and vst{2,3,4}lane).
42424203 ///
@@ -4783,33 +4744,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
47834744 handleVtestIntrinsic (I);
47844745 break ;
47854746
4786- case Intrinsic::x86_sse3_hadd_ps:
4787- case Intrinsic::x86_sse3_hadd_pd:
4788- case Intrinsic::x86_ssse3_phadd_d:
4789- case Intrinsic::x86_ssse3_phadd_d_128:
4747+ // Packed Horizontal Add/Subtract
47904748 case Intrinsic::x86_ssse3_phadd_w:
47914749 case Intrinsic::x86_ssse3_phadd_w_128:
4750+ case Intrinsic::x86_avx2_phadd_w:
4751+ case Intrinsic::x86_ssse3_phsub_w:
4752+ case Intrinsic::x86_ssse3_phsub_w_128:
4753+ case Intrinsic::x86_avx2_phsub_w: {
4754+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 16 );
4755+ break ;
4756+ }
4757+
4758+ // Packed Horizontal Add/Subtract
4759+ case Intrinsic::x86_ssse3_phadd_d:
4760+ case Intrinsic::x86_ssse3_phadd_d_128:
4761+ case Intrinsic::x86_avx2_phadd_d:
4762+ case Intrinsic::x86_ssse3_phsub_d:
4763+ case Intrinsic::x86_ssse3_phsub_d_128:
4764+ case Intrinsic::x86_avx2_phsub_d: {
4765+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 32 );
4766+ break ;
4767+ }
4768+
4769+ // Packed Horizontal Add/Subtract and Saturate
47924770 case Intrinsic::x86_ssse3_phadd_sw:
47934771 case Intrinsic::x86_ssse3_phadd_sw_128:
4772+ case Intrinsic::x86_avx2_phadd_sw:
4773+ case Intrinsic::x86_ssse3_phsub_sw:
4774+ case Intrinsic::x86_ssse3_phsub_sw_128:
4775+ case Intrinsic::x86_avx2_phsub_sw: {
4776+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ 16 );
4777+ break ;
4778+ }
4779+
4780+ // Packed Single/Double Precision Floating-Point Horizontal Add
4781+ case Intrinsic::x86_sse3_hadd_ps:
4782+ case Intrinsic::x86_sse3_hadd_pd:
47944783 case Intrinsic::x86_avx_hadd_pd_256:
47954784 case Intrinsic::x86_avx_hadd_ps_256:
4796- case Intrinsic::x86_avx2_phadd_d:
4797- case Intrinsic::x86_avx2_phadd_w:
4798- case Intrinsic::x86_avx2_phadd_sw:
47994785 case Intrinsic::x86_sse3_hsub_ps:
48004786 case Intrinsic::x86_sse3_hsub_pd:
4801- case Intrinsic::x86_ssse3_phsub_d:
4802- case Intrinsic::x86_ssse3_phsub_d_128:
4803- case Intrinsic::x86_ssse3_phsub_w:
4804- case Intrinsic::x86_ssse3_phsub_w_128:
4805- case Intrinsic::x86_ssse3_phsub_sw:
4806- case Intrinsic::x86_ssse3_phsub_sw_128:
48074787 case Intrinsic::x86_avx_hsub_pd_256:
4808- case Intrinsic::x86_avx_hsub_ps_256:
4809- case Intrinsic::x86_avx2_phsub_d:
4810- case Intrinsic::x86_avx2_phsub_w:
4811- case Intrinsic::x86_avx2_phsub_sw: {
4812- handleAVXHorizontalAddSubIntrinsic (I);
4788+ case Intrinsic::x86_avx_hsub_ps_256: {
4789+ handlePairwiseShadowOrIntrinsic (I, /* ReinterpretElemWidth=*/ std::nullopt );
48134790 break ;
48144791 }
48154792
@@ -4869,7 +4846,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
48694846 // Add Long Pairwise
48704847 case Intrinsic::aarch64_neon_saddlp:
48714848 case Intrinsic::aarch64_neon_uaddlp: {
4872- handlePairwiseShadowOrIntrinsic (I);
4849+ handlePairwiseShadowOrIntrinsic (I, std:: nullopt );
48734850 break ;
48744851 }
48754852
0 commit comments