@@ -3641,9 +3641,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  // Get an MMX-sized vector type.
-  Type *getMMXVectorTy(unsigned EltSizeInBits) {
-    const unsigned X86_MMXSizeInBits = 64;
+  // Get an MMX-sized (64-bit) vector type, or optionally, other-sized
+  // vectors.
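+  //
+  // e.g., getMMXVectorTy(16)     returns <4 x i16>
+  //       getMMXVectorTy(8, 128) returns <16 x i8>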
+  Type *getMMXVectorTy(unsigned EltSizeInBits,
+                       unsigned X86_MMXSizeInBits = 64) {
     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
            "Illegal MMX vector element size");
     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
@@ -3843,20 +3844,109 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  // Instrument multiply-add intrinsic.
-  void handleVectorPmaddIntrinsic(IntrinsicInst &I,
-                                  unsigned MMXEltSizeInBits = 0) {
-    Type *ResTy =
-        MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
+  // Instrument multiply-add intrinsics.
+  //
+  // e.g., Two operands:
+  //         <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
+  //
+  //       Two operands which require an EltSizeInBits override:
+  //         <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
+  //
+  //       Three operands are not implemented yet:
+  //         <4 x i32> @llvm.x86.avx512.vpdpbusd.128
+  //                       (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
+  //         (the result of multiply-add'ing %a and %b is accumulated with %s)
+  void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
+                                  unsigned EltSizeInBits = 0) {
     IRBuilder<> IRB(&I);
-    auto *Shadow0 = getShadow(&I, 0);
-    auto *Shadow1 = getShadow(&I, 1);
-    Value *S = IRB.CreateOr(Shadow0, Shadow1);
-    S = IRB.CreateBitCast(S, ResTy);
-    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
-                       ResTy);
-    S = IRB.CreateBitCast(S, getShadowTy(&I));
-    setShadow(&I, S);
+
+    [[maybe_unused]] FixedVectorType *ReturnType =
+        cast<FixedVectorType>(I.getType());
+    assert(isa<FixedVectorType>(ReturnType));
+
+    assert(I.arg_size() == 2);
+
+    // Vectors A and B, and shadows
+    Value *Va = I.getOperand(0);
+    Value *Vb = I.getOperand(1);
+
+    Value *Sa = getShadow(&I, 0);
+    Value *Sb = getShadow(&I, 1);
+
+    FixedVectorType *ParamType =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType());
+    assert(ParamType == I.getArgOperand(1)->getType());
+
+    assert(ParamType->getPrimitiveSizeInBits() ==
+           ReturnType->getPrimitiveSizeInBits());
+
+    FixedVectorType *ImplicitReturnType = ReturnType;
+    // Step 1: instrument multiplication of corresponding vector elements
+    if (EltSizeInBits) {
+      ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy(
+          EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits()));
+      ParamType = cast<FixedVectorType>(
+          getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
+
+      Va = IRB.CreateBitCast(Va, ParamType);
+      Vb = IRB.CreateBitCast(Vb, ParamType);
+
+      Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType));
+      Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType));
+    } else {
+      assert(ParamType->getNumElements() ==
+             ReturnType->getNumElements() * ReductionFactor);
+    }
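+
+    // For illustration: with llvm.x86.mmx.pmadd.wd (EltSizeInBits == 16),
+    // the <1 x i64> operands and shadows are reinterpreted as <4 x i16>,
+    // and ImplicitReturnType becomes <2 x i32>.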
+
+    // Multiplying an *initialized* zero by an uninitialized element results in
+    // an initialized zero element.
+    //
+    // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
+    // results in an unpoisoned value. We can therefore adapt the visitAnd()
+    // instrumentation:
+    //   OutShadow =   (SaNonZero & SbNonZero)
+    //               | (VaNonZero & SbNonZero)
+    //               | (SaNonZero & VbNonZero)
+    // where non-zero is checked on a per-element basis (not per bit).
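+    //
+    // e.g., a poisoned lane of Sa taints the output lane unless the
+    // corresponding lane of Vb is a fully-initialized zero; and if a lane of
+    // Va is an initialized zero, all three terms are false and the output
+    // lane stays clean regardless of Vb's shadow.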
+    Value *SZero = Constant::getNullValue(Va->getType());
+    Value *VZero = Constant::getNullValue(Sa->getType());
+    Value *SaNonZero = IRB.CreateICmpNE(Sa, SZero);
+    Value *SbNonZero = IRB.CreateICmpNE(Sb, SZero);
+    Value *VaNonZero = IRB.CreateICmpNE(Va, VZero);
+    Value *VbNonZero = IRB.CreateICmpNE(Vb, VZero);
+
+    Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero);
+    Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero);
+    Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero);
+
+    // Each element of the vector is represented by a single bit (poisoned or
+    // not) e.g., <8 x i1>.
+    Value *And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero});
+
+    // Extend <8 x i1> to <8 x i16>.
+    // (The real pmadd intrinsic would have computed intermediate values of
+    // <8 x i32>, but that is irrelevant for our shadow purposes because we
+    // consider each element to be either fully initialized or fully
+    // uninitialized.)
+    And = IRB.CreateSExt(And, Sa->getType());
+
+    // Step 2: instrument horizontal add
+    // We don't need bit-precise horizontalReduce because we only want to check
+    // if each pair of elements is fully zero.
+    // Cast to <4 x i32>.
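+    // e.g., after the sign extension, each poisoned <8 x i16> lane holds
+    // 0xFFFF; bitcasting to <4 x i32> fuses adjacent lane pairs, so an i32
+    // lane is non-zero iff at least one of its two source lanes was poisoned.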
+    Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType);
+
+    // Compute <4 x i1>, then extend back to <4 x i32>.
+    Value *OutShadow = IRB.CreateSExt(
+        IRB.CreateICmpNE(Horizontal,
+                         Constant::getNullValue(Horizontal->getType())),
+        ImplicitReturnType);
+
+    // For MMX, cast it back to the required fake return type (<1 x i64>).
+    if (EltSizeInBits)
+      OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
+
+    setShadow(&I, OutShadow);
     setOriginForNaryOp(I);
   }
@@ -5391,19 +5481,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorSadIntrinsic(I);
       break;
 
+    // Multiply and Add Packed Words
+    //   < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
+    //   < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
+    //
+    // Multiply and Add Packed Signed and Unsigned Bytes
+    //   < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
+    //   <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
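+    //
+    // In each case, every output element sums ReductionFactor (here, 2)
+    // adjacent products, so the result has half as many elements, each twice
+    // as wide as the inputs.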
     case Intrinsic::x86_sse2_pmadd_wd:
     case Intrinsic::x86_avx2_pmadd_wd:
     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
     case Intrinsic::x86_avx2_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
       break;
 
+    // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
     case Intrinsic::x86_ssse3_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I, 8);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8);
       break;
 
+    // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
     case Intrinsic::x86_mmx_pmadd_wd:
-      handleVectorPmaddIntrinsic(I, 16);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
       break;
 
     case Intrinsic::x86_sse_cmp_ss: