
Commit 27e6df6

Merge branch 'main' into main

2 parents d8c33dd + 2b75ff1

10 files changed: +385, −110 lines

compiler-rt/lib/msan/tests/msan_test.cpp

Lines changed: 26 additions & 1 deletion
@@ -4271,14 +4271,39 @@ TEST(VectorSadTest, sse2_psad_bw) {
 }
 
 TEST(VectorMaddTest, mmx_pmadd_wd) {
-  V4x16 a = {Poisoned<U2>(), 1, 2, 3};
+  V4x16 a = {Poisoned<U2>(0), 1, 2, 3};
   V4x16 b = {100, 101, 102, 103};
   V2x32 c = _mm_madd_pi16(a, b);
+  // Multiply step:
+  //     {Poison * 100, 1 * 101, 2 * 102, 3 * 103}
+  //  == {Poison,       1 * 101, 2 * 102, 3 * 103}
+  // Notice that for the poisoned value, we ignored the concrete zero value.
+  //
+  // Horizontal add step:
+  //     {Poison + 1 * 101, 2 * 102 + 3 * 103}
+  //  == {Poison,           2 * 102 + 3 * 103}
 
   EXPECT_POISONED(c[0]);
   EXPECT_NOT_POISONED(c[1]);
 
   EXPECT_EQ((unsigned)(2 * 102 + 3 * 103), c[1]);
+
+  V4x16 d = {Poisoned<U2>(0), 1, 0, 3};
+  V4x16 e = {100, 101, Poisoned<U2>(102), 103};
+  V2x32 f = _mm_madd_pi16(d, e);
+  // Multiply step:
+  //     {Poison * 100, 1 * 101, 0 * Poison, 3 * 103}
+  //  == {Poison,       1 * 101, 0,          3 * 103}
+  // Notice that 0 * Poison == 0.
+  //
+  // Horizontal add step:
+  //     {Poison + 1 * 101, 0 + 3 * 103}
+  //  == {Poison,           3 * 103}
+
+  EXPECT_POISONED(f[0]);
+  EXPECT_NOT_POISONED(f[1]);
+
+  EXPECT_EQ((unsigned)(3 * 103), f[1]);
 }
 
 TEST(VectorCmpTest, mm_cmpneq_ps) {
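The shadow semantics this test expects can be modeled per element pair. The following is a minimal standalone C++ sketch, not part of the commit (Lane, productPoisoned, and pairPoisoned are names invented for illustration): a product is poisoned only when one operand is poisoned and the other is not a concrete zero, and the horizontal add poisons a result lane if either of its two products is poisoned.

#include <cstdint>

struct Lane {
  int16_t Val;   // concrete value
  bool Poisoned; // MSan shadow, collapsed to one bit per lane
};

// 0 * Poison == 0: an initialized zero operand yields an initialized product.
static bool productPoisoned(Lane A, Lane B) {
  return (A.Poisoned && B.Poisoned) || (A.Poisoned && B.Val != 0) ||
         (B.Poisoned && A.Val != 0);
}

// Horizontal add: a result lane is poisoned if either product is poisoned.
static bool pairPoisoned(Lane A0, Lane B0, Lane A1, Lane B1) {
  return productPoisoned(A0, B0) || productPoisoned(A1, B1);
}

Applied to the second example above (d = {Poison(0), 1, 0, 3}, e = {100, 101, Poison(102), 103}), the model marks f[0] poisoned (Poison * 100 taints the first pair) and f[1] clean (0 * Poison contributes an initialized zero), matching the EXPECT_POISONED and EXPECT_NOT_POISONED assertions.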

compiler-rt/test/msan/dtls_test.c

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 
 // Reports use-of-uninitialized-value, not analyzed
 XFAIL: target={{.*netbsd.*}}
-XFAIL: aarch64-target-arch
+UNSUPPORTED: aarch64-target-arch
 
 */
 
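For context on this one-line change: in LLVM's lit test framework, an XFAIL test still runs and is required to fail (an unexpected pass is reported as an error), whereas UNSUPPORTED skips the test entirely on matching targets. Moving from XFAIL to UNSUPPORTED is the usual choice when a test's outcome on a target is unreliable rather than consistently failing; that this was the motivation here is an inference, not stated in the commit.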

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 118 additions & 19 deletions
@@ -3641,9 +3641,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  // Get an MMX-sized vector type.
-  Type *getMMXVectorTy(unsigned EltSizeInBits) {
-    const unsigned X86_MMXSizeInBits = 64;
+  // Get an MMX-sized (64-bit) vector type, or optionally, other sized
+  // vectors.
+  Type *getMMXVectorTy(unsigned EltSizeInBits,
+                       unsigned X86_MMXSizeInBits = 64) {
     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
            "Illegal MMX vector element size");
     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
@@ -3843,20 +3844,109 @@
     setOriginForNaryOp(I);
   }
 
-  // Instrument multiply-add intrinsic.
-  void handleVectorPmaddIntrinsic(IntrinsicInst &I,
-                                  unsigned MMXEltSizeInBits = 0) {
-    Type *ResTy =
-        MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
+  // Instrument multiply-add intrinsics.
+  //
+  // e.g., Two operands:
+  //         <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
+  //
+  //       Two operands which require an EltSizeInBits override:
+  //         <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
+  //
+  //       Three operands are not implemented yet:
+  //         <4 x i32> @llvm.x86.avx512.vpdpbusd.128
+  //                       (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
+  //         (the result of multiply-add'ing %a and %b is accumulated with %s)
+  void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
+                                  unsigned EltSizeInBits = 0) {
     IRBuilder<> IRB(&I);
-    auto *Shadow0 = getShadow(&I, 0);
-    auto *Shadow1 = getShadow(&I, 1);
-    Value *S = IRB.CreateOr(Shadow0, Shadow1);
-    S = IRB.CreateBitCast(S, ResTy);
-    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
-                       ResTy);
-    S = IRB.CreateBitCast(S, getShadowTy(&I));
-    setShadow(&I, S);
+
+    [[maybe_unused]] FixedVectorType *ReturnType =
+        cast<FixedVectorType>(I.getType());
+    assert(isa<FixedVectorType>(ReturnType));
+
+    assert(I.arg_size() == 2);
+
+    // Vectors A and B, and their shadows
+    Value *Va = I.getOperand(0);
+    Value *Vb = I.getOperand(1);
+
+    Value *Sa = getShadow(&I, 0);
+    Value *Sb = getShadow(&I, 1);
+
+    FixedVectorType *ParamType =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType());
+    assert(ParamType == I.getArgOperand(1)->getType());
+
+    assert(ParamType->getPrimitiveSizeInBits() ==
+           ReturnType->getPrimitiveSizeInBits());
+
+    FixedVectorType *ImplicitReturnType = ReturnType;
+    // Step 1: instrument multiplication of corresponding vector elements
+    if (EltSizeInBits) {
+      ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy(
+          EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits()));
+      ParamType = cast<FixedVectorType>(
+          getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
+
+      Va = IRB.CreateBitCast(Va, ParamType);
+      Vb = IRB.CreateBitCast(Vb, ParamType);
+
+      Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType));
+      Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType));
+    } else {
+      assert(ParamType->getNumElements() ==
+             ReturnType->getNumElements() * ReductionFactor);
+    }
+
+    // Multiplying an *initialized* zero by an uninitialized element results
+    // in an initialized zero element.
+    //
+    // This is analogous to bitwise AND, where "AND" of 0 and a poisoned value
+    // results in an unpoisoned value. We can therefore adapt the visitAnd()
+    // instrumentation:
+    //   OutShadow =   (SaNonZero & SbNonZero)
+    //               | (VaNonZero & SbNonZero)
+    //               | (SaNonZero & VbNonZero)
+    // where non-zero is checked on a per-element basis (not per bit).
+    Value *SZero = Constant::getNullValue(Va->getType());
+    Value *VZero = Constant::getNullValue(Sa->getType());
+    Value *SaNonZero = IRB.CreateICmpNE(Sa, SZero);
+    Value *SbNonZero = IRB.CreateICmpNE(Sb, SZero);
+    Value *VaNonZero = IRB.CreateICmpNE(Va, VZero);
+    Value *VbNonZero = IRB.CreateICmpNE(Vb, VZero);
+
+    Value *SaAndSbNonZero = IRB.CreateAnd(SaNonZero, SbNonZero);
+    Value *VaAndSbNonZero = IRB.CreateAnd(VaNonZero, SbNonZero);
+    Value *SaAndVbNonZero = IRB.CreateAnd(SaNonZero, VbNonZero);
+
+    // Each element of the vector is represented by a single bit (poisoned or
+    // not), e.g., <8 x i1>.
+    Value *And = IRB.CreateOr({SaAndSbNonZero, VaAndSbNonZero, SaAndVbNonZero});
+
+    // Extend <8 x i1> to <8 x i16>.
+    // (The real pmadd intrinsic would have computed intermediate values of
+    // <8 x i32>, but that is irrelevant for our shadow purposes because we
+    // consider each element to be either fully initialized or fully
+    // uninitialized.)
+    And = IRB.CreateSExt(And, Sa->getType());
+
+    // Step 2: instrument the horizontal add.
+    // We don't need a bit-precise horizontalReduce because we only want to
+    // check whether each pair of elements is fully zero.
+    // Cast to <4 x i32>.
+    Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType);
+
+    // Compute <4 x i1>, then extend back to <4 x i32>.
+    Value *OutShadow = IRB.CreateSExt(
+        IRB.CreateICmpNE(Horizontal,
+                         Constant::getNullValue(Horizontal->getType())),
+        ImplicitReturnType);
+
+    // For MMX, cast it back to the required fake return type (<1 x i64>).
+    if (EltSizeInBits)
+      OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
+
+    setShadow(&I, OutShadow);
     setOriginForNaryOp(I);
   }
 
@@ -5391,19 +5481,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorSadIntrinsic(I);
       break;
 
+    // Multiply and Add Packed Words
+    //   < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
+    //   < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
+    //
+    // Multiply and Add Packed Signed and Unsigned Bytes
+    //   < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
+    //   <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
     case Intrinsic::x86_sse2_pmadd_wd:
     case Intrinsic::x86_avx2_pmadd_wd:
     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
     case Intrinsic::x86_avx2_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
      break;
 
+    // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
    case Intrinsic::x86_ssse3_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I, 8);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8);
      break;
 
+    // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
    case Intrinsic::x86_mmx_pmadd_wd:
-      handleVectorPmaddIntrinsic(I, 16);
+      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
      break;
 
    case Intrinsic::x86_sse_cmp_ss:
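A note on why the final sext / bitcast / icmp sequence above implements the pairwise (ReductionFactor == 2) reduction: sign-extending each poison bit produces an all-ones element, so when two adjacent narrow elements are reinterpreted as one double-wide element, the wide element is nonzero exactly when at least one of its halves was poisoned. Here is a minimal C++ sketch of that trick on a single pair (illustrative only, not the commit's code):

#include <cstdint>
#include <cstdio>

int main() {
  bool Product0Poisoned = true, Product1Poisoned = false;
  // sext i1 to i16: a poisoned lane becomes 0xFFFF, a clean lane 0x0000.
  uint16_t S0 = Product0Poisoned ? 0xFFFF : 0x0000;
  uint16_t S1 = Product1Poisoned ? 0xFFFF : 0x0000;
  // bitcast <2 x i16> to i32: concatenate the two half-shadows.
  uint32_t Pair = (uint32_t)S1 << 16 | S0;
  // icmp ne 0: the pair is poisoned iff either half was poisoned.
  std::printf("pair poisoned: %s\n", Pair != 0 ? "yes" : "no"); // prints "yes"
  return 0;
}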

llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll

Lines changed: 42 additions & 15 deletions
@@ -140,11 +140,20 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP14:%.*]] = and <16 x i1> [[TMP12]], [[TMP5]]
+; CHECK-NEXT:    [[TMP15:%.*]] = and <16 x i1> [[TMP4]], [[TMP13]]
+; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i1> [[TMP11]], [[TMP14]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i16>
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <16 x i16> [[TMP7]] to <8 x i32>
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
+; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]])
 ; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i32> [[RES]]
 ;
@@ -677,11 +686,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16>
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <32 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP14:%.*]] = and <32 x i1> [[TMP12]], [[TMP5]]
+; CHECK-NEXT:    [[TMP15:%.*]] = and <32 x i1> [[TMP4]], [[TMP13]]
+; CHECK-NEXT:    [[TMP16:%.*]] = or <32 x i1> [[TMP11]], [[TMP14]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <32 x i8> [[TMP7]] to <16 x i16>
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
+; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
 ; CHECK-NEXT:    store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i16> [[RES]]
 ;
@@ -706,11 +724,20 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
-; CHECK-NEXT:    [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16>
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne <32 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <32 x i8> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP19:%.*]] = and <32 x i1> [[TMP17]], [[TMP10]]
+; CHECK-NEXT:    [[TMP20:%.*]] = and <32 x i1> [[TMP9]], [[TMP18]]
+; CHECK-NEXT:    [[TMP21:%.*]] = or <32 x i1> [[TMP16]], [[TMP19]]
+; CHECK-NEXT:    [[TMP22:%.*]] = or <32 x i1> [[TMP21]], [[TMP20]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sext <32 x i1> [[TMP22]] to <32 x i8>
+; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <32 x i8> [[TMP12]] to <16 x i16>
+; CHECK-NEXT:    [[TMP24:%.*]] = icmp ne <16 x i16> [[TMP23]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = sext <16 x i1> [[TMP24]] to <16 x i16>
+; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
 ; CHECK-NEXT:    store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i16> [[RES]]
 ;
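These CHECK lines are matched against the instrumented IR by FileCheck. The file's RUN line sits outside the visible hunks, but MSan instrumentation tests of this kind are typically driven by an invocation of the form "opt %s -S -passes=msan 2>&1 | FileCheck %s", and the numbered [[TMP...]] captures are normally regenerated with llvm/utils/update_test_checks.py rather than edited by hand (both details are stated here as context, not taken from this diff).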
