Skip to content

Commit cf00284

Browse files
authored
Revert "[msan] Improve packed multiply-add instrumentation" (#153343)
Reverts #152941 Buildbot breakage: https://lab.llvm.org/buildbot/#/builders/66/builds/17843
1 parent 2edee0b commit cf00284

File tree

8 files changed

+103
-280
lines changed

8 files changed

+103
-280
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 19 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -3641,10 +3641,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
36413641
setOriginForNaryOp(I);
36423642
}
36433643

3644-
// Get an MMX-sized (64-bit) vector type, or optionally, other sized
3645-
// vectors.
3646-
Type *getMMXVectorTy(unsigned EltSizeInBits,
3647-
unsigned X86_MMXSizeInBits = 64) {
3644+
// Get an MMX-sized vector type.
3645+
Type *getMMXVectorTy(unsigned EltSizeInBits) {
3646+
const unsigned X86_MMXSizeInBits = 64;
36483647
assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
36493648
"Illegal MMX vector element size");
36503649
return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
@@ -3844,78 +3843,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
38443843
setOriginForNaryOp(I);
38453844
}
38463845

3847-
// Instrument multiply-add intrinsics.
3848-
//
3849-
// e.g., Two operands:
3850-
// <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
3851-
// <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
3852-
//
3853-
// Three operands are not implemented yet:
3854-
// <4 x i32> @llvm.x86.avx512.vpdpbusd.128
3855-
// (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
3856-
// (the result of multiply-add'ing %a and %b is accumulated with %s)
3857-
void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
3858-
unsigned EltSizeInBits = 0) {
3846+
// Instrument multiply-add intrinsic.
3847+
void handleVectorPmaddIntrinsic(IntrinsicInst &I,
3848+
unsigned MMXEltSizeInBits = 0) {
3849+
Type *ResTy =
3850+
MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
38593851
IRBuilder<> IRB(&I);
3860-
3861-
[[maybe_unused]] FixedVectorType *ReturnType =
3862-
cast<FixedVectorType>(I.getType());
3863-
assert(isa<FixedVectorType>(ReturnType));
3864-
3865-
assert(I.arg_size() == 2);
3866-
3867-
// Vectors A and B, and shadows
3868-
Value *Va = I.getOperand(0);
3869-
Value *Vb = I.getOperand(1);
3870-
3871-
Value *Sa = getShadow(&I, 0);
3872-
Value *Sb = getShadow(&I, 1);
3873-
3874-
FixedVectorType *ParamType =
3875-
cast<FixedVectorType>(I.getArgOperand(0)->getType());
3876-
assert(ParamType == I.getArgOperand(1)->getType());
3877-
3878-
assert(ParamType->getPrimitiveSizeInBits() ==
3879-
ReturnType->getPrimitiveSizeInBits());
3880-
3881-
// Step 1: instrument multiplication of corresponding vector elements
3882-
if (EltSizeInBits) {
3883-
ParamType = cast<FixedVectorType>(
3884-
getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
3885-
3886-
Va = IRB.CreateBitCast(Va, ParamType);
3887-
Vb = IRB.CreateBitCast(Vb, ParamType);
3888-
3889-
Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType));
3890-
Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType));
3891-
} else {
3892-
assert(ParamType->getNumElements() ==
3893-
ReturnType->getNumElements() * ReductionFactor);
3894-
}
3895-
3896-
Value *Sab = IRB.CreateOr(Sa, Sb);
3897-
3898-
// Multiplying an uninitialized / element by zero results in an initialized
3899-
// element.
3900-
Value *Zero = Constant::getNullValue(Va->getType());
3901-
Value *VaNotZero = IRB.CreateICmpNE(Va, Zero);
3902-
Value *VbNotZero = IRB.CreateICmpNE(Vb, Zero);
3903-
Value *VaAndVbNotZero = IRB.CreateAnd(VaNotZero, VbNotZero);
3904-
3905-
// After multiplying e.g., <8 x i16> %a, <8 x i16> %b, we should have
3906-
// <8 x i32> %ab, but we cheated and ended up with <8 x i16>.
3907-
Sab = IRB.CreateAnd(Sab, IRB.CreateSExt(VaAndVbNotZero, Sab->getType()));
3908-
3909-
// Step 2: instrument horizontal add
3910-
// e.g., collapse <8 x i16> into <4 x i16> (reduction factor == 2)
3911-
// <16 x i8> into <4 x i8> (reduction factor == 4)
3912-
Value *OutShadow = horizontalReduce(I, ReductionFactor, Sab, nullptr);
3913-
3914-
// Extend to <4 x i32>.
3915-
// For MMX, cast it back to <1 x i64>.
3916-
OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
3917-
3918-
setShadow(&I, OutShadow);
3852+
auto *Shadow0 = getShadow(&I, 0);
3853+
auto *Shadow1 = getShadow(&I, 1);
3854+
Value *S = IRB.CreateOr(Shadow0, Shadow1);
3855+
S = IRB.CreateBitCast(S, ResTy);
3856+
S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3857+
ResTy);
3858+
S = IRB.CreateBitCast(S, getShadowTy(&I));
3859+
setShadow(&I, S);
39193860
setOriginForNaryOp(I);
39203861
}
39213862

@@ -5450,28 +5391,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
54505391
handleVectorSadIntrinsic(I);
54515392
break;
54525393

5453-
// Multiply and Add Packed Words
5454-
// < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
5455-
// < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
5456-
5457-
// Multiply and Add Packed Signed and Unsigned Bytes
5458-
// < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
5459-
// <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
54605394
case Intrinsic::x86_sse2_pmadd_wd:
54615395
case Intrinsic::x86_avx2_pmadd_wd:
54625396
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
54635397
case Intrinsic::x86_avx2_pmadd_ub_sw:
5464-
handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
5398+
handleVectorPmaddIntrinsic(I);
54655399
break;
54665400

5467-
// <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
54685401
case Intrinsic::x86_ssse3_pmadd_ub_sw:
5469-
handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8);
5402+
handleVectorPmaddIntrinsic(I, 8);
54705403
break;
54715404

5472-
// <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
54735405
case Intrinsic::x86_mmx_pmadd_wd:
5474-
handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
5406+
handleVectorPmaddIntrinsic(I, 16);
54755407
break;
54765408

54775409
case Intrinsic::x86_sse_cmp_ss:

llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -141,16 +141,10 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
141141
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
142142
; CHECK-NEXT: call void @llvm.donothing()
143143
; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
144-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer
145-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer
146-
; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]]
147-
; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
148-
; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i16> [[TMP3]], [[TMP12]]
149-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
150-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
151-
; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i16> [[TMP8]], [[TMP9]]
152-
; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP10]] to <8 x i32>
153-
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]])
144+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
145+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
146+
; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
147+
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
154148
; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
155149
; CHECK-NEXT: ret <8 x i32> [[RES]]
156150
;
@@ -684,16 +678,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 {
684678
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
685679
; CHECK-NEXT: call void @llvm.donothing()
686680
; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
687-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer
688-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
689-
; CHECK-NEXT: [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]]
690-
; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i8>
691-
; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i8> [[TMP3]], [[TMP12]]
692-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
693-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
694-
; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP8]], [[TMP9]]
695-
; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16>
696-
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
681+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16>
682+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer
683+
; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
684+
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
697685
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8
698686
; CHECK-NEXT: ret <16 x i16> [[RES]]
699687
;
@@ -719,16 +707,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #
719707
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
720708
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
721709
; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]]
722-
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer
723-
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
724-
; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]]
725-
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
726-
; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i8> [[TMP8]], [[TMP17]]
727-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
728-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
729-
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]]
730-
; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16>
731-
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
710+
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16>
711+
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
712+
; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16>
713+
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]])
732714
; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8
733715
; CHECK-NEXT: ret <16 x i16> [[RES]]
734716
;

llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll

Lines changed: 19 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,27 +1687,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
16871687
; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
16881688
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
16891689
; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1690-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16>
1691-
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16>
1692-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
1693-
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
1694-
; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i16> [[TMP10]], [[TMP11]]
1695-
; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
1696-
; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
1697-
; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]]
1698-
; CHECK-NEXT: [[TMP32:%.*]] = sext <4 x i1> [[TMP31]] to <4 x i16>
1699-
; CHECK-NEXT: [[TMP23:%.*]] = and <4 x i16> [[TMP22]], [[TMP32]]
1700-
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
1701-
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
1702-
; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i16> [[TMP24]], [[TMP25]]
1703-
; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i16> [[TMP26]] to i32
1704-
; CHECK-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64
1705-
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64>
1706-
; CHECK-NEXT: [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1690+
; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1691+
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
1692+
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
1693+
; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
1694+
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1695+
; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1696+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
17071697
; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32>
1708-
; CHECK-NEXT: [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32>
1709-
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
1710-
; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64>
1698+
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64>
1699+
; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
17111700
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0
17121701
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
17131702
; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
@@ -3326,27 +3315,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 {
33263315
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
33273316
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64>
33283317
; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3329-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8>
3330-
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8>
3331-
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8>
3332-
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
3333-
; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP12]], [[TMP13]]
3334-
; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer
3335-
; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer
3336-
; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]]
3337-
; CHECK-NEXT: [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i8>
3338-
; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP14]], [[TMP35]]
3339-
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
3340-
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
3341-
; CHECK-NEXT: [[TMP27:%.*]] = or <4 x i8> [[TMP25]], [[TMP26]]
3342-
; CHECK-NEXT: [[TMP29:%.*]] = bitcast <4 x i8> [[TMP27]] to i32
3343-
; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP29]] to i64
3344-
; CHECK-NEXT: [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64>
3345-
; CHECK-NEXT: [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
3346-
; CHECK-NEXT: [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8>
3347-
; CHECK-NEXT: [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8>
3348-
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64>
3349-
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64>
3318+
; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]]
3319+
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
3320+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer
3321+
; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
3322+
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3323+
; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
3324+
; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8>
3325+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
3326+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
3327+
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
33503328
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
33513329
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0
33523330
; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8

llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -763,16 +763,10 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 {
763763
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
764764
; CHECK-NEXT: call void @llvm.donothing()
765765
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
766-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer
767-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer
768-
; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]]
769-
; CHECK-NEXT: [[TMP12:%.*]] = sext <8 x i1> [[TMP11]] to <8 x i16>
770-
; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i16> [[TMP3]], [[TMP12]]
771-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
772-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
773-
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]]
774-
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32>
775-
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]])
766+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
767+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
768+
; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
769+
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
776770
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
777771
; CHECK-NEXT: ret <4 x i32> [[RES]]
778772
;

0 commit comments

Comments
 (0)