Skip to content

Commit 30d8f69

Browse files
authored
[msan][NFCI] Generalize handlePairwiseShadowOrIntrinsic to have shards (#167954)
This will allow fixing up the handling of AVX2 phadd/phsub instructions in a future patch, by setting Shards = 2. Currently, the extra functionality is not used.
1 parent f6004ae commit 30d8f69

File tree

1 file changed

+73
-34
lines changed

1 file changed

+73
-34
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,34 +2720,55 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
27202720
// of elements.
27212721
//
27222722
// For example, suppose we have:
2723-
// VectorA: <a1, a2, a3, a4, a5, a6>
2724-
// VectorB: <b1, b2, b3, b4, b5, b6>
2725-
// ReductionFactor: 3.
2723+
// VectorA: <a0, a1, a2, a3, a4, a5>
2724+
// VectorB: <b0, b1, b2, b3, b4, b5>
2725+
// ReductionFactor: 3
2726+
// Shards: 1
27262727
// The output would be:
2727-
// <a1|a2|a3, a4|a5|a6, b1|b2|b3, b4|b5|b6>
2728+
// <a0|a1|a2, a3|a4|a5, b0|b1|b2, b3|b4|b5>
2729+
//
2730+
// If we have:
2731+
// VectorA: <a0, a1, a2, a3, a4, a5, a6, a7>
2732+
// VectorB: <b0, b1, b2, b3, b4, b5, b6, b7>
2733+
// ReductionFactor: 2
2734+
// Shards: 2
2735+
// then a and b each have 2 "shards", resulting in the output being
2736+
// interleaved:
2737+
// <a0|a1, a2|a3, b0|b1, b2|b3, a4|a5, a6|a7, b4|b5, b6|b7>
27282738
//
27292739
// This is convenient for instrumenting horizontal add/sub.
27302740
// For bitwise OR on "vertical" pairs, see maybeHandleSimpleNomemIntrinsic().
27312741
Value *horizontalReduce(IntrinsicInst &I, unsigned ReductionFactor,
2732-
Value *VectorA, Value *VectorB) {
2742+
unsigned Shards, Value *VectorA, Value *VectorB) {
27332743
assert(isa<FixedVectorType>(VectorA->getType()));
2734-
unsigned TotalNumElems =
2744+
unsigned NumElems =
27352745
cast<FixedVectorType>(VectorA->getType())->getNumElements();
27362746

2747+
[[maybe_unused]] unsigned TotalNumElems = NumElems;
27372748
if (VectorB) {
27382749
assert(VectorA->getType() == VectorB->getType());
2739-
TotalNumElems = TotalNumElems * 2;
2750+
TotalNumElems *= 2;
27402751
}
27412752

2742-
assert(TotalNumElems % ReductionFactor == 0);
2753+
assert(NumElems % (ReductionFactor * Shards) == 0);
27432754

27442755
Value *Or = nullptr;
27452756

27462757
IRBuilder<> IRB(&I);
27472758
for (unsigned i = 0; i < ReductionFactor; i++) {
27482759
SmallVector<int, 16> Mask;
2749-
for (unsigned X = 0; X < TotalNumElems; X += ReductionFactor)
2750-
Mask.push_back(X + i);
2760+
2761+
for (unsigned j = 0; j < Shards; j++) {
2762+
unsigned Offset = NumElems / Shards * j;
2763+
2764+
for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
2765+
Mask.push_back(Offset + X + i);
2766+
2767+
if (VectorB) {
2768+
for (unsigned X = 0; X < NumElems / Shards; X += ReductionFactor)
2769+
Mask.push_back(NumElems + Offset + X + i);
2770+
}
2771+
}
27512772

27522773
Value *Masked;
27532774
if (VectorB)
@@ -2769,7 +2790,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
27692790
///
27702791
/// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
27712792
/// <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
2772-
void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I) {
2793+
void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards) {
27732794
assert(I.arg_size() == 1 || I.arg_size() == 2);
27742795

27752796
assert(I.getType()->isVectorTy());
@@ -2792,8 +2813,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
27922813
if (I.arg_size() == 2)
27932814
SecondArgShadow = getShadow(&I, 1);
27942815

2795-
Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, FirstArgShadow,
2796-
SecondArgShadow);
2816+
Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards,
2817+
FirstArgShadow, SecondArgShadow);
27972818

27982819
OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I));
27992820

@@ -2808,7 +2829,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
28082829
/// conceptually operates on
28092830
/// (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]])
28102831
/// and can be handled with ReinterpretElemWidth == 16.
2811-
void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I,
2832+
void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I, unsigned Shards,
28122833
int ReinterpretElemWidth) {
28132834
assert(I.arg_size() == 1 || I.arg_size() == 2);
28142835

@@ -2852,8 +2873,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
28522873
SecondArgShadow = IRB.CreateBitCast(SecondArgShadow, ReinterpretShadowTy);
28532874
}
28542875

2855-
Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, FirstArgShadow,
2856-
SecondArgShadow);
2876+
Value *OrShadow = horizontalReduce(I, /*ReductionFactor=*/2, Shards,
2877+
FirstArgShadow, SecondArgShadow);
28572878

28582879
OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I));
28592880

@@ -6036,48 +6057,66 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
60366057
// Packed Horizontal Add/Subtract
60376058
case Intrinsic::x86_ssse3_phadd_w:
60386059
case Intrinsic::x86_ssse3_phadd_w_128:
6039-
case Intrinsic::x86_avx2_phadd_w:
60406060
case Intrinsic::x86_ssse3_phsub_w:
60416061
case Intrinsic::x86_ssse3_phsub_w_128:
6042-
case Intrinsic::x86_avx2_phsub_w: {
6043-
handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16);
6062+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6063+
/*ReinterpretElemWidth=*/16);
6064+
break;
6065+
6066+
case Intrinsic::x86_avx2_phadd_w:
6067+
case Intrinsic::x86_avx2_phsub_w:
6068+
// TODO: Shards = 2
6069+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6070+
/*ReinterpretElemWidth=*/16);
60446071
break;
6045-
}
60466072

60476073
// Packed Horizontal Add/Subtract
60486074
case Intrinsic::x86_ssse3_phadd_d:
60496075
case Intrinsic::x86_ssse3_phadd_d_128:
6050-
case Intrinsic::x86_avx2_phadd_d:
60516076
case Intrinsic::x86_ssse3_phsub_d:
60526077
case Intrinsic::x86_ssse3_phsub_d_128:
6053-
case Intrinsic::x86_avx2_phsub_d: {
6054-
handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/32);
6078+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6079+
/*ReinterpretElemWidth=*/32);
6080+
break;
6081+
6082+
case Intrinsic::x86_avx2_phadd_d:
6083+
case Intrinsic::x86_avx2_phsub_d:
6084+
// TODO: Shards = 2
6085+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6086+
/*ReinterpretElemWidth=*/32);
60556087
break;
6056-
}
60576088

60586089
// Packed Horizontal Add/Subtract and Saturate
60596090
case Intrinsic::x86_ssse3_phadd_sw:
60606091
case Intrinsic::x86_ssse3_phadd_sw_128:
6061-
case Intrinsic::x86_avx2_phadd_sw:
60626092
case Intrinsic::x86_ssse3_phsub_sw:
60636093
case Intrinsic::x86_ssse3_phsub_sw_128:
6064-
case Intrinsic::x86_avx2_phsub_sw: {
6065-
handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16);
6094+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6095+
/*ReinterpretElemWidth=*/16);
6096+
break;
6097+
6098+
case Intrinsic::x86_avx2_phadd_sw:
6099+
case Intrinsic::x86_avx2_phsub_sw:
6100+
// TODO: Shards = 2
6101+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1,
6102+
/*ReinterpretElemWidth=*/16);
60666103
break;
6067-
}
60686104

60696105
// Packed Single/Double Precision Floating-Point Horizontal Add
60706106
case Intrinsic::x86_sse3_hadd_ps:
60716107
case Intrinsic::x86_sse3_hadd_pd:
6072-
case Intrinsic::x86_avx_hadd_pd_256:
6073-
case Intrinsic::x86_avx_hadd_ps_256:
60746108
case Intrinsic::x86_sse3_hsub_ps:
60756109
case Intrinsic::x86_sse3_hsub_pd:
6110+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1);
6111+
break;
6112+
6113+
case Intrinsic::x86_avx_hadd_pd_256:
6114+
case Intrinsic::x86_avx_hadd_ps_256:
60766115
case Intrinsic::x86_avx_hsub_pd_256:
6077-
case Intrinsic::x86_avx_hsub_ps_256: {
6078-
handlePairwiseShadowOrIntrinsic(I);
6116+
case Intrinsic::x86_avx_hsub_ps_256:
6117+
// TODO: Shards = 2
6118+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1);
60796119
break;
6080-
}
60816120

60826121
case Intrinsic::x86_avx_maskstore_ps:
60836122
case Intrinsic::x86_avx_maskstore_pd:
@@ -6460,7 +6499,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
64606499
// Add Long Pairwise
64616500
case Intrinsic::aarch64_neon_saddlp:
64626501
case Intrinsic::aarch64_neon_uaddlp: {
6463-
handlePairwiseShadowOrIntrinsic(I);
6502+
handlePairwiseShadowOrIntrinsic(I, /*Shards=*/1);
64646503
break;
64656504
}
64666505

0 commit comments

Comments
 (0)