Skip to content

Commit 0dbdc24

Browse files
committed
[msan] Handle NEON vector load
This adds an explicit handler for:
- llvm.aarch64.neon.ld1x2, llvm.aarch64.neon.ld1x3, llvm.aarch64.neon.ld1x4
- llvm.aarch64.neon.ld2, llvm.aarch64.neon.ld3, llvm.aarch64.neon.ld4
- llvm.aarch64.neon.ld2lane, llvm.aarch64.neon.ld3lane, llvm.aarch64.neon.ld4lane
- llvm.aarch64.neon.ld2r, llvm.aarch64.neon.ld3r, llvm.aarch64.neon.ld4r

instead of relying on the default strict handler. Updates the tests from #125267.
1 parent 667bbd2 commit 0dbdc24

File tree

2 files changed

+1212
-925
lines changed

2 files changed

+1212
-925
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 102 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4071,12 +4071,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
40714071
ShadowArgs.append(1, SrcShadowPtr);
40724072
ShadowArgs.append(1, Mask);
40734073

4074-
CallInst *CI =
4075-
IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
4076-
// The intrinsic may require floating-point but shadows can be arbitrary
4077-
// bit patterns, of which some would be interpreted as "invalid"
4078-
// floating-point values (NaN etc.); we assume the intrinsic will happily
4079-
// copy them.
4074+
CallInst *CI;
4075+
// The AVX masked load intrinsics do not have integer variants. We use the
4076+
// floating-point variants, and assume that the intrinsic will happily copy
4077+
// the shadows even if they are interpreted as "invalid" floating-point
4078+
// values (NaN etc.).
4079+
CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
40804080
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
40814081

40824082
if (!MS.TrackOrigins)
@@ -4242,6 +4242,82 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
42424242
setOriginForNaryOp(I);
42434243
}
42444244

4245+
// Handle Arm NEON vector load intrinsics (vld*).
4246+
//
4247+
// The WithLane instructions (ld[234]lane) are similar to:
4248+
// call {<4 x i32>, <4 x i32>, <4 x i32>}
4249+
// @llvm.aarch64.neon.ld3lane.v4i32.p0
4250+
// (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr
4251+
// %A)
4252+
//
4253+
// The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
4254+
// to:
4255+
// call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
4256+
void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
4257+
unsigned int numArgs = I.arg_size();
4258+
4259+
// Return type is a struct of vectors of integers or floating-point
4260+
assert(I.getType()->isStructTy());
4261+
[[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
4262+
assert(RetTy->getNumElements() > 0);
4263+
assert(isa<FixedVectorType>(RetTy->getElementType(0)));
4264+
assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
4265+
RetTy->getElementType(0)->isFPOrFPVectorTy());
4266+
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
4267+
assert(RetTy->getElementType(i) == RetTy->getElementType(0));
4268+
4269+
if (WithLane) {
4270+
// 2, 3 or 4 vectors, plus lane number, plus input pointer
4271+
assert(numArgs >= 4);
4272+
assert(numArgs <= 6);
4273+
4274+
// Return type is a struct of the input vectors
4275+
assert(RetTy->getNumElements() + 2 == numArgs);
4276+
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
4277+
assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
4278+
} else
4279+
assert(numArgs == 1);
4280+
4281+
IRBuilder<> IRB(&I);
4282+
4283+
SmallVector<Value *, 6> ShadowArgs;
4284+
if (WithLane) {
4285+
for (unsigned int i = 0; i < numArgs - 2; i++)
4286+
ShadowArgs.push_back(getShadow(I.getArgOperand(i)));
4287+
4288+
// Lane number, passed verbatim
4289+
Value *LaneNumber = I.getArgOperand(numArgs - 2);
4290+
ShadowArgs.push_back(LaneNumber);
4291+
4292+
// TODO: blend shadow of lane number into output shadow?
4293+
insertShadowCheck(LaneNumber, &I);
4294+
}
4295+
4296+
Value *Src = I.getArgOperand(numArgs - 1);
4297+
assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
4298+
4299+
const Align Alignment = Align(1);
4300+
4301+
Type *SrcShadowTy = getShadowTy(Src);
4302+
Value *SrcShadowPtr, *SrcOriginPtr;
4303+
std::tie(SrcShadowPtr, SrcOriginPtr) =
4304+
getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
4305+
ShadowArgs.push_back(SrcShadowPtr);
4306+
4307+
CallInst *CI;
4308+
// The NEON vector load instructions handled by this function all have
4309+
// integer variants. It is easier to use those rather than trying to cast
4310+
// a struct of vectors of floats into a struct of vectors of integers.
4311+
CI = IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
4312+
setShadow(&I, CI);
4313+
4314+
if (!MS.TrackOrigins)
4315+
return;
4316+
4317+
Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
4318+
setOrigin(&I, PtrSrcOrigin);
4319+
}
4320+
42454321
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
42464322
/// and vst{2,3,4}lane).
42474323
///
@@ -4946,6 +5022,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
49465022
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
49475023
break;
49485024

5025+
case Intrinsic::aarch64_neon_ld1x2:
5026+
case Intrinsic::aarch64_neon_ld1x3:
5027+
case Intrinsic::aarch64_neon_ld1x4:
5028+
case Intrinsic::aarch64_neon_ld2:
5029+
case Intrinsic::aarch64_neon_ld3:
5030+
case Intrinsic::aarch64_neon_ld4:
5031+
case Intrinsic::aarch64_neon_ld2r:
5032+
case Intrinsic::aarch64_neon_ld3r:
5033+
case Intrinsic::aarch64_neon_ld4r: {
5034+
handleNEONVectorLoad(I, /*WithLane=*/false);
5035+
break;
5036+
}
5037+
5038+
case Intrinsic::aarch64_neon_ld2lane:
5039+
case Intrinsic::aarch64_neon_ld3lane:
5040+
case Intrinsic::aarch64_neon_ld4lane: {
5041+
handleNEONVectorLoad(I, /*WithLane=*/true);
5042+
break;
5043+
}
5044+
49495045
// Saturating extract narrow
49505046
case Intrinsic::aarch64_neon_sqxtn:
49515047
case Intrinsic::aarch64_neon_sqxtun:

0 commit comments

Comments
 (0)