Merged
Changes from 2 commits
104 changes: 100 additions & 4 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4073,10 +4073,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {

CallInst *CI =
IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
-// The intrinsic may require floating-point but shadows can be arbitrary
-// bit patterns, of which some would be interpreted as "invalid"
-// floating-point values (NaN etc.); we assume the intrinsic will happily
-// copy them.
+// The AVX masked load intrinsics do not have integer variants. We use the
+// floating-point variants, and assume that the intrinsic will happily copy
+// the shadows even if they are interpreted as "invalid" floating-point
+// values (NaN etc.).
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));

if (!MS.TrackOrigins)
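(For illustration: a hand-written sketch, not output from the pass, of the shadow
computation this hunk produces for an AVX masked load. The maskload.ps.256
overload and all value names are chosen for the example; %sp stands for the
shadow address that MSan derives from the application pointer.)

declare <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)

define <8 x i32> @avx_shadow_sketch(ptr %sp, <8 x i32> %mask) {
  ; Load the shadow through the floating-point variant of the intrinsic,
  ; assuming it copies "invalid" FP bit patterns (NaNs etc.) unchanged.
  %sfp = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr %sp, <8 x i32> %mask)
  ; Bitcast the result back to the integer shadow type.
  %s = bitcast <8 x float> %sfp to <8 x i32>
  ret <8 x i32> %s
}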
@@ -4242,6 +4242,82 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}

// Handle Arm NEON vector load intrinsics (vld*).
//
// The WithLane instructions (ld[234]lane) are similar to:
//   call {<4 x i32>, <4 x i32>, <4 x i32>}
//       @llvm.aarch64.neon.ld3lane.v4i32.p0
//       (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr %A)
//
// The non-WithLane instructions (ld[234], ld1x[234], ld[234]r) are similar
// to:
//   call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
unsigned int numArgs = I.arg_size();

// The return type is a struct of vectors of integers or of floating-point values
assert(I.getType()->isStructTy());
[[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
@vitalybuka (Collaborator), Mar 21, 2025:
  [[maybe_unused]] is not needed because of loop?

assert(RetTy->getNumElements() > 0);
assert(isa<FixedVectorType>(RetTy->getElementType(0)));
assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
RetTy->getElementType(0)->isFPOrFPVectorTy());
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
assert(RetTy->getElementType(i) == RetTy->getElementType(0));

if (WithLane) {
// 2, 3 or 4 vectors, plus lane number, plus input pointer
assert(numArgs >= 4);
assert(numArgs <= 6);

// Return type is a struct of the input vectors
assert(RetTy->getNumElements() + 2 == numArgs);
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
} else
assert(numArgs == 1);

IRBuilder<> IRB(&I);

SmallVector<Value *, 6> ShadowArgs;
if (WithLane) {
for (unsigned int i = 0; i < numArgs - 2; i++)
ShadowArgs.push_back(getShadow(I.getArgOperand(i)));

// Lane number, passed verbatim
Value *LaneNumber = I.getArgOperand(numArgs - 2);
ShadowArgs.push_back(LaneNumber);

// TODO: blend shadow of lane number into output shadow?
insertShadowCheck(LaneNumber, &I);
}

Value *Src = I.getArgOperand(numArgs - 1);
assert(Src->getType()->isPointerTy() && "Source is not a pointer!");

const Align Alignment = Align(1);
(Contributor): Why don't we just pass Align(1) below?
(Contributor Author): Done

Type *SrcShadowTy = getShadowTy(Src);
Value *SrcShadowPtr, *SrcOriginPtr;
std::tie(SrcShadowPtr, SrcOriginPtr) =
getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
ShadowArgs.push_back(SrcShadowPtr);

// The NEON vector load instructions handled by this function all have
// integer variants. It is easier to use those rather than trying to cast
// a struct of vectors of floats into a struct of vectors of integers.
CallInst *CI =
    IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);

if (!MS.TrackOrigins)
return;

Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
setOrigin(&I, PtrSrcOrigin);
}
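
(For illustration: a minimal hand-written sketch of the shadow call this handler
emits for a plain two-register load, reusing the ld2.v8i8.p0 shape from the
comment above. %sa stands for the shadow address obtained via getShadowOriginPtr;
it and the function name are invented for the example.)

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr)

define { <8 x i8>, <8 x i8> } @neon_ld2_shadow_sketch(ptr %sa) {
  ; The shadows of both result registers are loaded with the same integer
  ; intrinsic, applied to the shadow address instead of the app address.
  %s = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %sa)
  ret { <8 x i8>, <8 x i8> } %s
}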

/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
/// and vst{2,3,4}lane).
///
@@ -4946,6 +5022,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
break;

case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r: {
handleNEONVectorLoad(I, /*WithLane=*/false);
break;
}

case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane: {
handleNEONVectorLoad(I, /*WithLane=*/true);
break;
}

// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun:
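(For illustration: the corresponding sketch for the WithLane path, using the
ld3lane shape from the handler's comment. %s1..%s3 stand for the shadows of the
input vectors, the lane index is passed through verbatim, and %sa is again the
assumed shadow address.)

declare { <4 x i32>, <4 x i32>, <4 x i32> }
    @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr)

define { <4 x i32>, <4 x i32>, <4 x i32> }
    @neon_ld3lane_shadow_sketch(<4 x i32> %s1, <4 x i32> %s2, <4 x i32> %s3,
                                i64 %lane, ptr %sa) {
  ; Shadows occupy the vector operand positions; the lane number is used
  ; unchanged (its own shadow is checked separately via insertShadowCheck).
  %s = call { <4 x i32>, <4 x i32>, <4 x i32> }
           @llvm.aarch64.neon.ld3lane.v4i32.p0(
               <4 x i32> %s1, <4 x i32> %s2, <4 x i32> %s3,
               i64 %lane, ptr %sa)
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %s
}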