Skip to content

Commit d39fd27

Browse files
PavelKopylakiramenai
authored and committed
[EVM] Refine intrinsic memory effects for improved alias analysis accuracy
EVM opcodes can be classified based on how their behavior or output depends on the transaction state:

- Readnone (Pure)
- Volatile (State-Dependent)
- Side-Effecting (State-Changing)

(Reference: EVM opcodes categorization)

This patch adjusts the memory attributes of the LLVM intrinsics corresponding to these opcodes. At the LLVM IR level, the transaction-scoped EVM state is modeled as reads/writes to inaccessible memory. This state does not include the heap, which is modeled separately via regular LLVM pointer parameters.

State-dependent intrinsics are now marked as reading from inaccessible memory. State-changing intrinsics are marked as both reading from and writing to it.

To capture memory dependencies between plain loads/stores to storage (or transient storage) and context intrinsics (CALL*-like or CREATE*-like), this patch extends the EVM alias analysis to determine aliasing between the call and the memory location in a custom way.
1 parent 3e9e231 commit d39fd27

File tree

8 files changed

+922
-293
lines changed

8 files changed

+922
-293
lines changed

llvm/include/llvm/IR/IntrinsicsEVM.td

Lines changed: 273 additions & 224 deletions
Large diffs are not rendered by default.

llvm/lib/Analysis/MemoryLocation.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,6 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
176176
// location
177177
auto T = Call->getModule()->getTargetTriple();
178178
if (Triple(T).isEraVM() || Triple(T).isEVM()) {
179-
// For EVM intrinsics, the memory size argument always immediately
180-
// follows the memory argument, meaning its index is ArgIdx + 1.
181179
auto GetMemLocation = [Call, Arg, &AATags](unsigned MemSizeArgIdx) {
182180
const auto *LenCI =
183181
dyn_cast<ConstantInt>(Call->getArgOperand(MemSizeArgIdx));
@@ -212,6 +210,23 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
212210
assert((ArgIdx == 0) && "Invalid argument index for calldataload");
213211
return MemoryLocation(Arg, LocationSize::precise(32), AATags);
214212
}
213+
case Intrinsic::evm_revert: {
214+
assert((ArgIdx == 0) && "Invalid argument index for revert");
215+
return GetMemLocation(ArgIdx + 1);
216+
}
217+
case Intrinsic::evm_extcodecopy: {
218+
assert((ArgIdx == 1 || ArgIdx == 2) &&
219+
"Invalid argument index for extcodecopy");
220+
return GetMemLocation(3);
221+
}
222+
case Intrinsic::evm_log0:
223+
case Intrinsic::evm_log1:
224+
case Intrinsic::evm_log2:
225+
case Intrinsic::evm_log3:
226+
case Intrinsic::evm_log4: {
227+
assert((ArgIdx == 0) && "Invalid argument index for log");
228+
return GetMemLocation(ArgIdx + 1);
229+
}
215230
default:
216231
llvm_unreachable("Unexpected intrinsic for EraVM/EVM target");
217232
break;

llvm/lib/Analysis/VMAliasAnalysis.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,11 @@ AliasResult VMAAResult::alias(const MemoryLocation &LocA,
156156
// If heap locations are the same, they either must or partially alias based
157157
// on the size of locations.
158158
if (StartAVal == StartBVal) {
159+
// If either of the memory references is empty, it doesn't matter what the
160+
// pointer values are.
161+
if (LocA.Size.isZero() || LocB.Size.isZero())
162+
return AliasResult::NoAlias;
163+
159164
if (LocA.Size == LocB.Size)
160165
return AliasResult::MustAlias;
161166
return AliasResult::PartialAlias;

llvm/lib/Target/EVM/EVMAliasAnalysis.cpp

Lines changed: 107 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
#include "EVMAliasAnalysis.h"
1414
#include "EVM.h"
15+
#include "llvm/IR/IntrinsicInst.h"
16+
#include "llvm/IR/IntrinsicsEVM.h"
1517
#include "llvm/IR/Module.h"
1618

1719
using namespace llvm;
@@ -36,6 +38,108 @@ ImmutablePass *llvm::createEVMExternalAAWrapperPass() {
3638
return new EVMExternalAAWrapper();
3739
}
3840

41+
EVMAAResult::EVMAAResult(const DataLayout &DL)
42+
: VMAAResult(DL, {EVMAS::AS_STORAGE, EVMAS::AS_TSTORAGE}, {EVMAS::AS_HEAP},
43+
EVMAS::MAX_ADDRESS) {}
44+
45+
ModRefInfo EVMAAResult::getArgModRefInfo(const CallBase *Call,
46+
unsigned ArgIdx) {
47+
if (Call->doesNotAccessMemory(ArgIdx))
48+
return ModRefInfo::NoModRef;
49+
50+
if (Call->onlyWritesMemory(ArgIdx))
51+
return ModRefInfo::Mod;
52+
53+
if (Call->onlyReadsMemory(ArgIdx))
54+
return ModRefInfo::Ref;
55+
56+
return ModRefInfo::ModRef;
57+
}
58+
59+
static MemoryLocation getMemLocForArgument(const CallBase *Call,
60+
unsigned ArgIdx) {
61+
AAMDNodes AATags = Call->getAAMetadata();
62+
const Value *Arg = Call->getArgOperand(ArgIdx);
63+
const auto *II = cast<IntrinsicInst>(Call);
64+
65+
auto GetMemLocation = [Call, Arg, &AATags](unsigned MemSizeArgIdx) {
66+
const auto *LenCI =
67+
dyn_cast<ConstantInt>(Call->getArgOperand(MemSizeArgIdx));
68+
if (LenCI && LenCI->getValue().getActiveBits() <= 64)
69+
return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
70+
AATags);
71+
return MemoryLocation::getAfter(Arg, AATags);
72+
};
73+
74+
switch (II->getIntrinsicID()) {
75+
case Intrinsic::evm_return: {
76+
assert((ArgIdx == 0) && "Invalid argument index for return");
77+
return GetMemLocation(ArgIdx + 1);
78+
}
79+
case Intrinsic::evm_create:
80+
case Intrinsic::evm_create2: {
81+
assert((ArgIdx == 1) && "Invalid argument index for create/create2");
82+
return GetMemLocation(ArgIdx + 1);
83+
}
84+
case Intrinsic::evm_call:
85+
case Intrinsic::evm_callcode: {
86+
assert((ArgIdx == 3 || ArgIdx == 5) &&
87+
"Invalid argument index for call/callcode");
88+
return GetMemLocation(ArgIdx + 1);
89+
}
90+
case Intrinsic::evm_delegatecall:
91+
case Intrinsic::evm_staticcall: {
92+
assert((ArgIdx == 2 || ArgIdx == 4) &&
93+
"Invalid argument index for delegatecall/staticcall");
94+
return GetMemLocation(ArgIdx + 1);
95+
}
96+
default:
97+
llvm_unreachable("Unexpected intrinsic for EraVM/EVM target");
98+
break;
99+
}
100+
}
101+
102+
ModRefInfo EVMAAResult::getModRefInfo(const CallBase *Call,
103+
const MemoryLocation &Loc,
104+
AAQueryInfo &AAQI) {
105+
const auto *II = dyn_cast<IntrinsicInst>(Call);
106+
if (!II)
107+
return AAResultBase::getModRefInfo(Call, Loc, AAQI);
108+
109+
unsigned AS = Loc.Ptr->getType()->getPointerAddressSpace();
110+
switch (II->getIntrinsicID()) {
111+
case Intrinsic::evm_return:
112+
if (AS == EVMAS::AS_STORAGE || AS == EVMAS::AS_TSTORAGE)
113+
return ModRefInfo::Ref;
114+
break;
115+
case Intrinsic::evm_create:
116+
case Intrinsic::evm_create2:
117+
case Intrinsic::evm_call:
118+
case Intrinsic::evm_callcode:
119+
case Intrinsic::evm_delegatecall:
120+
case Intrinsic::evm_staticcall:
121+
if (AS == EVMAS::AS_STORAGE || AS == EVMAS::AS_TSTORAGE)
122+
return ModRefInfo::ModRef;
123+
break;
124+
default:
125+
return AAResultBase::getModRefInfo(Call, Loc, AAQI);
126+
}
127+
128+
ModRefInfo Result = ModRefInfo::NoModRef;
129+
for (const auto &I : llvm::enumerate(Call->args())) {
130+
const Value *Arg = I.value();
131+
if (!Arg->getType()->isPointerTy())
132+
continue;
133+
unsigned ArgIdx = I.index();
134+
MemoryLocation ArgLoc = getMemLocForArgument(Call, ArgIdx);
135+
AliasResult ArgAlias = VMAAResult::alias(ArgLoc, Loc, AAQI, Call);
136+
if (ArgAlias != AliasResult::NoAlias)
137+
Result |= getArgModRefInfo(Call, ArgIdx);
138+
}
139+
140+
return Result;
141+
}
142+
39143
EVMAAWrapperPass::EVMAAWrapperPass() : ImmutablePass(ID) {
40144
initializeEVMAAWrapperPassPass(*PassRegistry::getPassRegistry());
41145
}
@@ -45,16 +149,10 @@ void EVMAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
45149
}
46150

47151
bool EVMAAWrapperPass::doInitialization(Module &M) {
48-
SmallDenseSet<unsigned> StorageAS = {EVMAS::AS_STORAGE, EVMAS::AS_TSTORAGE};
49-
SmallDenseSet<unsigned> HeapAS = {EVMAS::AS_HEAP};
50-
Result = std::make_unique<VMAAResult>(M.getDataLayout(), StorageAS, HeapAS,
51-
EVMAS::MAX_ADDRESS);
152+
Result = std::make_unique<EVMAAResult>(M.getDataLayout());
52153
return false;
53154
}
54155

55-
VMAAResult EVMAA::run(Function &F, AnalysisManager<Function> &AM) {
56-
SmallDenseSet<unsigned> StorageAS = {EVMAS::AS_STORAGE, EVMAS::AS_TSTORAGE};
57-
SmallDenseSet<unsigned> HeapAS = {EVMAS::AS_HEAP};
58-
return VMAAResult(F.getParent()->getDataLayout(), StorageAS, HeapAS,
59-
EVMAS::MAX_ADDRESS);
156+
EVMAAResult EVMAA::run(Function &F, AnalysisManager<Function> &AM) {
157+
return EVMAAResult(F.getParent()->getDataLayout());
60158
}

llvm/lib/Target/EVM/EVMAliasAnalysis.h

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,28 +16,45 @@
1616
#include "llvm/Analysis/VMAliasAnalysis.h"
1717

1818
namespace llvm {
19+
20+
/// EVM-specific AA result. Note that we override certain non-virtual methods
21+
/// from AAResultBase, as clarified in its documentation.
22+
class EVMAAResult : public VMAAResult {
23+
public:
24+
explicit EVMAAResult(const DataLayout &DL);
25+
26+
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
27+
AAQueryInfo &AAQI);
28+
29+
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
30+
31+
ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
32+
AAQueryInfo &AAQI) {
33+
return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
34+
}
35+
};
36+
1937
/// Analysis pass providing a never-invalidated alias analysis result.
2038
class EVMAA : public AnalysisInfoMixin<EVMAA> {
2139
friend AnalysisInfoMixin<EVMAA>;
2240

2341
static AnalysisKey Key;
2442

2543
public:
26-
using Result = VMAAResult;
27-
VMAAResult run(Function &F, AnalysisManager<Function> &AM);
44+
using Result = EVMAAResult;
45+
EVMAAResult run(Function &F, AnalysisManager<Function> &AM);
2846
};
2947

30-
/// Legacy wrapper pass to provide the VMAAResult object.
48+
/// Legacy wrapper pass to provide the EVMAAResult object.
3149
class EVMAAWrapperPass : public ImmutablePass {
32-
std::unique_ptr<VMAAResult> Result;
50+
std::unique_ptr<EVMAAResult> Result;
3351

3452
public:
3553
static char ID;
3654

3755
EVMAAWrapperPass();
3856

39-
VMAAResult &getResult() { return *Result; }
40-
const VMAAResult &getResult() const { return *Result; }
57+
EVMAAResult &getResult() { return *Result; }
4158

4259
bool doFinalization(Module &M) override {
4360
Result.reset();

0 commit comments

Comments
 (0)