Skip to content

Commit 5a9977b

Browse files
State merging (#973)
Minimal proof of concept for merging branches in symbolic execution. --------- Co-authored-by: Mate Soos <[email protected]>
1 parent e59ad40 commit 5a9977b

File tree

12 files changed

+434
-6
lines changed

12 files changed

+434
-6
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
- New opcode: CLZ
1212
- Support for `--early-abort` flag that aborts symbolic execution as soon as a counterexample is found
1313
- Support for `vm.etch(address, bytecode)` cheatcode to set the code of a contract at a given address
14+
- State merging via speculative execution of both branches of a JUMPI, joining the
15+
resulting states whenever possible. Amount of speculative execution is
16+
controlled via `merge-max-budget`
1417

1518
## Changed
1619
- `AbiFunction` and `AbiBytes` parser is now more strict

cli/cli.hs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ data CommonOptions = CommonOptions
101101
, onlyDeployed ::Bool
102102
, cacheDir ::Maybe String
103103
, earlyAbort ::Bool
104+
, mergeMaxBudget :: Int
104105
}
105106

106107
commonOptions :: Parser CommonOptions
@@ -131,6 +132,7 @@ commonOptions = CommonOptions
131132
<*> (switch $ long "only-deployed" <> help "When trying to resolve unknown addresses, only use addresses of deployed contracts")
132133
<*> (optional $ strOption $ long "cache-dir" <> help "Directory to save and load RPC cache")
133134
<*> (switch $ long "early-abort" <> help "Stop exploration immediately upon finding the first counterexample")
135+
<*> (option auto $ long "merge-max-budget" <> showDefault <> value 100 <> help "Max instructions for speculative merge exploration during path merging")
134136

135137
data CommonExecOptions = CommonExecOptions
136138
{ address ::Maybe Addr
@@ -374,6 +376,7 @@ main = do
374376
, simp = Prelude.not cOpts.noSimplify
375377
, onlyDeployed = cOpts.onlyDeployed
376378
, earlyAbort = cOpts.earlyAbort
379+
, mergeMaxBudget = cOpts.mergeMaxBudget
377380
} }
378381

379382

hevm.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ library
113113
EVM.Traversals,
114114
EVM.CSE,
115115
EVM.Keccak,
116+
EVM.Merge,
116117
EVM.Transaction,
117118
EVM.Types,
118119
EVM.UnitTest,

src/EVM.hs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import EVM.Expr (readStorage, concStoreContains, writeStorage, readByte, readWor
1616
import EVM.Expr qualified as Expr
1717
import EVM.FeeSchedule (FeeSchedule (..))
1818
import EVM.FeeSchedule qualified as Fees (feeSchedule)
19+
import EVM.Merge qualified as Merge
1920
import EVM.Op
2021
import EVM.Precompiled qualified
2122
import EVM.Solidity
@@ -198,6 +199,7 @@ makeVm o = do
198199
, exploreDepth = 0
199200
, keccakPreImgs = fromList []
200201
, pathsVisited = mempty
202+
, mergeState = defaultMergeState
201203
}
202204
where
203205
env = Env
@@ -867,14 +869,39 @@ exec1 conf = do
867869

868870
OpJumpi -> {-# SCC "OpJumpi" #-}
869871
case stk of
870-
x:y:xs -> forceConcreteLimitSz x 2 "JUMPI: symbolic jumpdest" $ \x' ->
872+
x:cond:xs -> forceConcreteLimitSz x 2 "JUMPI: symbolic jumpdest" $ \maybeTarget ->
871873
burn g_high $
872874
let jump :: Bool -> EVM t ()
873875
jump False = assign' (#state % #stack) xs >> next
874-
jump _ = case tryInto x' of
876+
jump _ = case tryInto maybeTarget of
875877
Left _ -> vmError BadJumpDestination
876-
Right i -> checkJump i xs
877-
in branch conf.maxDepth y jump
878+
Right jumpTarget -> checkJump jumpTarget xs
879+
-- For Symbolic execution, try forward-jump merge first
880+
symbolicMerge :: EVM Symbolic ()
881+
symbolicMerge = case tryInto maybeTarget of
882+
Left _ ->
883+
-- Invalid destination - but we only error if we try to jump
884+
-- Use branch to decide based on condition
885+
branch conf.maxDepth cond $ \case
886+
False -> assign' (#state % #stack) xs >> next
887+
True -> vmError BadJumpDestination
888+
Right jumpTarget -> do
889+
-- Check if we're already in merge mode (speculative execution)
890+
ms <- use #mergeState
891+
if ms.msActive then
892+
-- Inside speculation: nested branch, bail out
893+
-- This will cause speculateLoop to return Nothing
894+
assign #result $ Just $ VMFailure BadJumpDestination
895+
else do
896+
merged <- Merge.tryMergeForwardJump conf (exec1 conf) vm.state.pc jumpTarget cond xs
897+
unless merged $ do
898+
let jumpSym :: Bool -> EVM Symbolic ()
899+
jumpSym False = assign' (#state % #stack) xs >> next
900+
jumpSym _ = checkJump jumpTarget xs
901+
branch conf.maxDepth cond jumpSym
902+
in case eqT @t @Symbolic of
903+
Just Refl -> symbolicMerge
904+
Nothing -> branch conf.maxDepth cond jump
878905
_ -> underrun
879906

880907
OpPc -> {-# SCC "OpPc" #-}

src/EVM/Effects.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ data Config = Config
4747
, simp :: Bool
4848
, onlyDeployed :: Bool
4949
, earlyAbort :: Bool
50+
, mergeMaxBudget :: Int -- ^ Max instructions for speculative merge exploration
5051
}
5152
deriving (Show, Eq)
5253

@@ -67,6 +68,7 @@ defaultConfig = Config
6768
, simp = True
6869
, onlyDeployed = False
6970
, earlyAbort = False
71+
, mergeMaxBudget = 100
7072
}
7173

7274
-- Write to the console

src/EVM/Expr.hs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,13 @@ simplifyNoLitToKeccak e = untilFixpoint (mapExpr go) e
10571057
go (IsZero (Xor x y)) = eq x y
10581058
go (IsZero a) = iszero a
10591059

1060+
-- ITE (if-then-else) simplification for path merging
1061+
go (ITE (Lit 0) _ f) = f
1062+
go (ITE (Lit _) t _) = t
1063+
go (ITE _ t f) | t == f = t
1064+
go (ITE c (ITE c' a _) d) | c == c' = ITE c a d -- nested same condition in then
1065+
go (ITE c a (ITE c' _ d)) | c == c' = ITE c a d -- nested same condition in else
1066+
10601067
-- syntactic Eq reduction
10611068
go (Eq (Lit a) (Lit b))
10621069
| a == b = Lit 1

src/EVM/Format.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,7 @@ formatExpr = go
655655
Eq a b -> fmt "Eq" [a, b]
656656
EqByte a b -> fmt "EqByte" [a, b]
657657
IsZero a -> fmt "IsZero" [a]
658+
ITE c t el -> fmt "ITE" [c, t, el]
658659

659660
And a b -> fmt "And" [a, b]
660661
Or a b -> fmt "Or" [a, b]

src/EVM/Merge.hs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
{-# LANGUAGE DataKinds #-}
2+
{-# LANGUAGE ScopedTypeVariables #-}
3+
4+
-- | State merging for symbolic execution
5+
--
6+
-- This module provides functions for merging execution paths during symbolic
7+
-- execution. Instead of forking on every JUMPI, we can speculatively execute
8+
-- both paths and merge them using ITE (If-Then-Else) expressions when:
9+
--
10+
-- 1. Both paths converge to the same PC (forward jump pattern)
11+
-- 2. Neither path has side effects (storage, memory, logs unchanged)
12+
-- 3. Both paths have the same stack depth
13+
--
14+
-- This reduces path explosion from 2^N to linear in many common patterns.
15+
16+
module EVM.Merge
17+
( tryMergeForwardJump
18+
) where
19+
20+
import Control.Monad (when)
21+
import Control.Monad.State.Strict (get, put)
22+
import Debug.Trace (traceM)
23+
import Optics.Core
24+
import Optics.State
25+
26+
import EVM.Effects (Config(..))
27+
import EVM.Expr qualified as Expr
28+
import EVM.Types
29+
30+
-- | Execute instructions speculatively until target PC is reached or
31+
-- we hit budget limit/SMT query/RPC call/revert/error.
32+
speculateLoopOuter
33+
:: Config
34+
-> EVM Symbolic () -- ^ Single-step executor
35+
-> Int -- ^ Target PC
36+
-> EVM Symbolic (Maybe (VM Symbolic))
37+
speculateLoopOuter conf exec1Step targetPC = do
38+
-- Initialize merge state for this speculation
39+
let budget = conf.mergeMaxBudget
40+
modifying #mergeState $ \ms -> ms { msActive = True , msRemainingBudget = budget }
41+
res <- speculateLoop conf exec1Step targetPC
42+
-- Reset merge state
43+
assign #mergeState defaultMergeState
44+
pure res
45+
46+
-- | Inner loop for speculative execution with budget tracking
47+
speculateLoop
48+
:: Config
49+
-> EVM Symbolic () -- ^ Single-step executor
50+
-> Int -- ^ Target PC
51+
-> EVM Symbolic (Maybe (VM Symbolic))
52+
speculateLoop conf exec1Step targetPC = do
53+
ms <- use #mergeState
54+
if ms.msRemainingBudget <= 0
55+
then pure Nothing -- Budget exhausted
56+
else do
57+
pc <- use (#state % #pc)
58+
result <- use #result
59+
case result of
60+
Just _ -> pure Nothing -- Hit RPC call/revert/SMT query/etc.
61+
Nothing
62+
| pc == targetPC -> Just <$> get -- Reached target
63+
| otherwise -> do
64+
-- Decrement budget and execute one instruction
65+
modifying #mergeState $ \s -> s { msRemainingBudget = subtract 1 s.msRemainingBudget }
66+
exec1Step
67+
speculateLoop conf exec1Step targetPC
68+
69+
-- | Try to merge a forward jump (skip block pattern) for Symbolic execution
70+
-- Returns True if merge succeeded, False if we should fall back to forking
71+
-- SOUNDNESS: Both paths (jump and fall-through) must converge to the same PC,
72+
-- have the same stack depth, and have no side effects. Only then can we merge.
73+
tryMergeForwardJump
74+
:: Config
75+
-> EVM Symbolic () -- ^ Single-step executor
76+
-> Int -- ^ Current PC
77+
-> Int -- ^ Jump target PC
78+
-> Expr EWord -- ^ Branch condition
79+
-> [Expr EWord] -- ^ Stack after popping JUMPI args
80+
-> EVM Symbolic Bool
81+
tryMergeForwardJump conf exec1Step currentPC jumpTarget cond stackAfterPop = do
82+
-- Only handle forward jumps (skip block pattern)
83+
if jumpTarget <= currentPC
84+
then pure False -- Not a forward jump
85+
else do
86+
vm0 <- get
87+
88+
-- Skip merge if memory is mutable (ConcreteMemory)
89+
case vm0.state.memory of
90+
ConcreteMemory _ -> pure False
91+
SymbolicMemory _ -> do
92+
-- True branch (jump taken): Just sets PC to target, no execution needed
93+
let trueStack = stackAfterPop -- Stack after popping JUMPI args
94+
95+
-- False branch (fall through): Execute until we reach jump target
96+
assign' (#state % #stack) stackAfterPop
97+
modifying' (#state % #pc) (+ 1) -- Move past JUMPI
98+
maybeVmFalse <- speculateLoopOuter conf exec1Step jumpTarget
99+
100+
case maybeVmFalse of
101+
Nothing -> put vm0 >> pure False -- can't merge: EVM error, SMT/RPC query, over-budget
102+
Just vmFalse -> do
103+
let falseStack = vmFalse.state.stack
104+
soundnessOK = checkNoSideEffects vm0 vmFalse
105+
106+
-- Check merge conditions: same stack depth AND no side effects
107+
if length trueStack == length falseStack && soundnessOK
108+
then do
109+
-- Merge stacks using ITE expressions, simplifying to prevent growth
110+
let condSimp = Expr.simplify cond
111+
mergeExpr t f = Expr.simplify $ ITE condSimp t f
112+
mergedStack = zipWith mergeExpr trueStack falseStack
113+
-- Use vm0 as base and update only PC and stack
114+
when conf.debug $ traceM $
115+
"Merged forward jump to PC: " <> show jumpTarget <> " from PC: " <> show currentPC
116+
put vm0
117+
assign (#state % #pc) jumpTarget
118+
assign (#state % #stack) mergedStack
119+
assign #result Nothing
120+
assign (#mergeState % #msActive) False
121+
pure True
122+
else put vm0 >> pure False -- can't merge: stack depth or state differs
123+
124+
-- | Check that execution had no side effects (storage, memory, logs, etc.)
125+
checkNoSideEffects :: VM Symbolic -> VM Symbolic -> Bool
126+
checkNoSideEffects vm0 vmAfter =
127+
let memoryUnchanged = case (vm0.state.memory, vmAfter.state.memory) of
128+
(SymbolicMemory m1, SymbolicMemory m2) -> m1 == m2
129+
_ -> False
130+
memorySizeUnchanged = vm0.state.memorySize == vmAfter.state.memorySize
131+
returndataUnchanged = vm0.state.returndata == vmAfter.state.returndata
132+
storageUnchanged = vm0.env.contracts == vmAfter.env.contracts
133+
logsUnchanged = vm0.logs == vmAfter.logs
134+
constraintsUnchanged = vm0.constraints == vmAfter.constraints
135+
keccakUnchanged = vm0.keccakPreImgs == vmAfter.keccakPreImgs
136+
freshVarUnchanged = vm0.freshVar == vmAfter.freshVar
137+
framesUnchanged = length vm0.frames == length vmAfter.frames
138+
subStateUnchanged = vm0.tx.subState == vmAfter.tx.subState
139+
in memoryUnchanged && memorySizeUnchanged && returndataUnchanged
140+
&& storageUnchanged && logsUnchanged && constraintsUnchanged
141+
&& keccakUnchanged && freshVarUnchanged
142+
&& framesUnchanged && subStateUnchanged

src/EVM/SMT.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,11 @@ exprToSMT = \case
476476
IsZero a -> do
477477
cond <- op2 "=" a (Lit 0)
478478
pure $ "(ite " <> cond `sp` one `sp` zero <> ")"
479+
ITE c t f -> do
480+
condEnc <- exprToSMT c
481+
thenEnc <- exprToSMT t
482+
elseEnc <- exprToSMT f
483+
pure $ "(ite (distinct " <> condEnc `sp` zero <> ") " <> thenEnc `sp` elseEnc <> ")"
479484
And a b -> op2 "bvand" a b
480485
Or a b -> op2 "bvor" a b
481486
Xor a b -> op2 "bvxor" a b

src/EVM/Traversals.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ foldExpr f acc expr = acc <> (go expr)
123123
e@(SGT a b) -> f e <> (go a) <> (go b)
124124
e@(Eq a b) -> f e <> (go a) <> (go b)
125125
e@(IsZero a) -> f e <> (go a)
126+
e@(ITE c t el) -> f e <> (go c) <> (go t) <> (go el)
126127

127128
-- bits
128129

@@ -449,6 +450,11 @@ mapExprM f expr = case expr of
449450
IsZero a -> do
450451
a' <- mapExprM f a
451452
f (IsZero a')
453+
ITE c t el -> do
454+
c' <- mapExprM f c
455+
t' <- mapExprM f t
456+
el' <- mapExprM f el
457+
f (ITE c' t' el')
452458

453459
-- bits
454460

0 commit comments

Comments
 (0)