Skip to content

Commit ba11c8b

Browse files
committed
feat(bench): implement comprehensive benchmarking for Value builtins
Add systematic benchmarking framework with worst-case test coverage: LookupCoin with 400 power-of-2 combinations testing BST depth range 2-21, ValueContains with 1000+ cases using multiplied_sizes model for x * y complexity. Includes R statistical models: linearInZ for LookupCoin, multiplied_sizes for ValueContains to properly account for both container and contained sizes.
1 parent a25ee85 commit ba11c8b

File tree

4 files changed

+379
-3
lines changed

4 files changed

+379
-3
lines changed
Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
{-# LANGUAGE BlockArguments #-}
2+
{-# LANGUAGE ImportQualifiedPost #-}
3+
{-# LANGUAGE LambdaCase #-}
4+
{-# LANGUAGE NumericUnderscores #-}
5+
{-# LANGUAGE TupleSections #-}
6+
{-# LANGUAGE TypeApplications #-}
7+
8+
module Benchmarks.Values (makeBenchmarks) where
9+
10+
import Prelude
11+
12+
import Common
13+
import Control.Monad (replicateM)
14+
import Criterion.Main (Benchmark)
15+
import Data.Bits (shiftR, (.&.))
16+
import Data.ByteString (ByteString)
17+
import Data.ByteString qualified as BS
18+
import Data.Int (Int64)
19+
import Data.List (find)
20+
import Data.Word (Word8)
21+
import GHC.Stack (HasCallStack)
22+
import PlutusCore (DefaultFun (LookupCoin, UnValueData, ValueContains, ValueData))
23+
import PlutusCore.Builtin (BuiltinResult (BuiltinFailure, BuiltinSuccess, BuiltinSuccessWithLogs))
24+
import PlutusCore.Evaluation.Machine.ExMemoryUsage (ValueLogOuterSizeAddLogMaxInnerSize (..),
25+
ValueTotalSize (..))
26+
import PlutusCore.Value (K, Value)
27+
import PlutusCore.Value qualified as Value
28+
import System.Random.Stateful (StatefulGen, StdGen, runStateGen_, uniformRM)
29+
30+
----------------------------------------------------------------------------------------------------
31+
-- Benchmarks --------------------------------------------------------------------------------------
32+
33+
makeBenchmarks :: StdGen -> [Benchmark]
34+
makeBenchmarks gen =
35+
[ lookupCoinBenchmark gen
36+
, valueContainsBenchmark gen
37+
, valueDataBenchmark gen
38+
, unValueDataBenchmark gen
39+
]
40+
41+
----------------------------------------------------------------------------------------------------
42+
-- LookupCoin --------------------------------------------------------------------------------------
43+
44+
lookupCoinBenchmark :: StdGen -> Benchmark
45+
lookupCoinBenchmark gen =
46+
createThreeTermBuiltinBenchElementwiseWithWrappers
47+
(id, id, ValueLogOuterSizeAddLogMaxInnerSize) -- Wrap Value argument to report sum of log sizes
48+
LookupCoin -- the builtin fun
49+
[] -- no type arguments needed (monomorphic builtin)
50+
(lookupCoinArgs gen) -- the argument combos to generate benchmarks for
51+
52+
lookupCoinArgs :: StdGen -> [(ByteString, ByteString, Value)]
53+
lookupCoinArgs gen = runStateGen_ gen \(g :: g) -> do
54+
{- Exhaustive power-of-2 combinations for BST worst-case benchmarking.
55+
56+
Tests all combinations of sizes from powers and half-powers of 2.
57+
For each integer n ∈ {1..10}, includes both 2^n and 2^(n+0.5) ≈ 2^n * √2.
58+
59+
This provides:
60+
- 400 total test points (20 × 20 grid)
61+
- Complete systematic coverage of depth range 2 to 21
62+
- Finer granularity between powers of 2
63+
- All distribution patterns (balanced, outer-heavy, inner-heavy)
64+
65+
Size values: 2, 3, 4, 6, 8, 11, 16, 23, 32, 45, 64, 91, 128, 181,
66+
256, 362, 512, 724, 1024, 1448
67+
68+
Depth coverage:
69+
- Minimum depth: log₂(2) + log₂(2) ≈ 2
70+
- Maximum depth: log₂(1448) + log₂(1448) ≈ 21
71+
-}
72+
let
73+
-- Generate powers of 2 and their geometric means (half-powers)
74+
sizes =
75+
[ 2 ^ n -- 2^n
76+
| n <- [1 .. 10 :: Int]
77+
]
78+
++ [ round @Double (2 ^ n * sqrt 2) -- 2^(n+0.5)
79+
| n <- [1 .. 10 :: Int]
80+
]
81+
82+
sequence
83+
-- Generate worst-case lookups for each size combination
84+
[ withWorstCaseSearchKeys (generateConstrainedValueWithMaxPolicy numPolicies tokensPerPolicy g)
85+
| numPolicies <- sizes
86+
, tokensPerPolicy <- sizes
87+
]
88+
89+
-- | Add worst-case search keys targeting the max-size inner map
90+
withWorstCaseSearchKeys :: (Monad m) => m (Value, K, K) -> m (ByteString, ByteString, Value)
91+
withWorstCaseSearchKeys genValueWithKeys = do
92+
(value, maxPolicyId, deepestToken) <- genValueWithKeys
93+
pure (Value.unK maxPolicyId, Value.unK deepestToken, value)
94+
95+
----------------------------------------------------------------------------------------------------
96+
-- ValueContains -----------------------------------------------------------------------------------
97+
98+
valueContainsBenchmark :: StdGen -> Benchmark
99+
valueContainsBenchmark gen =
100+
createTwoTermBuiltinBenchElementwiseWithWrappers
101+
(ValueLogOuterSizeAddLogMaxInnerSize, ValueTotalSize)
102+
-- Container: sum of log sizes, Contained: totalSize
103+
ValueContains -- the builtin fun
104+
[] -- no type arguments needed (monomorphic builtin)
105+
(valueContainsArgs gen) -- the argument combos to generate benchmarks for
106+
107+
valueContainsArgs :: StdGen -> [(Value, Value)]
108+
valueContainsArgs gen = runStateGen_ gen \g -> do
109+
{- ValueContains performs multiple LookupCoin operations (one per entry in contained).
110+
Worst case: All lookups succeed at maximum depth with many entries to check.
111+
112+
Strategy:
113+
1. Generate container with worst-case BST structure (uniform, power-of-2 sizes)
114+
2. Select entries FROM container (maintain subset relationship for no early exit)
115+
3. Include deepest entry to force maximum BST traversal
116+
4. Test multiple contained sizes to explore iteration count dimension
117+
118+
Result: ~1000 systematic worst-case benchmarks vs 100 random cases previously
119+
-}
120+
121+
-- Use power-of-2 grid (without half-powers) for systematic coverage
122+
-- ValueContains does multiple lookups, so we don't need as fine-grained
123+
-- size variation as LookupCoin
124+
let containerSizes = [2 ^ n | n <- [1 .. 10 :: Int]] -- [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
125+
126+
-- Generate test cases for all container size combinations
127+
concat
128+
<$> sequence
129+
[ do
130+
-- Generate container with worst-case BST structure:
131+
-- - Uniform distribution (all policies have same token count)
132+
-- - Worst-case keys (long common prefix, differ in last 4 bytes)
133+
-- - Returns metadata about the deepest entry
134+
(container, maxPolicyId, deepestToken) <-
135+
generateConstrainedValueWithMaxPolicy numPolicies tokensPerPolicy g
136+
137+
-- Extract all entries from container as a flat list
138+
-- This maintains the subset relationship: contained ⊆ container
139+
let allEntries = Value.toFlatList container
140+
totalEntries = length allEntries
141+
142+
-- Find the worst-case entry (deepest in both BSTs)
143+
-- This entry forces maximum depth lookup:
144+
-- - maxPolicyId: first in outer BST (but all equal size, so any works)
145+
-- - deepestToken: last token in sorted order (maximum inner BST depth)
146+
let worstCaseEntry =
147+
find (\(p, t, _) -> p == maxPolicyId && t == deepestToken) allEntries
148+
149+
-- Generate test cases for different contained sizes (uniform linear distribution)
150+
-- Each size tests the same container with different iteration counts
151+
-- Use uniform spacing from 1 to min(1000, totalEntries) for better distribution
152+
let maxContainedSize = min 1000 totalEntries
153+
numSamples = 10
154+
containedSizes =
155+
if totalEntries < numSamples
156+
then [1 .. totalEntries] -- Test all sizes for small containers
157+
else
158+
let step = maxContainedSize `div` numSamples
159+
in [i * step | i <- [1 .. numSamples], i * step > 0]
160+
++ [maxContainedSize | maxContainedSize `notElem` [i * step | i <- [1 .. numSamples]]]
161+
162+
-- Create one test case per contained size
163+
pure
164+
[ let
165+
-- Select entries ensuring worst-case is included
166+
-- Place worst-case entry at END so it's checked (not early-exit)
167+
selectedEntries =
168+
case worstCaseEntry of
169+
Just worst ->
170+
-- Take (containedSize - 1) entries, then add worst-case
171+
-- This ensures: 1) subset relationship maintained
172+
-- 2) worst-case depth is hit
173+
-- 3) no early exit (all lookups succeed)
174+
let numOthers = min (containedSize - 1) (totalEntries - 1)
175+
others = take numOthers allEntries
176+
in others ++ [worst]
177+
Nothing ->
178+
-- Fallback if worst-case entry somehow not found
179+
-- (shouldn't happen, but defensive programming)
180+
take containedSize allEntries
181+
182+
-- Build contained Value from selected entries
183+
-- This maintains the Value structure while ensuring subset
184+
contained =
185+
unsafeFromBuiltinResult $
186+
Value.fromList
187+
[ (policyId, [(tokenName, quantity)])
188+
| (policyId, tokenName, quantity) <- selectedEntries
189+
]
190+
in
191+
(container, contained)
192+
| containedSize <- containedSizes
193+
]
194+
| numPolicies <- containerSizes
195+
, tokensPerPolicy <- containerSizes
196+
]
197+
198+
----------------------------------------------------------------------------------------------------
199+
-- ValueData ---------------------------------------------------------------------------------------
200+
201+
valueDataBenchmark :: StdGen -> Benchmark
202+
valueDataBenchmark gen = createOneTermBuiltinBench ValueData [] (generateTestValues gen)
203+
204+
----------------------------------------------------------------------------------------------------
205+
-- UnValueData -------------------------------------------------------------------------------------
206+
207+
unValueDataBenchmark :: StdGen -> Benchmark
208+
unValueDataBenchmark gen =
209+
createOneTermBuiltinBench UnValueData [] (Value.valueData <$> generateTestValues gen)
210+
211+
----------------------------------------------------------------------------------------------------
212+
-- Value Generators --------------------------------------------------------------------------------
213+
214+
-- | Generate common test values for benchmarking
215+
generateTestValues :: StdGen -> [Value]
216+
generateTestValues gen = runStateGen_ gen \g ->
217+
-- Empty value as edge case
218+
(Value.empty :)
219+
<$>
220+
-- Random tests for parameter spread (100 combinations)
221+
replicateM 100 (generateValue g)
222+
223+
-- | Generate Value with random budget between 1 and 30,000 bytes
224+
generateValue :: (StatefulGen g m) => g -> m Value
225+
generateValue g = do
226+
maxEntries <- uniformRM (1, maxValueEntries maxValueInBytes) g
227+
generateValueMaxEntries maxEntries g
228+
where
229+
-- Maximum budget for Value generation (30,000 bytes)
230+
maxValueInBytes :: Int
231+
maxValueInBytes = 30_000
232+
233+
-- Calculate maximum possible number of entries with maximal key sizes that fits in the budget
234+
maxValueEntries :: Int -> Int
235+
maxValueEntries budget =
236+
let bytesPerEntry =
237+
{- bytes per policy -} Value.maxKeyLen
238+
{- bytes per token -} + Value.maxKeyLen
239+
{- bytes per quantity (Int64 takes up 8 bytes) -} + 8
240+
in budget `div` bytesPerEntry
241+
242+
-- | Generate Value within total size budget
243+
generateValueMaxEntries :: (StatefulGen g m) => Int -> g -> m Value
244+
generateValueMaxEntries maxEntries g = do
245+
-- Uniform random distribution: cover full range from many policies (few tokens each)
246+
-- to few policies (many tokens each)
247+
numPolicies <- uniformRM (1, maxEntries) g
248+
let tokensPerPolicy = if numPolicies > 0 then maxEntries `div` numPolicies else 0
249+
250+
generateConstrainedValue numPolicies tokensPerPolicy g
251+
252+
-- | Generate constrained Value with information about max-size policy
253+
generateConstrainedValueWithMaxPolicy
254+
:: (StatefulGen g m)
255+
=> Int -- Number of policies
256+
-> Int -- Number of tokens per policy
257+
-> g
258+
-> m (Value, K, K) -- Returns (value, maxPolicyId, deepestTokenInMaxPolicy)
259+
generateConstrainedValueWithMaxPolicy numPolicies tokensPerPolicy g = do
260+
policyIds <- replicateM numPolicies (generateKey g)
261+
tokenNames <- replicateM tokensPerPolicy (generateKey g)
262+
263+
let
264+
qty :: Value.Quantity
265+
qty = case Value.quantity (fromIntegral (maxBound :: Int64)) of
266+
Just q -> q
267+
Nothing -> error "generateConstrainedValueWithMaxPolicy: Int64 maxBound should be valid Quantity"
268+
269+
nestedMap :: [(K, [(K, Value.Quantity)])]
270+
nestedMap = (,(,qty) <$> tokenNames) <$> policyIds
271+
272+
value = unsafeFromBuiltinResult $ Value.fromList nestedMap
273+
274+
-- All policies have the same number of tokens in this uniform distribution,
275+
-- so we pick the first policy as the max-size policy for worst-case targeting
276+
maxPolicyId = head policyIds
277+
-- Pick the last token (deepest in binary search tree) for worst-case inner lookup
278+
deepestToken = last tokenNames
279+
280+
pure (value, maxPolicyId, deepestToken)
281+
282+
-- | Generate constrained Value (legacy interface for other builtins)
283+
generateConstrainedValue
284+
:: (StatefulGen g m)
285+
=> Int -- Number of policies
286+
-> Int -- Number of tokens per policy
287+
-> g
288+
-> m Value
289+
generateConstrainedValue numPolicies tokensPerPolicy g = do
290+
(value, _, _) <- generateConstrainedValueWithMaxPolicy numPolicies tokensPerPolicy g
291+
pure value
292+
293+
----------------------------------------------------------------------------------------------------
294+
-- Other Generators --------------------------------------------------------------------------------
295+
296+
{-| Generate a worst-case key for benchmarking ByteString comparisons.
297+
298+
ByteString comparison is lexicographic and short-circuits on the first differing byte.
299+
Random keys typically differ in the first 1-2 bytes, making comparisons artificially cheap.
300+
301+
For accurate worst-case costing, we generate keys with a common prefix (0xFF bytes) that
302+
differ only in the last bytes. This forces full-length comparisons during Map lookups,
303+
providing conservative cost estimates for adversarial on-chain scenarios.
304+
305+
We use a random integer to ensure uniqueness while maintaining the worst-case prefix pattern.
306+
-}
307+
generateKey :: (StatefulGen g m) => g -> m K
308+
generateKey g = do
309+
-- Generate a random integer for uniqueness
310+
n <- uniformRM (0, maxBound :: Int) g
311+
case Value.k (mkWorstCaseKey n) of
312+
Just key -> pure key
313+
Nothing -> error "Internal error: maxKeyLen key should always be valid"
314+
315+
{-| Helper: Create a worst-case ByteString key from an integer
316+
The key has maxKeyLen-4 bytes of 0xFF prefix, followed by 4 bytes encoding the integer
317+
-}
318+
mkWorstCaseKey :: Int -> ByteString
319+
mkWorstCaseKey n =
320+
let prefixLen = Value.maxKeyLen - 4
321+
prefix = BS.replicate prefixLen (0xFF :: Word8)
322+
-- Encode the integer in big-endian format (last 4 bytes)
323+
b0 = fromIntegral $ (n `shiftR` 24) .&. 0xFF
324+
b1 = fromIntegral $ (n `shiftR` 16) .&. 0xFF
325+
b2 = fromIntegral $ (n `shiftR` 8) .&. 0xFF
326+
b3 = fromIntegral $ n .&. 0xFF
327+
suffix = BS.pack [b0, b1, b2, b3]
328+
in prefix <> suffix
329+
330+
----------------------------------------------------------------------------------------------------
331+
-- Helper Functions --------------------------------------------------------------------------------
332+
333+
{-| Extract value from BuiltinResult for benchmark data generation.
334+
335+
This function is intended for use in test and benchmark code where BuiltinResult
336+
failures indicate bugs in the generator code, not runtime errors.
337+
338+
Errors if BuiltinResult indicates failure (should never happen with valid inputs).
339+
The call stack will provide context about where the error occurred.
340+
-}
341+
unsafeFromBuiltinResult :: (HasCallStack) => BuiltinResult a -> a
342+
unsafeFromBuiltinResult = \case
343+
BuiltinSuccess x -> x
344+
BuiltinSuccessWithLogs _ x -> x
345+
BuiltinFailure _ err -> error $ "BuiltinResult failed: " <> show err

plutus-core/cost-model/budgeting-bench/Main.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import Benchmarks.Pairs qualified
1717
import Benchmarks.Strings qualified
1818
import Benchmarks.Tracing qualified
1919
import Benchmarks.Unit qualified
20+
import Benchmarks.Values qualified
2021

2122
import Criterion.Main
2223
import Criterion.Types as C
@@ -60,6 +61,7 @@ main = do
6061
<> Benchmarks.Strings.makeBenchmarks gen
6162
<> Benchmarks.Tracing.makeBenchmarks gen
6263
<> Benchmarks.Unit.makeBenchmarks gen
64+
<> Benchmarks.Values.makeBenchmarks gen
6365

6466
{- Run the nop benchmarks with a large time limit (30 seconds) in an attempt to
6567
get accurate results. -}

0 commit comments

Comments
 (0)