feat(uplc-evaluator): use integer nanoseconds for timing measurements (#7552)

Unisay · web-flow · commit 0dcea8dd28d0 · 2026-01-30T11:05:12.000Z
Replace floating-point milliseconds with integer nanoseconds to eliminate representation inconsistencies across platforms and precision loss in JSON serialization. Timing values now serialize as integers (cpu_time_ns) instead of floats (cpu_time_ms). Change TimingSample from data to newtype for zero-cost abstraction. Update criterion-measurement conversion to multiply by 1e9 and round to Word64. Adjust test assertions to validate nanosecond ranges (500ms = 500000000ns). Update SPEC.md with new field name, type, examples, and jq usage patterns. Closes IntersectMBO/plutus-private#2045
diff --git a/plutus-benchmark/uplc-evaluator/Main.hs b/plutus-benchmark/uplc-evaluator/Main.hs
@@ -23,6 +23,7 @@ import Data.Text qualified as T
 import Data.Text.IO qualified as TIO
 import Data.UUID (UUID)
 import Data.UUID qualified as UUID
+import Data.Word (Word64)
 import GHC.Generics (Generic)
 import Main.Utf8 (withUtf8)
 import Options.Applicative
@@ -75,16 +76,14 @@ configParser =
           <> help "Polling interval in milliseconds (default: 1000)"
       )
 
--- | Timing sample for a single evaluation run (variable data only)
-data TimingSample = TimingSample
-  { tsCpuTimeMs :: Double
-  }
+-- | Timing sample for a single evaluation run: wall-clock time in integer nanoseconds
+newtype TimingSample = TimingSample {tsCpuTimeNs :: Word64}
   deriving stock (Generic, Show)
 
 instance ToJSON TimingSample where
   toJSON TimingSample {..} =
     Aeson.object
-      [ "cpu_time_ms" .= tsCpuTimeMs
+      [ "cpu_time_ns" .= tsCpuTimeNs
       ]
 
 -- | Successful evaluation result with deterministic budget at top level
@@ -164,15 +163,15 @@ parseUplcProgram input =
     Right prog -> Right $ void prog
 
 {-| Measure execution time of an IO action using criterion-measurement.
-Returns the result and execution time in milliseconds.
+Returns the result and execution time in nanoseconds.
 Uses evaluate+force to ensure the result is fully evaluated before measuring end time. -}
-measureExecution :: NFData a => IO a -> IO (a, Double)
+measureExecution :: NFData a => IO a -> IO (a, Word64)
 measureExecution ioAction = do
   startTime <- getTime
   result <- evaluate . force =<< ioAction
   endTime <- getTime
-  let timeMs = (endTime - startTime) * 1000 -- Convert seconds to milliseconds
-  return (result, timeMs)
+  let timeNs = round ((endTime - startTime) * 1e9) -- Convert seconds to nanoseconds
+  return (result, timeNs)
 
 -- | Result of CEK evaluation with budget information
 data EvalBudget = EvalBudget
@@ -258,13 +257,13 @@ collectMeasurements term sampleCount = do
       let samples = map buildTimingSample timings
       return $ Right (budget, samples)
   where
-    -- Measure a single execution and return wall-clock time in milliseconds
+    -- Measure a single execution and return wall-clock time in nanoseconds
     measureSingleExecution
       :: UPLC.Term UPLC.NamedDeBruijn PLC.DefaultUni PLC.DefaultFun ()
-      -> IO Double
+      -> IO Word64
     measureSingleExecution t = do
-      (_, timeMs) <- measureExecution $ return $! evalTerm t
-      return timeMs
+      (_, timeNs) <- measureExecution $ return $! evalTerm t
+      return timeNs
 
     -- Evaluate term (used for timing, result discarded)
     evalTerm
@@ -273,10 +272,10 @@ collectMeasurements term sampleCount = do
     evalTerm = evaluateWithBudget
 
     -- Build a TimingSample from timing (only variable data)
-    buildTimingSample :: Double -> TimingSample
-    buildTimingSample cpuTimeMs =
+    buildTimingSample :: Word64 -> TimingSample
+    buildTimingSample cpuTimeNs =
       TimingSample
-        { tsCpuTimeMs = cpuTimeMs
+        { tsCpuTimeNs = cpuTimeNs
         }
 
 -- | Process a single program file
diff --git a/plutus-benchmark/uplc-evaluator/SPEC.md b/plutus-benchmark/uplc-evaluator/SPEC.md
@@ -671,7 +671,7 @@ Result files are JSON objects containing evaluation metrics from UPLC program ex
   "memory_bytes": <number>,
   "timing_samples": [
     {
-      "cpu_time_ms": <number>
+      "cpu_time_ns": <integer>
     },
     ...
   ]
@@ -746,15 +746,15 @@ An array of timing samples collected during program evaluation. Each program is
 
 Each timing sample object in the `timing_samples` array contains:
 
-#### `cpu_time_ms` (required)
+#### `cpu_time_ns` (required)
 
-Wall-clock execution time in milliseconds. This measures the actual elapsed time for the CEK machine evaluation.
+Wall-clock execution time in nanoseconds. This measures the actual elapsed time for the CEK machine evaluation.
 
-**Type**: Number (floating-point)
+**Type**: Integer
 
-**Unit**: Milliseconds
+**Unit**: Nanoseconds
 
-**Example**: `0.421`
+**Example**: `421000`
 
 **Note**: This is wall-clock time, not CPU budget units. It reflects actual hardware performance and may vary between runs.
 
@@ -768,25 +768,25 @@ Wall-clock execution time in milliseconds. This measures the actual elapsed time
   "memory_budget": 50000,
   "memory_bytes": 400000,
   "timing_samples": [
-    {"cpu_time_ms": 0.421},
-    {"cpu_time_ms": 0.398},
-    {"cpu_time_ms": 0.415},
-    {"cpu_time_ms": 0.402},
-    {"cpu_time_ms": 0.419},
-    {"cpu_time_ms": 0.408},
-    {"cpu_time_ms": 0.411},
-    {"cpu_time_ms": 0.425},
-    {"cpu_time_ms": 0.403},
-    {"cpu_time_ms": 0.417}
+    {"cpu_time_ns": 421000},
+    {"cpu_time_ns": 398000},
+    {"cpu_time_ns": 415000},
+    {"cpu_time_ns": 402000},
+    {"cpu_time_ns": 419000},
+    {"cpu_time_ns": 408000},
+    {"cpu_time_ns": 411000},
+    {"cpu_time_ns": 425000},
+    {"cpu_time_ns": 403000},
+    {"cpu_time_ns": 417000}
   ]
 }
 ```
 
 **Observations from this example**:
 - 10 timing samples collected
-- `cpu_time_ms` shows variation between runs (0.398 to 0.425 ms)
+- `cpu_time_ns` shows variation between runs (398000 to 425000 ns)
 - Budget values (`cpu_budget`, `memory_budget`, `memory_bytes`) are deterministic and at top level
-- Clients should compute statistics: mean CPU time ≈ 0.412 ms, std dev ≈ 0.009 ms
+- Clients should compute statistics: mean `cpu_time_ns` ≈ 412000 ns (0.412 ms), std dev ≈ 9000 ns (0.009 ms)
 
 ### Example Result: Complex Program
 
@@ -798,21 +798,21 @@ Wall-clock execution time in milliseconds. This measures the actual elapsed time
   "memory_budget": 10000000,
   "memory_bytes": 80000000,
   "timing_samples": [
-    {"cpu_time_ms": 125.842},
-    {"cpu_time_ms": 123.156},
-    {"cpu_time_ms": 127.934},
-    {"cpu_time_ms": 124.587},
-    {"cpu_time_ms": 126.419},
-    {"cpu_time_ms": 122.738},
-    {"cpu_time_ms": 128.205},
-    {"cpu_time_ms": 125.063},
-    {"cpu_time_ms": 124.178},
-    {"cpu_time_ms": 126.891},
-    {"cpu_time_ms": 123.542},
-    {"cpu_time_ms": 127.319},
-    {"cpu_time_ms": 125.684},
-    {"cpu_time_ms": 124.926},
-    {"cpu_time_ms": 126.437}
+    {"cpu_time_ns": 125842000},
+    {"cpu_time_ns": 123156000},
+    {"cpu_time_ns": 127934000},
+    {"cpu_time_ns": 124587000},
+    {"cpu_time_ns": 126419000},
+    {"cpu_time_ns": 122738000},
+    {"cpu_time_ns": 128205000},
+    {"cpu_time_ns": 125063000},
+    {"cpu_time_ns": 124178000},
+    {"cpu_time_ns": 126891000},
+    {"cpu_time_ns": 123542000},
+    {"cpu_time_ns": 127319000},
+    {"cpu_time_ns": 125684000},
+    {"cpu_time_ns": 124926000},
+    {"cpu_time_ns": 126437000}
   ]
 }
 ```
@@ -850,7 +850,7 @@ Std deviation:    0.009 ms
 - Results are written to `/benchmarking/output/{job_id}.result.json` when evaluation completes successfully
 - Result files remain available until cleanup (see retention policy)
 - Budget values (`cpu_budget`, `memory_budget`, `memory_bytes`) are deterministic for a given program and cost model
-- Wall-clock times (`cpu_time_ms`) may vary between runs due to system load
+- Wall-clock times (`cpu_time_ns`) may vary between runs due to system load
 - Multiple timing samples enable statistical confidence in measurements
 - Budget consumption is independent of hardware performance (cost model is abstract)
 - Clients should parse JSON and extract timing_samples array for statistical analysis
@@ -1501,10 +1501,10 @@ Parse the result JSON and extract metrics:
 
 ```bash
 # Parse result JSON and extract timing samples
-jq '.timing_samples[] | {cpu_time_ms}' ./results/${JOB_ID}.result.json
+jq '.timing_samples[] | {cpu_time_ns}' ./results/${JOB_ID}.result.json
 
-# Compute statistics (mean CPU time)
-jq '[.timing_samples[].cpu_time_ms] | add / length' ./results/${JOB_ID}.result.json
+# Compute statistics (mean CPU time in nanoseconds)
+jq '[.timing_samples[].cpu_time_ns] | add / length' ./results/${JOB_ID}.result.json
 
 # Check budget consumption (deterministic values at top level)
 jq '{cpu_budget, memory_budget, memory_bytes}' ./results/${JOB_ID}.result.json
@@ -1679,7 +1679,7 @@ while [ $TOTAL_WAIT -lt $MAX_TOTAL_WAIT ]; do
     echo "=== Result Summary ==="
     echo "Program ID: $JOB_ID"
     echo "Sample count: $(jq '.timing_samples | length' ./results/${JOB_ID}.result.json)"
-    echo "Mean CPU time: $(jq '[.timing_samples[].cpu_time_ms] | add / length' ./results/${JOB_ID}.result.json) ms"
+    echo "Mean CPU time: $(jq '[.timing_samples[].cpu_time_ns] | add / length / 1000000' ./results/${JOB_ID}.result.json) ms"
     echo "CPU budget: $(jq '.cpu_budget' ./results/${JOB_ID}.result.json) ExCPU"
     echo "Memory budget: $(jq '.memory_budget' ./results/${JOB_ID}.result.json) ExMemory"
     echo ""
diff --git a/plutus-benchmark/uplc-evaluator/test/Spec.hs b/plutus-benchmark/uplc-evaluator/test/Spec.hs
@@ -90,13 +90,13 @@ main = defaultMain $ testGroup "uplc-evaluator integration tests" do
                 ("timing_samples should have 10-20 entries, got " ++ show sampleCount)
                 (sampleCount >= 10 && sampleCount <= 20)
 
-              -- Verify each timing sample has positive cpu_time_ms
+              -- Verify each timing sample has positive cpu_time_ns
               mapM_
                 ( \s -> do
-                    -- Check that cpu_time_ms is in reasonable range
+                    -- Check that cpu_time_ns is strictly positive
                     assertBool
-                      ("cpu_time_ms should be > 0, got " ++ show (tsCpuTimeMs s))
-                      (tsCpuTimeMs s > 0)
+                      ("cpu_time_ns should be > 0, got " ++ show (tsCpuTimeNs s))
+                      (tsCpuTimeNs s > 0)
                 )
                 (erTimingSamples result)
 
@@ -501,14 +501,14 @@ main = defaultMain $ testGroup "uplc-evaluator integration tests" do
                 ("memory_bytes should be >= 0 and <= 10485760, got " ++ show memBytes)
                 (memBytes >= 0 && memBytes <= 10485760)
 
-              -- Verify each timing sample has cpu_time_ms in expected range
+              -- Verify each timing sample has cpu_time_ns of at most 500 ms (500000000 ns); the >= 0 bound always holds for Word64
               -- Simple programs can evaluate in microseconds
               mapM_
                 ( \s -> do
-                    let cpuTime = tsCpuTimeMs s
+                    let cpuTime = tsCpuTimeNs s
                     assertBool
-                      ("cpu_time_ms should be >= 0 and <= 500.0, got " ++ show cpuTime)
-                      (cpuTime >= 0 && cpuTime <= 500.0)
+                      ("cpu_time_ns should be >= 0 and <= 500000000, got " ++ show cpuTime)
+                      (cpuTime >= 0 && cpuTime <= 500000000)
                 )
                 samples
         ]
@@ -546,13 +546,13 @@ main = defaultMain $ testGroup "uplc-evaluator integration tests" do
                 ("memory_bytes should be > 0, got " ++ show (erMemoryBytes result))
                 (erMemoryBytes result > 0)
 
-              -- cpu_time_ms values may vary (timing is non-deterministic)
+              -- cpu_time_ns values may vary (timing is non-deterministic)
               -- We just verify they exist and are positive
-              let cpuTimes = map tsCpuTimeMs samples
+              let cpuTimes = map tsCpuTimeNs samples
               mapM_
                 ( \t ->
                     assertBool
-                      ("cpu_time_ms should be > 0, got " ++ show t)
+                      ("cpu_time_ns should be > 0, got " ++ show t)
                       (t > 0)
                 )
                 cpuTimes
diff --git a/plutus-benchmark/uplc-evaluator/test/TestHelpers.hs b/plutus-benchmark/uplc-evaluator/test/TestHelpers.hs
@@ -36,22 +36,23 @@ import Data.Text (Text)
 import Data.Text qualified as T
 import Data.UUID (UUID)
 import Data.UUID qualified as UUID
+import Data.Word (Word64)
 import GHC.Generics (Generic)
 import Harness (ServiceHandle (..))
 import System.Directory (doesFileExist)
 import System.FilePath ((</>))
 import Test.Tasty.HUnit (assertFailure)
 
 -- | Timing sample for a single evaluation run (variable data only)
-data TimingSample = TimingSample
-  { tsCpuTimeMs :: Double
+newtype TimingSample = TimingSample
+  { tsCpuTimeNs :: Word64
   }
   deriving stock (Generic, Show, Eq)
 
 instance FromJSON TimingSample where
   parseJSON = Aeson.withObject "TimingSample" \v ->
     TimingSample
-      <$> v .: "cpu_time_ms"
+      <$> v .: "cpu_time_ns"
 
 -- | Successful evaluation result with deterministic budget at top level
 data EvalResult = EvalResult