From a5e5b28ab4d19e8b7967601c60f5d0055e14a096 Mon Sep 17 00:00:00 2001
From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com>
Date: Thu, 26 Mar 2026 14:45:55 +0100
Subject: [PATCH 1/7] test: switch to `ReTestItems`

---
 test/Project.toml                             |   1 +
 test/call_func_test.jl                        |  45 +++
 test/execute_on_input_tests.jl                |  82 ++++++
 .../{test_interpret.jl => interpret_tests.jl} |  10 +-
 test/make_interpreter_tests.jl                | 267 ++++++++++++++++++
 test/quality_tests.jl                         |   4 +
 test/runtests.jl                              |  17 +-
 test/test_call_func.jl                        |  41 ---
 test/test_execute_on_input.jl                 |  77 -----
 test/test_make_interpreter.jl                 | 257 -----------------
 10 files changed, 406 insertions(+), 395 deletions(-)
 create mode 100644 test/call_func_test.jl
 create mode 100644 test/execute_on_input_tests.jl
 rename test/{test_interpret.jl => interpret_tests.jl} (85%)
 create mode 100644 test/make_interpreter_tests.jl
 create mode 100644 test/quality_tests.jl
 delete mode 100644 test/test_call_func.jl
 delete mode 100644 test/test_execute_on_input.jl
 delete mode 100644 test/test_make_interpreter.jl

diff --git a/test/Project.toml b/test/Project.toml
index c49e080..91fab29 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -3,5 +3,6 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 HerbCore = "2b23ba43-8213-43cb-b5ea-38c12b45bd45"
 HerbGrammar = "4ef9e186-2fe5-4b24-8de7-9f7291f24af7"
 HerbSpecification = "6d54aada-062f-46d8-85cf-a1ceaf058a06"
+ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
 RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/call_func_test.jl b/test/call_func_test.jl
new file mode 100644
index 0000000..f1ad6d4
--- /dev/null
+++ b/test/call_func_test.jl
@@ -0,0 +1,45 @@
+@testitem "call_func" begin
+    using HerbInterpret: call_func
+
+    module TestModule  # fixture: functions of different arities that the tests look up by name
+        export add, mul, greet, no_arg
+
+        add(x, y) = x + y  # two arguments
+
+        mul(x, y, z) = x * y * z  # three arguments
+
+        greet(name, age) = "Hello, $name ! You are $age years old."  # mixed argument types
+
+        no_arg() = "No arguments!"  # zero arguments
+    end
+
+    @testset "Testing call_func" begin
+        @testset "No argument function" begin
+            @test call_func(TestModule, :no_arg) == "No arguments!"
+        end
+
+        @testset "Two-argument function" begin
+            @test call_func(TestModule, :add, 2, 3) == 5
+        end
+
+        @testset "Three-argument function" begin
+            @test call_func(TestModule, :mul, 2, 3, 4) == 24
+        end
+
+        @testset "Function with mixed types" begin
+            @test call_func(TestModule, :greet, "Alice", 25) == "Hello, Alice ! You are 25 years old."
+        end
+
+        @testset "Error cases" begin
+            @testset "Not enough arguments" begin 
+                @test_throws Exception call_func(TestModule, :add, 2)
+            end
+            @testset "Too many arguments" begin
+                @test_throws Exception call_func(TestModule, :mul, 2, 3)
+            end
+            @testset "Function does not exist" begin
+                @test_throws Exception call_func(TestModule, :nonexistent)
+            end
+        end
+    end
+end
diff --git a/test/execute_on_input_tests.jl b/test/execute_on_input_tests.jl
new file mode 100644
index 0000000..cada3fe
--- /dev/null
+++ b/test/execute_on_input_tests.jl
@@ -0,0 +1,82 @@
+@testitem "execute_on_input" begin
+    using HerbGrammar: @csgrammar
+    using HerbCore: @rulenode
+
+    function create_dummy_grammar()  # small arithmetic grammar shared by the RuleNode tests
+        g = @csgrammar begin
+            Number = |(1:2)            # rules 1 and 2: the literals 1 and 2
+            Number = x                 # rule 3: the input variable x
+            Number = Number + Number   # rule 4
+            Number = Number * Number   # rule 5
+        end
+        return g
+    end
+
+    function create_dummy_rulenode()
+        return @rulenode 4{3,1}  # rule 4 (Number + Number) over rule 3 (x) and rule 1 (literal 1): x + 1
+    end
+
+
+    @testset verbose = true "Execute on input" begin
+        @testset verbose = true "With SymbolTable and Expr" begin
+            @testset "(tab, expr, dict)" begin
+                @testset "Simple execute_on_input (x + 2)" begin
+                    tab = Dict{Symbol,Any}(:+ => +)
+                    input_dict = Dict(:x => 3)
+                    @test execute_on_input(tab, :(x + 2), input_dict) == 5
+                end
+
+                @testset "Simple execute_on_input (x * x + 2)" begin
+                    tab = Dict{Symbol,Any}(:+ => +, :* => *)
+                    input = 3
+                    f(x) = x * x + 2
+                    input_dict = Dict(:x => input, :f => f)
+                    @test execute_on_input(tab, :(f(x)), input_dict) == f(input)
+                end
+            end
+
+            @testset "(tab, expr, Vector{Dict})" begin
+                @testset "Execute_on_input with multiple inputs" begin
+                    tab = Dict{Symbol,Any}(:+ => +, :* => *)
+                    expr = :(x * 2 + y)
+                    inputs = [
+                        Dict(:x => 1, :y => 2),
+                        Dict(:x => 2, :y => 3),
+                        Dict(:x => 3, :y => 4)
+                    ]
+                    expected_outputs = [4, 7, 10]
+                    @test execute_on_input(tab, expr, inputs) == expected_outputs
+                end
+            end
+        end
+
+        @testset "With grammar and RuleNode" begin
+            grammar = create_dummy_grammar() # integer arithmetic
+            program = create_dummy_rulenode() # :(x + 1)
+
+            @testset "(grammar, rulenode, Dict)" begin
+                input_dict = Dict(:x => 5, :y => 3)
+                @test execute_on_input(grammar, program, input_dict) == 6  # 5 + 1
+            end
+
+            @testset "(grammar, rulenode, Vector{Dict})" begin
+                inputs = [
+                    Dict(:x => 2, :y => 3),
+                    Dict(:x => 4, :y => 1)
+                ]
+                expected_outputs = [3, 5]  # x + 1 for x = 2 and x = 4
+                @test execute_on_input(grammar, program, inputs) == expected_outputs
+            end
+        end
+
+        @testset "Error handling" begin
+            @testset "Invalid expression" begin
+                tab = Dict{Symbol,Any}(:+ => +)
+                input_dict = Dict(:x => "a")
+                @test_throws Exception execute_on_input(tab, :(x + 2), input_dict)
+            end
+        end
+    end
+
+
+end
diff --git a/test/test_interpret.jl b/test/interpret_tests.jl
similarity index 85%
rename from test/test_interpret.jl
rename to test/interpret_tests.jl
index e125fe7..6cdda52 100644
--- a/test/test_interpret.jl
+++ b/test/interpret_tests.jl
@@ -1,4 +1,4 @@
-@testset verbose = true "Interpret Function Tests" begin
+@testitem "Interpret Function Tests" begin
     @testset "Basic Interpretations on Arithmetic Operators" begin
         tab = Dict{Symbol,Any}(:x => 5, :y => 3, :+ => +, :* => *)
         @testset "Interpreting a single variable" begin
@@ -18,12 +18,12 @@
     @testset "Advanced Interpretations" begin
         tab = Dict{Symbol,Any}(
             :x => 2, :y => 4, :+ => +, :- => -, :* => *, :/ => /
-)
+        )
         @testset "Interpreting compound expression (x * y) + y" begin
-            @test interpret(tab, :(x * y + y)) == 12    
+            @test interpret(tab, :(x * y + y)) == 12
         end
         @testset "Interpreting compound expression x / y + (y * x)" begin
-            @test interpret(tab, :(( x / y ) + y * x)) == 8.5  
+            @test interpret(tab, :((x / y) + y * x)) == 8.5
         end
     end
 
@@ -42,7 +42,7 @@
         tab = Dict{Symbol,Any}(:x => "hello", :+ => +)
 
         @testset "Interpreting invalid expressions" begin
-            @test_throws Exception interpret(tab, :(x + 2)) 
+            @test_throws Exception interpret(tab, :(x + 2))
         end
     end
 end
diff --git a/test/make_interpreter_tests.jl b/test/make_interpreter_tests.jl
new file mode 100644
index 0000000..0bb17e5
--- /dev/null
+++ b/test/make_interpreter_tests.jl
@@ -0,0 +1,267 @@
+@testitem "make_interpreter" begin
+    import HerbInterpret: make_interpreter
+    using HerbGrammar: @csgrammar
+    using HerbCore: @rulenode
+    using HerbSpecification: HerbSpecification
+
+    using RuntimeGeneratedFunctions
+    RuntimeGeneratedFunctions.init(@__MODULE__)
+
+    # Small module for testing the stateless make_interpreter
+    module LocalStringDSL
+    using HerbCore
+    using RuntimeGeneratedFunctions
+    RuntimeGeneratedFunctions.init(LocalStringDSL)
+    concat_cvc(a::String, b::String) = a * b
+    end
+
+    # Simplest stateful grammar
+    module LocalStateDSL
+    using HerbCore
+    using RuntimeGeneratedFunctions
+    RuntimeGeneratedFunctions.init(LocalStateDSL)
+
+    struct St
+        x::Int
+    end
+
+    inc(st::St) = St(st.x + 1)
+    iseven(st::St) = Base.iseven(st.x)
+    end
+
+    # Stateful grammar with if-then-else
+    module LocalStateDSL2
+    using HerbCore
+    using HerbGrammar
+    using RuntimeGeneratedFunctions
+    RuntimeGeneratedFunctions.init(LocalStateDSL2)
+
+    struct St
+        x::Int
+    end
+
+    inc(st::St) = St(st.x + 1)
+    dec(st::St) = St(st.x - 1)
+    iseven(st::St) = Base.iseven(st.x)
+
+    g2 = @csgrammar begin
+        Start = Step
+        Step = IF(Cond, Step, Step)
+        Step = inc()
+        Step = dec()
+        Cond = iseven()
+    end
+    end
+
+    # Stateful grammar with WHILE 
+    module LocalStateDSL3
+    using HerbCore
+    using HerbGrammar
+    using RuntimeGeneratedFunctions
+    RuntimeGeneratedFunctions.init(LocalStateDSL3)
+
+    struct St
+        x::Int
+    end
+
+    inc(st::St) = St(st.x + 1)
+    lt3(st::St) = st.x < 3
+
+    g3 = @csgrammar begin
+        Start = Step
+        Step = WHILE(Cond, Step)
+        Step = inc()
+        Cond = lt3()
+    end
+    end
+
+
+    @testset verbose = true "Test make_interpreter" begin
+        @testset "Test base functionality" begin
+            g = @csgrammar begin
+                Number = |(1:2)
+                Number = x
+                Number = Number + Number
+                Number = Number * Number
+                Number = Number + 1
+                Number = x * 2
+            end
+
+            # Compile once
+            interpret_custom = HerbInterpret.make_interpreter(g; input_symbols=[:x])
+
+            rn = @rulenode(5{4{3,2},7})  # (x + 2) * (x * 2)
+            input = Dict{Symbol,Any}(:x => 1)
+
+            @testset "No input" begin
+                @test interpret_custom(@rulenode(4{1,1})) == 2  # 1 + 1, called without an input dict
+            end
+
+            @testset "Single input dict" begin
+                # Leaves
+                @test interpret_custom(@rulenode(1), input) == 1
+                @test interpret_custom(@rulenode(2), input) == 2
+                @test interpret_custom(@rulenode(3), input) == 1
+
+                # Pure operators
+                @test interpret_custom(@rulenode(4{1,2}), input) == 3   # 1 + 2
+                @test interpret_custom(@rulenode(5{1,2}), input) == 2   # 1 * 2
+
+                # Partial rules
+                @test interpret_custom(@rulenode(6{3}), input) == 2     # x + 1
+                @test interpret_custom(@rulenode(7), input) == 2        # x * 2
+
+                # Composite example
+                @test interpret_custom(rn, input) == 6
+            end
+
+            @testset "Vector of input dicts" begin
+                inputs = [
+                    Dict{Symbol,Any}(:x => 1),
+                    Dict{Symbol,Any}(:x => 3),
+                ]
+                outs = interpret_custom(rn, inputs)
+                @test outs == [6, 30]  # x=1 => 6, x=3 => 30
+            end
+
+            @testset "Single IOExample" begin
+                ex = HerbSpecification.IOExample(Dict{Symbol,Any}(:x => 1), nothing)
+                @test interpret_custom(rn, ex) == 6
+            end
+
+            @testset "Vector of IOExamples" begin
+                exs = [
+                    HerbSpecification.IOExample(Dict{Symbol,Any}(:x => 1), nothing),
+                    HerbSpecification.IOExample(Dict{Symbol,Any}(:x => 3), nothing),
+                ]
+                outs = interpret_custom(rn, exs)
+                @test outs == [6, 30]
+            end
+        end
+
+        @testset "Interpreter uses correct operators from target module" begin
+            # Conflicting operator in caller module: must NOT be used
+            concat_cvc(a::String, b::String) = a * "|" * b
+
+            g = @csgrammar begin
+                Str = s
+                Str = "A"
+                Str = concat_cvc(Str, Str)
+            end
+
+            rn = @rulenode(3{1,2})
+            input = Dict{Symbol,Any}(:s => "X")
+
+            # Compile once, but resolve operators in LocalStringDSL
+            interpret_string = HerbInterpret.make_interpreter(
+                g;
+                input_symbols=[:s],
+                target_module=LocalStringDSL,
+            )
+
+            # Dict form
+            @test interpret_string(rn, input) == "XA"
+
+            # IOExample form (optional extra check)
+            ex = HerbSpecification.IOExample(Dict{Symbol,Any}(:s => "X"), nothing)
+            @test interpret_string(rn, ex) == "XA"
+
+            # Prove caller's concat differs (and is not used)
+            @test concat_cvc("X", "A") == "X|A"
+        end
+
+        @testset "Stateful interpreter generation" begin
+            @testset "Test basic usage in external module" begin
+                # Rule indices:
+                # 1 Start    = Sequence
+                # 2 Sequence = Step
+                # 3 Sequence = (Step; Sequence)
+                # 4 Step     = inc()
+                # 5 Step     = IF(Cond, Step, Step)
+                # 6 Cond     = iseven()
+                g = @csgrammar begin
+                    Start = Sequence
+                    Sequence = Step
+                    Sequence = (Step; Sequence)
+                    Step = inc()
+                    Step = IF(Cond, Step, Step)
+                    Cond = iseven()
+                end
+
+                # Build the interpreter object (RGF-backed)
+                interp = HerbInterpret.make_stateful_interpreter(
+                    g;
+                    target_module=LocalStateDSL,
+                    cache_module=@__MODULE__,
+                )
+
+                # Program: (inc(); inc()) starting from x=0 => x=2
+                # Start=Sequence -> Sequence=(Step;Sequence) -> Step=inc(); Sequence=Step -> Step=inc()
+                prog_two_incs = @rulenode(1{3{4,2{4}}})
+
+                st0 = LocalStateDSL.St(0)
+                out = interp(prog_two_incs, st0)
+                @test out == LocalStateDSL.St(2)
+
+                # Vector-of-states overload
+                outs = interp(prog_two_incs, [LocalStateDSL.St(0), LocalStateDSL.St(10)])
+                @test outs == [LocalStateDSL.St(2), LocalStateDSL.St(12)]
+            end
+
+            @testset "IF semantics in external target module" begin
+                # Build interpreter from grammar that lives in LocalStateDSL2
+                interp2 = HerbInterpret.make_stateful_interpreter(
+                    LocalStateDSL2.g2;
+                    target_module=LocalStateDSL2,
+                    cache_module=@__MODULE__,
+                )
+
+                # Rule indices in LocalStateDSL2.g2:
+                # 1 Start=Step
+                # 2 Step=IF(Cond,Step,Step)
+                # 3 Step=inc()
+                # 4 Step=dec()
+                # 5 Cond=iseven()
+
+                # IF(iseven(), inc(), dec())
+                prog_if = @rulenode(2{5,3,4})
+
+                @test interp2(prog_if, LocalStateDSL2.St(2)) == LocalStateDSL2.St(3)  # even -> inc
+                @test interp2(prog_if, LocalStateDSL2.St(3)) == LocalStateDSL2.St(2)  # odd  -> dec
+
+                # IOExample support (state is in :_arg_1)
+                exs = [
+                    HerbSpecification.IOExample(Dict{Symbol,Any}(:_arg_1 => LocalStateDSL2.St(2)), nothing),
+                    HerbSpecification.IOExample(Dict{Symbol,Any}(:_arg_1 => LocalStateDSL2.St(3)), nothing),
+                ]
+
+                outs_ex = interp2(prog_if, exs)
+                @test outs_ex == [LocalStateDSL2.St(3), LocalStateDSL2.St(2)]
+            end
+
+            @testset "WHILE operator (bounded loop) " begin
+                # Grammar lives in LocalStateDSL3.g3:
+                # 1 Start=Step
+                # 2 Step=WHILE(Cond, Step)
+                # 3 Step=inc()
+                # 4 Cond=lt3()
+
+                interp3 = HerbInterpret.make_stateful_interpreter(
+                    LocalStateDSL3.g3;
+                    target_module=LocalStateDSL3,
+                    cache_module=@__MODULE__,
+                )
+
+                # WHILE(lt3(), inc())
+                prog_while = @rulenode(2{4,3})
+
+                @test interp3(prog_while, LocalStateDSL3.St(0)) == LocalStateDSL3.St(3)
+                @test interp3(prog_while, LocalStateDSL3.St(2)) == LocalStateDSL3.St(3)
+
+                # Vector-of-states
+                outs = interp3(prog_while, [LocalStateDSL3.St(0), LocalStateDSL3.St(1), LocalStateDSL3.St(3)])
+                @test outs == [LocalStateDSL3.St(3), LocalStateDSL3.St(3), LocalStateDSL3.St(3)]
+            end
+        end
+    end
+end
diff --git a/test/quality_tests.jl b/test/quality_tests.jl
new file mode 100644
index 0000000..c44d763
--- /dev/null
+++ b/test/quality_tests.jl
@@ -0,0 +1,4 @@
+@testitem "Quality tests" begin
+    using Aqua
+    @testset "Aqua" Aqua.test_all(HerbInterpret)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index d3aeb46..89abb6f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,16 +1,3 @@
-using Aqua
+using ReTestItems: @testitem, runtests
 using HerbInterpret
-using HerbInterpret: call_func
-using HerbCore
-using HerbGrammar
-using HerbSpecification
-using Test
-
-
-@testset verbose = true "HerbInterpret.jl" begin
-    @testset "Aqua" Aqua.test_all(HerbInterpret)
-    include("test_execute_on_input.jl")
-    include("test_interpret.jl")
-    include("test_call_func.jl")
-    include("test_make_interpreter.jl")
-end
+runtests(HerbInterpret)
diff --git a/test/test_call_func.jl b/test/test_call_func.jl
deleted file mode 100644
index 3de5206..0000000
--- a/test/test_call_func.jl
+++ /dev/null
@@ -1,41 +0,0 @@
-module TestModule
-    export add, mul, greet, no_arg
-
-    add(x, y) = x + y
-
-    mul(x, y, z) = x * y * z
-
-    greet(name, age) = "Hello, $name ! You are $age years old."
-
-    no_arg() = "No arguments!"
-end
-
-@testset "Testing call_func" begin
-    @testset "No argument function" begin
-        @test call_func(TestModule, :no_arg) == "No arguments!"
-    end
-
-    @testset "Two-argument function" begin
-        @test call_func(TestModule, :add, 2, 3) == 5
-    end
-
-    @testset "Three-argument function" begin
-        @test call_func(TestModule, :mul, 2, 3, 4) == 24
-    end
-
-    @testset "Function with mixed types" begin
-        @test call_func(TestModule, :greet, "Alice", 25) == "Hello, Alice ! You are 25 years old."
-    end
-
-    @testset "Error cases" begin
-        @testset "Not enough arguments" begin 
-            @test_throws Exception call_func(TestModule, :add, 2)
-        end
-        @testset "Too many arguments" begin
-            @test_throws Exception call_func(TestModule, :mul, 2, 3)
-        end
-        @testset "Function does not exist" begin
-            @test_throws Exception call_func(TestModule, :nonexistent)
-        end
-    end
-end
\ No newline at end of file
diff --git a/test/test_execute_on_input.jl b/test/test_execute_on_input.jl
deleted file mode 100644
index d454dff..0000000
--- a/test/test_execute_on_input.jl
+++ /dev/null
@@ -1,77 +0,0 @@
-function create_dummy_grammar()
-    g = @cfgrammar begin
-       Number = |(1:2)
-       Number = x
-       Number = Number + Number
-       Number = Number * Number
-    end
-    return g
-end
-
-function create_dummy_rulenode()
-    return @rulenode 4{3,1}
-end
-
-
-@testset verbose=true "Execute on input" begin
-    @testset verbose=true "With SymbolTable and Expr" begin
-        @testset "(tab, expr, dict)" begin
-            @testset "Simple execute_on_input (x + 2)" begin
-                tab = Dict{Symbol,Any}(:+ => +)
-                input_dict = Dict(:x => 3)
-                @test execute_on_input(tab, :(x + 2), input_dict) == 5
-            end
-
-            @testset "Simple execute_on_input (x * x + 2)" begin
-                tab = Dict{Symbol,Any}(:+ => +, :* => *)
-                input = 3
-                f(x) = x * x + 2
-                input_dict = Dict(:x => input,:f => f)
-                @test execute_on_input(tab, :(f(x)), input_dict) == f(input)
-            end
-        end
-
-        @testset "(tab, expr, Vector{Dict})" begin
-            @testset "Execute_on_input with multiple inputs" begin
-                tab = Dict{Symbol,Any}(:+ => +, :* => *)
-                expr = :(x * 2 + y)
-                inputs = [
-                    Dict(:x => 1, :y => 2),
-                    Dict(:x => 2, :y => 3),
-                    Dict(:x => 3, :y => 4)
-                ]
-                expected_outputs = [4, 7, 10]
-                @test execute_on_input(tab, expr, inputs) == expected_outputs
-            end
-        end
-    end
-
-    @testset "With grammar and RuleNode" begin
-        grammar = create_dummy_grammar() # integer arithmetic
-        program = create_dummy_rulenode() # :(1+x)
-
-        @testset "(grammar, rulenode, Dict)" begin
-            input_dict = Dict(:x => 5, :y => 3)
-            @test execute_on_input(grammar, program, input_dict) == 6
-        end
-
-        @testset "(grammar, rulenode, Vector{Dict})" begin
-            inputs = [
-                Dict(:x => 2, :y => 3),
-                Dict(:x => 4, :y => 1)
-            ]
-            expected_outputs = [3, 5]
-            @test execute_on_input(grammar, program, inputs) == expected_outputs
-        end
-    end
-
-    @testset "Error handling" begin
-        @testset "Invalid expression" begin
-            tab = Dict{Symbol,Any}(:+ => +)
-            input_dict = Dict(:x => "a")
-            @test_throws Exception execute_on_input(tab, :(x + 2), input_dict)
-        end
-    end
-end
-
-
diff --git a/test/test_make_interpreter.jl b/test/test_make_interpreter.jl
deleted file mode 100644
index 47fb27c..0000000
--- a/test/test_make_interpreter.jl
+++ /dev/null
@@ -1,257 +0,0 @@
-import HerbInterpret: make_interpreter
-using RuntimeGeneratedFunctions
-RuntimeGeneratedFunctions.init(@__MODULE__)
-
-# Small module for testing state-less make_interpret
-module LocalStringDSL
-    using HerbCore
-    using RuntimeGeneratedFunctions
-    RuntimeGeneratedFunctions.init(LocalStringDSL)
-    concat_cvc(a::String, b::String) = a * b
-end
-
-# Simplest stateful grammar
-module LocalStateDSL
-    using HerbCore
-    using RuntimeGeneratedFunctions
-    RuntimeGeneratedFunctions.init(LocalStateDSL)
- 
-    struct St
-        x::Int
-    end
-
-    inc(st::St) = St(st.x + 1)
-    iseven(st::St) = Base.iseven(st.x)
-end
-
-# Stateful grammar with if-then-else
-module LocalStateDSL2
-    using HerbCore
-    using HerbGrammar
-    using RuntimeGeneratedFunctions
-    RuntimeGeneratedFunctions.init(LocalStateDSL2)
- 
-    struct St
-        x::Int
-    end
-
-    inc(st::St) = St(st.x + 1)
-    dec(st::St) = St(st.x - 1)
-    iseven(st::St) = Base.iseven(st.x)
-
-    g2 = @cfgrammar begin
-        Start = Step
-        Step  = IF(Cond, Step, Step)
-        Step  = inc()
-        Step  = dec()
-        Cond  = iseven()
-    end
-end
-
-# Stateful grammar with WHILE 
-module LocalStateDSL3
-    using HerbCore
-    using HerbGrammar
-    using RuntimeGeneratedFunctions
-    RuntimeGeneratedFunctions.init(LocalStateDSL3)
- 
-    struct St
-        x::Int
-    end
-
-    inc(st::St) = St(st.x + 1)
-    lt3(st::St) = st.x < 3
-
-    g3 = @cfgrammar begin
-        Start = Step
-        Step  = WHILE(Cond, Step)
-        Step  = inc()
-        Cond  = lt3()
-    end
-end
-
-
-@testset verbose=true "Test make_interpreter" begin
-    @testset "Test base functionality" begin
-        g = @cfgrammar begin
-            Number = |(1:2)
-            Number = x
-            Number = Number + Number
-            Number = Number * Number
-            Number = Number + 1
-            Number = x * 2
-        end
-
-        # Compile once
-        interpret_custom = HerbInterpret.make_interpreter(g; input_symbols=[:x])
-
-        rn = @rulenode(5{4{3,2},7})  # (x + 2) * (x * 2)
-        input = Dict{Symbol,Any}(:x => 1)
-
-        @testset "Single input dict" begin
-            # Leaves
-            @test interpret_custom(@rulenode(1), input) == 1
-            @test interpret_custom(@rulenode(2), input) == 2
-            @test interpret_custom(@rulenode(3), input) == 1
-
-            # Pure operators
-            @test interpret_custom(@rulenode(4{1,2}), input) == 3   # 1 + 2
-            @test interpret_custom(@rulenode(5{1,2}), input) == 2   # 1 * 2
-
-            # Partial rules
-            @test interpret_custom(@rulenode(6{3}), input) == 2     # x + 1
-            @test interpret_custom(@rulenode(7), input) == 2        # x * 2
-
-            # Composite example
-            @test interpret_custom(rn, input) == 6
-        end
-
-        @testset "Vector of input dicts" begin
-            inputs = [
-                Dict{Symbol,Any}(:x => 1),
-                Dict{Symbol,Any}(:x => 3),
-            ]
-            outs = interpret_custom(rn, inputs)
-            @test outs == [6, 30]  # x=1 => 6, x=3 => 30
-        end
-
-        @testset "Single IOExample" begin
-            ex = HerbSpecification.IOExample(Dict{Symbol,Any}(:x => 1), nothing)
-            @test interpret_custom(rn, ex) == 6
-        end
-
-        @testset "Vector of IOExamples" begin
-            exs = [
-                HerbSpecification.IOExample(Dict{Symbol,Any}(:x => 1), nothing),
-                HerbSpecification.IOExample(Dict{Symbol,Any}(:x => 3), nothing),
-            ]
-            outs = interpret_custom(rn, exs)
-            @test outs == [6, 30]
-        end
-    end
-
-    @testset "Interpreter uses correct operators from target module" begin
-        # Conflicting operator in caller module: must NOT be used
-        concat_cvc(a::String, b::String) = a * "|" * b
-
-        g = @cfgrammar begin
-            Str = s
-            Str = "A"
-            Str = concat_cvc(Str, Str)
-        end
-
-        rn = @rulenode(3{1,2})
-        input = Dict{Symbol,Any}(:s => "X")
-
-        # Compile once, but resolve operators in LocalStringDSL
-        interpret_string = HerbInterpret.make_interpreter(
-            g;
-            input_symbols=[:s],
-            target_module=LocalStringDSL,
-        )
-
-        # Dict form
-        @test interpret_string(rn, input) == "XA"
-
-        # IOExample form (optional extra check)
-        ex = HerbSpecification.IOExample(Dict{Symbol,Any}(:s => "X"), nothing)
-        @test interpret_string(rn, ex) == "XA"
-
-        # Prove caller's concat differs (and is not used)
-        @test concat_cvc("X", "A") == "X|A"
-    end
-
-    @testset "Stateful interpreter generation" begin
-        @testset "Test basic usage in external module" begin
-            # Rule indices:
-            # 1 Start    = Sequence
-            # 2 Sequence = Step
-            # 3 Sequence = (Step; Sequence)
-            # 4 Step     = inc()
-            # 5 Step     = IF(Cond, Step, Step)
-            # 6 Cond     = iseven()
-            g = @cfgrammar begin
-                Start    = Sequence
-                Sequence = Step
-                Sequence = (Step; Sequence)
-                Step     = inc()
-                Step     = IF(Cond, Step, Step)
-                Cond     = iseven()
-            end
-
-            # Build the interpreter object (RGF-backed)
-            interp = HerbInterpret.make_stateful_interpreter(
-                g;
-                target_module = LocalStateDSL,
-                cache_module  = @__MODULE__,
-            )
-
-            # Program: (inc(); inc()) starting from x=0 => x=2
-            # Start=Sequence -> Sequence=(Step;Sequence) -> Step=inc(); Sequence=Step -> Step=inc()
-            prog_two_incs = @rulenode(1{3{4,2{4}}})
-
-            st0 = LocalStateDSL.St(0)
-            out = interp(prog_two_incs, st0)
-            @test out == LocalStateDSL.St(2)
-
-            # Vector-of-states overload
-            outs = interp(prog_two_incs, [LocalStateDSL.St(0), LocalStateDSL.St(10)])
-            @test outs == [LocalStateDSL.St(2), LocalStateDSL.St(12)]
-        end
-
-        @testset "IF semantics in external target module" begin
-            # Build interpreter from grammar that lives in LocalStateDSL2
-            interp2 = HerbInterpret.make_stateful_interpreter(
-                LocalStateDSL2.g2;
-                target_module = LocalStateDSL2,
-                cache_module  = @__MODULE__,
-            )
-
-            # Rule indices in LocalStateDSL2.g2:
-            # 1 Start=Step
-            # 2 Step=IF(Cond,Step,Step)
-            # 3 Step=inc()
-            # 4 Step=dec()
-            # 5 Cond=iseven()
-
-            # IF(iseven(), inc(), dec())
-            prog_if = @rulenode(2{5,3,4})
-
-            @test interp2(prog_if, LocalStateDSL2.St(2)) == LocalStateDSL2.St(3)  # even -> inc
-            @test interp2(prog_if, LocalStateDSL2.St(3)) == LocalStateDSL2.St(2)  # odd  -> dec
-
-            # IOExample support (state is in :_arg_1)
-            exs = [
-                HerbSpecification.IOExample(Dict{Symbol,Any}(:_arg_1 => LocalStateDSL2.St(2)), nothing),
-                HerbSpecification.IOExample(Dict{Symbol,Any}(:_arg_1 => LocalStateDSL2.St(3)), nothing),
-            ]
-
-            outs_ex = interp2(prog_if, exs)
-            @test outs_ex == [LocalStateDSL2.St(3), LocalStateDSL2.St(2)]
-        end
-
-        @testset "WHILE operator (bounded loop) " begin
-            # Grammar lives in LocalStateDSL3.g3:
-            # 1 Start=Step
-            # 2 Step=WHILE(Cond, Step)
-            # 3 Step=inc()
-            # 4 Cond=lt3()
-
-            interp3 = HerbInterpret.make_stateful_interpreter(
-                LocalStateDSL3.g3;
-                target_module = LocalStateDSL3,
-                cache_module  = @__MODULE__,
-            )
-
-            # WHILE(lt3(), inc())
-            prog_while = @rulenode(2{4,3})
-
-            @test interp3(prog_while, LocalStateDSL3.St(0)) == LocalStateDSL3.St(3)
-            @test interp3(prog_while, LocalStateDSL3.St(2)) == LocalStateDSL3.St(3)
-
-            # Vector-of-states
-            outs = interp3(prog_while, [LocalStateDSL3.St(0), LocalStateDSL3.St(1), LocalStateDSL3.St(3)])
-            @test outs == [LocalStateDSL3.St(3), LocalStateDSL3.St(3), LocalStateDSL3.St(3)]
-        end
-    end
-end
\ No newline at end of file

From 71b86b544c7db4fdbd0fbdaf3854de0f6633fac0 Mon Sep 17 00:00:00 2001
From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com>
Date: Thu, 26 Mar 2026 14:46:35 +0100
Subject: [PATCH 2/7] feat: add signature for interpreters with no input

---
 src/make_interpret.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/make_interpret.jl b/src/make_interpret.jl
index 5f1cf55..682548f 100644
--- a/src/make_interpret.jl
+++ b/src/make_interpret.jl
@@ -126,6 +126,11 @@ struct GeneratedInterpreter{F}
     core::F
 end
 
+# No input: forwards `nothing` to the generated core in place of an input dictionary
+function (gi::GeneratedInterpreter)(prog::HerbCore.AbstractRuleNode)
+    return gi.core(gi.core, prog, nothing)
+end
+
 # Single input
 function (gi::GeneratedInterpreter)(prog::HerbCore.AbstractRuleNode,
                                    input::AbstractDict{Symbol,Any})

From 48a66e2e5a9e6f85ad306d10ee8724f029ef1a3e Mon Sep 17 00:00:00 2001
From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com>
Date: Thu, 26 Mar 2026 14:47:59 +0100
Subject: [PATCH 3/7] fix: relax type bounds on input for interpreters

---
 src/make_interpret.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/make_interpret.jl b/src/make_interpret.jl
index 682548f..2e47f86 100644
--- a/src/make_interpret.jl
+++ b/src/make_interpret.jl
@@ -133,13 +133,13 @@ end
 
 # Single input
 function (gi::GeneratedInterpreter)(prog::HerbCore.AbstractRuleNode,
-                                   input::AbstractDict{Symbol,Any})
+                                   input::AbstractDict{Symbol,<:Any})
     return gi.core(gi.core, prog, input)
 end
 
 # Vector of inputs
 function (gi::GeneratedInterpreter)(prog::HerbCore.AbstractRuleNode,
-                                   inputs::AbstractVector{<:AbstractDict{Symbol,Any}})
+                                   inputs::AbstractVector{<:AbstractDict{Symbol,<:Any}})
     return (gi.core).((gi.core,), (prog,), inputs)   # broadcasts (self, prog, input)
 end
 
@@ -165,9 +165,9 @@ The returned value is a callable `GeneratedInterpreter` (a small wrapper around
 `RuntimeGeneratedFunctions.RuntimeGeneratedFunction`) that can be applied to:
 
 - a single input dictionary:
-  `interp(prog, input::AbstractDict{Symbol,Any})`
+  `interp(prog, input::AbstractDict{Symbol,<:Any})`
 - a vector of input dictionaries:
-  `interp(prog, inputs::AbstractVector{<:AbstractDict{Symbol,Any}})`
+  `interp(prog, inputs::AbstractVector{<:AbstractDict{Symbol,<:Any}})`
 - a single `HerbSpecification.IOExample`:
   `interp(prog, ex::IOExample)` (uses `ex.in`)
 - a vector of `IOExample`s:

From 92fcc489c8ca85bd01cec303cf6d40d7054f6af2 Mon Sep 17 00:00:00 2001
From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com>
Date: Thu, 26 Mar 2026 14:52:24 +0100
Subject: [PATCH 4/7] chore: bump minor version number

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 3ea2e2d..14f916c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "HerbInterpret"
 uuid = "5bbddadd-02c5-4713-84b8-97364418cca7"
-version = "1.0.1"
+version = "1.1.0"
 authors = ["Tilman Hinnerichs <t.r.hinnerichs@tudelft.nl>", "Jaap de Jong <jaapdejong15@gmail.com>", "Sebastijan Dumancic <s.dumancic@tudelft.nl>", "Reuben Gardos Reid <R.J.GardosReid@tudelft.nl>"]
 
 [deps]

From 64b2c40823112cfe402989224a2e811bb278a5a4 Mon Sep 17 00:00:00 2001
From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com>
Date: Thu, 26 Mar 2026 15:01:44 +0100
Subject: [PATCH 5/7] benchmarks: update the lone benchmark to generated
 interpret

---
 benchmark/README.md     | 16 ++++++++++++----
 benchmark/benchmarks.jl | 17 ++++-------------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/benchmark/README.md b/benchmark/README.md
index 86b18bb..bd8f2be 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -1,13 +1,21 @@
 # `HerbInterpret` Benchmarks
 
-This directory contains a small benchmark suite for `HerbInterpret` to protect against performance regressions.
+This directory contains a small benchmark suite for `HerbInterpret` to protect
+against performance regressions.
 
-The suite is constructed in `benchmarks.jl` using [`BenchmarkTools.jl`](https://juliaci.github.io/BenchmarkTools.jl/stable/).
+The suite is constructed in `benchmarks.jl` using
+[`BenchmarkTools.jl`](https://juliaci.github.io/BenchmarkTools.jl/stable/).
 
-The suite is assigned to a constant, `SUITE`. Running `benchmarks.jl` does *not* run the benchmark, that is the job of tools like [`AirspeedVelocity`](https://juliahub.com/ui/Packages/General/AirspeedVelocity) and [`PkgBenchmark`](https://juliahub.com/ui/Packages/General/PkgBenchmark). These are tools you might want to install in your global Julia environment. With `AirspeedVelocity`, you can run the benchmarks like this:
+The suite is assigned to a constant, `SUITE`. Running `benchmarks.jl` does
+*not* run the benchmark; that is the job of tools like
+[`AirspeedVelocity`](https://juliahub.com/ui/Packages/General/AirspeedVelocity)
+and [`PkgBenchmark`](https://juliahub.com/ui/Packages/General/PkgBenchmark).
+These are tools you might want to install in your global Julia environment.
+With `AirspeedVelocity`, you can run the benchmarks like this:
 
 ```sh
 benchpkg HerbInterpret --rev=v0.1.7,dirty --path=.
 ```
 
-where the path points to the base directory of `HerbInterpret.jl` (meaning it should be `--path=..` if you're in the `benchmark` directory).
\ No newline at end of file
+where the path points to the base directory of `HerbInterpret.jl` (meaning it
+should be `--path=..` if you're in the `benchmark` directory).
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
index 00ccc08..30812d3 100644
--- a/benchmark/benchmarks.jl
+++ b/benchmark/benchmarks.jl
@@ -2,13 +2,8 @@ using BenchmarkTools
 using Random: seed!
 using HerbGrammar: @csgrammar, grammar2symboltable, rulenode2expr
 using HerbCore: RuleNode
-# currently this is defined in Search, but should ideally have a definition in Core
-# because having to add Search here in the benchmark environment creates annoying
-# circular dependencies. For now, store the expressions tested in `exprs.jl`
-# using HerbSearch: rand
-using HerbInterpret: interpret
-
-include("exprs.jl")
+using HerbSearch: BFSIterator
+using HerbInterpret: make_interpreter
 
 function create_interpret_benchmark()
     suite = BenchmarkGroup()
@@ -20,12 +15,8 @@ function create_interpret_benchmark()
         Var = |(0:5)
     end
 
-    # once we move random RuleNode sampling to Core
-    # exprs = [rulenode2expr(rand(RuleNode, g), g) for _ in 1:1000]
-
-    st = grammar2symboltable(g)
-
-    suite["Random Expressions"] = @benchmarkable interpret.(($st,), $EXPRS)
+    interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=@__MODULE__)
+    suite["Random Expressions"] = @benchmarkable interpret.($EXPRS)
 
     return suite
 end

From e34852616cb3d7c41c7dbf91a89909e7373adf52 Mon Sep 17 00:00:00 2001
From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com>
Date: Thu, 26 Mar 2026 18:08:42 +0100
Subject: [PATCH 6/7] [WIP] switched benchmark module to the interpret-calling
 module, generated now much faster--maybe too fast??

---
 benchmark/Project.toml  |   5 +
 benchmark/benchmarks.jl |  49 +++++-
 benchmark/exprs.jl      | 354 ----------------------------------------
 3 files changed, 47 insertions(+), 361 deletions(-)
 delete mode 100644 benchmark/exprs.jl

diff --git a/benchmark/Project.toml b/benchmark/Project.toml
index ecbe082..0ac69fe 100644
--- a/benchmark/Project.toml
+++ b/benchmark/Project.toml
@@ -1,4 +1,9 @@
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+HerbBenchmarks = "eadf8b74-d38a-4b1a-a063-8d36e493d376"
 HerbCore = "2b23ba43-8213-43cb-b5ea-38c12b45bd45"
 HerbGrammar = "4ef9e186-2fe5-4b24-8de7-9f7291f24af7"
+HerbSearch = "3008d8e8-f9aa-438a-92ed-26e9c7b4829f"
+
+[sources]
+HerbBenchmarks = {url = "https://github.com/Herb-AI/HerbBenchmarks.jl", rev = "e8cd880"}
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
index 30812d3..82aedc4 100644
--- a/benchmark/benchmarks.jl
+++ b/benchmark/benchmarks.jl
@@ -1,13 +1,17 @@
 using BenchmarkTools
-using Random: seed!
-using HerbGrammar: @csgrammar, grammar2symboltable, rulenode2expr
-using HerbCore: RuleNode
+using HerbGrammar: @csgrammar, expr2rulenode, grammar2symboltable, rulenode2expr
+using HerbCore: HerbCore, RuleNode
 using HerbSearch: BFSIterator
-using HerbInterpret: make_interpreter
+using HerbConstraints: freeze_state
+using HerbInterpret: make_interpreter, execute_on_input
+using HerbBenchmarks: PBE_BV_Track_2018 as BV
+using HerbBenchmarks: PBE_SLIA_Track_2019 as SLIA
+using HerbBenchmarks: get_problem_grammar_pair
+using RuntimeGeneratedFunctions
+RuntimeGeneratedFunctions.init(@__MODULE__)
 
 function create_interpret_benchmark()
     suite = BenchmarkGroup()
-    seed!(42) # keep random expressions constant
     g = @csgrammar begin
         Var = Var + Var
         Var = Var * Var
@@ -15,8 +19,37 @@ function create_interpret_benchmark()
         Var = |(0:5)
     end
 
-    interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=@__MODULE__)
-    suite["Random Expressions"] = @benchmarkable interpret.($EXPRS)
+    # rns = BFSIterator(g, :Var; max_depth=3)
+    #
+    # interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=@__MODULE__)
+    # suite["Random Expressions"] = @benchmarkable interpret.($rns)
+
+    return suite
+end
+
+function create_herbbench_benchmark(benchmark_module, problem_name)
+    suite = BenchmarkGroup()
+    pgp = get_problem_grammar_pair(benchmark_module, problem_name)
+    spec = pgp.problem.spec
+    g = pgp.grammar
+    st = grammar2symboltable(g, benchmark_module)
+    @info "Collecting expressions to benchmark" mod = benchmark_module prob = problem_name
+    it = BFSIterator(g, :Start; max_depth=4, max_size=8)
+    interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=benchmark_module)
+
+    rns = [freeze_state(p) for p in it]
+    @info "Expressions collected" length(rns) type = typeof(rns) examples = rns
+
+    suite["$(length(rns)) expressions"]["generated"] = @benchmarkable try
+        $interpret.($rns, ($spec,))
+    catch
+    end
+
+    suite["$(length(rns)) expressions"]["rulenode2expr"] = @benchmarkable try
+        exprs = rulenode2expr.($rns, ($g,))
+        execute_on_input.(exprs, ($spec,))
+    catch
+    end
 
     return suite
 end
@@ -24,6 +57,8 @@ end
 function create_benchmarks()
     suite = BenchmarkGroup()
     suite["interpret"] = create_interpret_benchmark()
+    suite["HerbBenchmark grammars"]["BV"] = create_herbbench_benchmark(BV, "PRE_100_10")
+    suite["HerbBenchmark grammars"]["SLIA"] = create_herbbench_benchmark(SLIA, "11604909")
     return suite
 end
 
diff --git a/benchmark/exprs.jl b/benchmark/exprs.jl
deleted file mode 100644
index 6df26d0..0000000
--- a/benchmark/exprs.jl
+++ /dev/null
@@ -1,354 +0,0 @@
-const EXPRS = [
-    :(0 / 2),
-    :(1 / (5 / 0)),
-    :(4 * 3),
-    :(5 * 5),
-    :(2 / 3),
-    :((4 + 3) * (0 / 3 + 2 * 1)),
-    :(5 + ((5 + 5) + (1 + 5 / 2))),
-    :((2 * 0) / 5),
-    :(5 / 0),
-    :((5 * 0) / 4),
-    :(2 / 4),
-    :((4 + 1) * 4),
-    :(1 + 2),
-    :(4 * ((1 + 5) / 3 + 1 / (5 * 1))),
-    :(1 / 4),
-    :(2 + 5 * 1),
-    :(1 / 5),
-    :(3 * (3 + (3 + 1) * (5 / ((4 * (2 / (4 + 1) + 1 * 5)) * ((1 / (5 * 2)) / 0))))),
-    :((1 + 2 / 5) / (0 / 2)),
-    :((((0 / (5 * (5 + 4)) + (3 + (1 + 5))) / (3 + 0)) * (5 / 1)) / 1),
-    :(0 / 2),
-    :(3 + 1),
-    :((1 / 3 + 2 * 4) / 1),
-    :(0 * (4 * (1 * 5))),
-    :(1 + 4),
-    :(4 / 0),
-    :(4 + 5),
-    :(0 + 4),
-    :((4 * 1) * ((1 + 5) / ((2 * 2 + (5 / 3) / (4 * 4)) / 4))),
-    :(4 * 4),
-    :(2 / 2 + 2),
-    :((3 / 0) * (2 / (0 * 3))),
-    :(2 + 5),
-    :(4 * ((2 + 0 * (0 + 0)) + 0)),
-    :(
-        (
-            ((5 / (4 * 0)) * 3) * (4 + 2) +
-            1 * (((5 + 0) + 1 / ((2 / 2) / 0 + 1)) * (2 * 2))
-        ) * 2
-    ),
-    :(3 * (4 / 0)),
-    :(3 + ((3 / (0 + 0) + 3) + 3) / 0),
-    :(3 * ((0 + 1) * ((5 + 0 / 2) * 3))),
-    :((4 / ((((1 * 4) / 0) * 5) / (0 + 3))) / ((2 + 2) / 0) + 5 / 5),
-    :(5 / ((0 / 2 + 2) / 2)),
-    :((0 / 2) * (2 + 1 * 4)),
-    :((1 / 3) / 3),
-    :(1 * 1),
-    :(0 / 3),
-    :(0 + 0),
-    :(((2 + 2) * ((1 / 2) * 4)) * 0),
-    :((1 + 5) + 2),
-    :(3 / ((1 / (((3 / 5) * 3) * 3)) * 3) + 5),
-    :(5 * ((0 / 3) * (((3 + 3) / (4 + 2)) * 3))),
-    :(((4 + 3) + 0) * 5),
-    :((2 + 2) / 0),
-    :((4 / 4) * ((0 * 1) * (1 / 2))),
-    :(3 + (0 + 5)),
-    :(
-        (
-            (
-                3 * (5 + 1) +
-                (4 / 3) * ((((3 + 2) * (1 + 2)) / 5) * ((5 / (4 + 0)) / 5) + 3 / 4)
-            ) + 1
-        ) / 5
-    ),
-    :(3 / 5),
-    :(3 * 4),
-    :(0 * (0 * 5)),
-    :(1 / (0 / 3)),
-    :(0 * 0),
-    :(1 + 4),
-    :(3 * (5 + 5 * 0)),
-    :(5 * 5),
-    :(0 / 5),
-    :((4 / (5 + 3 / 4)) * (0 / 0)),
-    :((5 * 0) / (0 + 0)),
-    :(5 * 4),
-    :(((((3 * ((1 * 0 + (0 * 1) * (1 * 2)) * 5)) * (3 + (4 + 0) / 2)) * 2) / 1) * 4),
-    :(((3 / 4) / 2) / 3),
-    :(3 + (4 + ((4 / 2) * 4) * (1 + 2))),
-    :(3 * 5),
-    :((2 * (1 / 0)) * 2),
-    :(1 / ((2 + 3) / 4) + 0),
-    :((5 * 3) * (((5 * 5) * 0) / 5) + 0),
-    :(5 / 3),
-    :(4 / 1),
-    :(5 / 1),
-    :(2 + 3),
-    :(((0 * 4) / 3) / 0),
-    :(3 * (4 * ((2 / (5 * 0)) / 4))),
-    :(1 + 3),
-    :(2 * 3),
-    :(2 + 5),
-    :((5 + 3 / 4) * (5 * 4)),
-    :(1 / 0),
-    :(2 * 4),
-    :(5 / 3),
-    :(0 + 0),
-    :(1 * (1 + (4 + 1))),
-    :(5 / 5),
-    :(4 + 5 * 0),
-    :(3 + 1 * (((0 + 1 * ((3 + 2) * 4 + 0)) / 1) / 0)),
-    :(1 * ((0 / 5) * (5 / 3))),
-    :(4 + 4),
-    :((5 / 4 + 0) * 0),
-    :(((4 + 1) + 4) + ((((0 / 5) / 2) * 2) * 1 + 3)),
-    :(4 * 0),
-    :(5 * 2 + 5),
-    :(5 + (0 * (3 + 1) + 1)),
-    :((1 + 3) + (1 + 3)),
-    :(2 * 1),
-    :(3 * 1),
-    :(1 / 3),
-    :(2 * 2),
-    :(3 + (3 * 1) / 1),
-    :(4 / 4 + 3),
-    :(3 * 2),
-    :(3 / (3 + 4)),
-    :(3 + 1),
-    :(((0 + 4) * 3) * (5 * 5)),
-    :(2 / 2),
-    :(5 * (2 / 3)),
-    :((4 * 5 + 4) * 2),
-    :(5 * 3),
-    :(4 / 2),
-    :(
-        ((2 * 1) / 3) / (
-            (((1 + 0) + ((2 + 5) / 5 + (4 * 0 + 0))) * ((2 * 1) / 5)) *
-            ((1 * ((((2 + 5) + 5) + 2) + 1)) / 5)
-        )
-    ),
-    :(1 * 3),
-    :(0 + 0),
-    :(2 / 4),
-    :(1 * (5 / 2)),
-    :(2 / (4 / 4)),
-    :((3 * 3) / 0),
-    :(2 + 2 * ((((1 + 4) * 2) / (1 * 1)) / 3)),
-    :((1 + 2) / (5 * 4)),
-    :(1 / 5 + 1),
-    :((1 * 0) * (3 * (5 * 4) + 5 / (1 + 4))),
-    :(5 + 5),
-    :(0 + 2),
-    :(2 * 1),
-    :(5 + 1),
-    :((4 * 1) * (5 / 3)),
-    :(1 + 5),
-    :(2 * 3),
-    :(3 * (1 / 3)),
-    :(2 * (3 * 4)),
-    :(0 * 5),
-    :((((1 / 5) / 2) * 3) / 4),
-    :(5 * 4),
-    :(((0 + ((5 * 5) * 2 + 1)) + 1) / 0),
-    :(5 * (1 / 0)),
-    :(0 / (3 + 2)),
-    :(5 / 0),
-    :(4 / (5 / 5)),
-    :(1 * (3 / ((5 + (((5 / 3 + 4) / 4) / 2) * 2) / 3))),
-    :(3 * 1),
-    :(2 + 5 * 2),
-    :(4 / 0),
-    :(
-        (1 + 0 * 2) * ((5 + 2) / 0 + (3 * 3) * 5) +
-        ((0 * (4 / 1 + 1)) / (((1 / 3 + 0) + 2) + 4) + 0)
-    ),
-    :(((4 * 4) / 5) * 1),
-    :((4 * 2) * 0),
-    :(5 * ((0 + 5) / 4)),
-    :(2 * 0),
-    :(1 + 5),
-    :(0 + 2),
-    :(5 * (1 / (1 + 5))),
-    :(3 / 5),
-    :((5 + 4) + 5 * 0),
-    :(5 * 5),
-    :(5 + 5),
-    :(0 + 3 / (4 * 0)),
-    :(5 * 0),
-    :(1 / 4),
-    :(2 + 5),
-    :(4 / ((5 / 2) / ((4 + 4) / 2) + 2)),
-    :(3 / (1 * 0)),
-    :(4 * 1),
-    :(4 + 5),
-    :(0 + 1),
-    :(3 / 1),
-    :(1 / (4 + 5)),
-    :(((0 + 1) / (0 * 4)) / 2),
-    :(5 * ((((2 / 4) / 3) * 0) / 5)),
-    :(2 + 2),
-    :((5 + ((0 + 5) + 5)) / (5 + 3 / 0)),
-    :(1 * ((0 * (4 * 5)) * (1 * 5))),
-    :(1 / 1),
-    :(1 * 0),
-    :(4 * 1),
-    :(((2 + 1) * 3) / 3),
-    :(((1 + (1 + 1)) / (5 + 5)) / 2 + 0),
-    :(3 * 5),
-    :(1 * (5 / (4 * 5 + 5))),
-    :(0 + 5),
-    :(1 * 1),
-    :(4 * (2 * (0 / (1 + 5 * ((1 + 4) * (3 / 0)))))),
-    :(3 / ((0 * (1 + 3)) / 0)),
-    :((1 / (0 + 2)) / 1),
-    :(2 + 0),
-    :((2 + 4) / (4 / 4)),
-    :(1 + 3),
-    :((3 + 1 / ((2 / 0 + 0 / 2) + (5 / 1) / 1)) + 0),
-    :(5 * 1),
-    :(3 * 5),
-    :(5 / (1 * (4 + 5)) + 1),
-    :((3 / (4 * 0)) * ((4 * 1) / (1 / 3))),
-    :(4 / 1),
-    :((((3 + 5) * 2) * 3 + 1) + 2),
-    :(4 * 5),
-    :((5 * (0 / (1 / 1) + 2)) / 0),
-    :(1 * (4 / 1)),
-    :(4 * 0),
-    :(0 + 4),
-    :((2 / 3) * 5),
-    :(((0 + 1) / 4) * 5),
-    :((4 * 3) / 0),
-    :(((1 * 3) * 1) * (3 / (4 + (3 * 3 + 5)))),
-    :(1 / 0),
-    :((3 * (3 / 2)) / 0),
-    :(1 * 3),
-    :(3 * ((0 * 2) * 1)),
-    :(3 / 2),
-    :((5 + 5) * ((5 + 2) / (2 * 3))),
-    :(3 / 3),
-    :(2 + 1),
-    :(1 * (3 + 1)),
-    :(((3 + (5 + 0)) + 0 * 2) / 4),
-    :(((2 / 2) / 5) / (0 / (((5 / (1 * (5 / (0 / 5)) + ((3 + 4) + 0))) / 5) * 3))),
-    :(((2 + (2 + 1)) * 0) / (2 + 3)),
-    :(3 * 1),
-    :(4 + 1),
-    :(4 + 5),
-    :(1 + 1),
-    :(5 + 4),
-    :(1 / (((1 * (0 + (1 + 1))) / (5 + 0)) * (1 * 2))),
-    :((4 + 3) + 5),
-    :((5 / (0 / 1)) / 5),
-    :(2 + 4),
-    :(0 * 1 + 2),
-    :((4 + 0) * 3),
-    :(5 * (3 / 2)),
-    :((2 * 2) / 1),
-    :(3 / 2 + 4),
-    :(0 * 2),
-    :(0 * (4 / 5)),
-    :(2 * 5),
-    :(5 * (0 + 2)),
-    :(5 * 4),
-    :(3 + (3 / 1) * (3 / 0)),
-    :(
-        2 / (
-            ((2 / (0 + 1) + (5 + (5 / ((0 / 1) / 3) + 3))) + 0) +
-            (2 * (4 * ((3 + 2) * 2))) * 2
-        )
-    ),
-    :(1 + (1 + 4)),
-    :(0 * ((1 + (2 * ((1 / (5 / 3) + 5) * (5 / 1))) / 1) / 2)),
-    :((0 * (4 + 2)) * 5),
-    :((5 / 0) * (5 / (3 + 3))),
-    :(4 + 5),
-    :(1 + 2),
-    :(3 + 1),
-    :(4 * 3),
-    :(0 / (2 * (1 + (0 + 2) * 5))),
-    :(5 / 3 + 2),
-    :(
-        0 + (
-            (
-                (((3 * 5) / (5 * (1 * 2)) + 2) + ((0 + (4 + 3)) / (0 / (5 / 4)) + 3)) /
-                (2 / (5 * (4 / (4 * 3))))
-            ) * 4 + 4
-        )
-    ),
-    :(3 / 0),
-    :((5 + 3) / 4),
-    :(5 / (5 + 4)),
-    :((1 + (3 / (3 * 5) + 1) / 1) / 4),
-    :((2 / 1) * ((1 * 2 + 5) * 5)),
-    :(3 * 2),
-    :((1 / 4) * 0),
-    :(0 * 5),
-    :(2 + 4),
-    :(((1 / 0) * 4) * 3),
-    :((4 + 4) * ((0 / (2 * 1)) / 3)),
-    :(0 / 2),
-    :(0 * (2 + 4)),
-    :(2 * 0),
-    :(3 / 2),
-    :(0 + 5 * 1),
-    :(3 / 4),
-    :(4 + 4 * ((2 + 3) * (5 + 3))),
-    :(5 * 4),
-    :(5 / 0),
-    :((3 + 2) * 2),
-    :(0 / (4 / (4 * ((4 + 0) + 2) + 2 * 0))),
-    :(4 + 4),
-    :((2 + 3 / 3) / (3 / 1) + 0),
-    :(1 * 3),
-    :((1 + 2) / 3),
-    :(0 * 5),
-    :(1 + 3),
-    :(4 + 5),
-    :((0 + 1) * 0),
-    :(4 * ((3 * 0) / 2)),
-    :((3 * (2 + 0)) * 1 + 3),
-    :(2 * (2 * 3)),
-    :(3 / 1),
-    :(3 / 2),
-    :(2 / (0 + 2)),
-    :(5 + 4),
-    :((4 * 3) * ((5 + 0) * 4)),
-    :(3 * 5),
-    :(5 + 3),
-    :(0 / 3),
-    :(5 / 5),
-    :(4 + 5 / (((5 / (0 + 2)) * (0 / 3)) / 3)),
-    :((0 * (5 * 1 + ((5 * 5) * 1) * ((4 + 2) / 3 + 5))) / (5 * 3)),
-    :(1 + 1),
-    :(1 + (2 + ((2 * 3) / (3 + 5)) / 5)),
-    :(((1 + 3 / 4) * (0 / (((1 + 1) + 2) + 3)) + 0) * 3),
-    :(0 / (5 / (5 * 2))),
-    :(5 * (2 / 0)),
-    :(0 * 0),
-    :(3 / 4),
-    :(3 * 3),
-    :(4 / (0 + 5)),
-    :(3 / 5),
-    :(2 + 3),
-    :(5 + 0 / (((((2 / 5 + 4) * 0) / 0) / (1 * (0 * 1))) * (5 * (3 * (0 * (0 / 5)))))),
-    :((1 + 5) + 1 / 3),
-    :((2 / (0 / (3 / 5 + 2 / 4))) / (5 * (2 / (5 + 2)))),
-    :(0 * 5 + 3),
-    :(2 / 0),
-    :(1 * 1),
-    :(4 / 0),
-    :((0 * (1 / 0)) * (4 * (2 * ((5 * (4 / (5 * (0 / 4)))) / (1 / 2))))),
-    :(3 * (1 * 2)),
-    :(0 + 4),
-    :(1 * (0 + 3)),
-    :(5 + 1),
-    :(1 / (3 * 5)),
-    :((3 + (4 / (((2 + (1 + 3)) / 2) / (0 / 0))) / 2) * (3 + 5) + 2),
-    :(4 / (2 / 4)),
-    :((5 + 1) + 2),
-]

From e8f2dfce87befc0f7e4b1915f73ff3b0228c3eec Mon Sep 17 00:00:00 2001
From: Reuben Gardos Reid <5456207+ReubenJ@users.noreply.github.com>
Date: Fri, 27 Mar 2026 15:31:51 +0100
Subject: [PATCH 7/7] [WIP] don't benchmark exceptions...

---
 Project.toml                |  3 ++
 benchmark/Project.toml      | 15 ++++++-
 benchmark/benchmarks.jl     | 66 +-----------------------------
 benchmark/src/Benchmarks.jl | 81 +++++++++++++++++++++++++++++++++++++
 test/Project.toml           |  1 +
 5 files changed, 100 insertions(+), 66 deletions(-)
 create mode 100644 benchmark/src/Benchmarks.jl

diff --git a/Project.toml b/Project.toml
index 14f916c..fa4e59d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -15,3 +15,6 @@ HerbGrammar = "1"
 HerbSpecification = "1"
 RuntimeGeneratedFunctions = "0.5.16"
 julia = "1.10"
+
+[workspace]
+projects = ["test", "benchmark"]
diff --git a/benchmark/Project.toml b/benchmark/Project.toml
index 0ac69fe..e54c7bf 100644
--- a/benchmark/Project.toml
+++ b/benchmark/Project.toml
@@ -1,9 +1,22 @@
+name = "Benchmarks"
+uuid = "6ee5f8fb-11dd-42bf-93c4-422a4e5be4dc"
+
 [deps]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 HerbBenchmarks = "eadf8b74-d38a-4b1a-a063-8d36e493d376"
+HerbConstraints = "1fa96474-3206-4513-b4fa-23913f296dfc"
 HerbCore = "2b23ba43-8213-43cb-b5ea-38c12b45bd45"
 HerbGrammar = "4ef9e186-2fe5-4b24-8de7-9f7291f24af7"
+HerbInterpret = "5bbddadd-02c5-4713-84b8-97364418cca7"
 HerbSearch = "3008d8e8-f9aa-438a-92ed-26e9c7b4829f"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
+ProfileView = "c46f51b8-102a-5cf2-8d2c-8597cb0e0da7"
+RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47"
 
 [sources]
-HerbBenchmarks = {url = "https://github.com/Herb-AI/HerbBenchmarks.jl", rev = "e8cd880"}
+HerbBenchmarks = {rev = "10f07530c03bd5510f636aa5aa06b66e69ad994b", url = "https://github.com/Herb-AI/HerbBenchmarks.jl"}
+
+[compat]
+HerbBenchmarks = "0.2.3"
+Profile = "1.11.0"
+ProfileView = "1.10.3"
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
index 82aedc4..cab9af6 100644
--- a/benchmark/benchmarks.jl
+++ b/benchmark/benchmarks.jl
@@ -1,65 +1 @@
-using BenchmarkTools
-using HerbGrammar: @csgrammar, expr2rulenode, grammar2symboltable, rulenode2expr
-using HerbCore: HerbCore, RuleNode
-using HerbSearch: BFSIterator
-using HerbConstraints: freeze_state
-using HerbInterpret: make_interpreter, execute_on_input
-using HerbBenchmarks: PBE_BV_Track_2018 as BV
-using HerbBenchmarks: PBE_SLIA_Track_2019 as SLIA
-using HerbBenchmarks: get_problem_grammar_pair
-using RuntimeGeneratedFunctions
-RuntimeGeneratedFunctions.init(@__MODULE__)
-
-function create_interpret_benchmark()
-    suite = BenchmarkGroup()
-    g = @csgrammar begin
-        Var = Var + Var
-        Var = Var * Var
-        Var = Var / Var
-        Var = |(0:5)
-    end
-
-    # rns = BFSIterator(g, :Var; max_depth=3)
-    #
-    # interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=@__MODULE__)
-    # suite["Random Expressions"] = @benchmarkable interpret.($rns)
-
-    return suite
-end
-
-function create_herbbench_benchmark(benchmark_module, problem_name)
-    suite = BenchmarkGroup()
-    pgp = get_problem_grammar_pair(benchmark_module, problem_name)
-    spec = pgp.problem.spec
-    g = pgp.grammar
-    st = grammar2symboltable(g, benchmark_module)
-    @info "Collecting expressions to benchmark" mod = benchmark_module prob = problem_name
-    it = BFSIterator(g, :Start; max_depth=4, max_size=8)
-    interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=benchmark_module)
-
-    rns = [freeze_state(p) for p in it]
-    @info "Expressions collected" length(rns) type = typeof(rns) examples = rns
-
-    suite["$(length(rns)) expressions"]["generated"] = @benchmarkable try
-        $interpret.($rns, ($spec,))
-    catch
-    end
-
-    suite["$(length(rns)) expressions"]["rulenode2expr"] = @benchmarkable try
-        exprs = rulenode2expr.($rns, ($g,))
-        execute_on_input.(exprs, ($spec,))
-    catch
-    end
-
-    return suite
-end
-
-function create_benchmarks()
-    suite = BenchmarkGroup()
-    suite["interpret"] = create_interpret_benchmark()
-    suite["HerbBenchmark grammars"]["BV"] = create_herbbench_benchmark(BV, "PRE_100_10")
-    suite["HerbBenchmark grammars"]["SLIA"] = create_herbbench_benchmark(SLIA, "11604909")
-    return suite
-end
-
-const SUITE = create_benchmarks()
+using Benchmarks: create_benchmarks; const SUITE = create_benchmarks()  # suite run by benchmark tools
diff --git a/benchmark/src/Benchmarks.jl b/benchmark/src/Benchmarks.jl
new file mode 100644
index 0000000..e32add6
--- /dev/null
+++ b/benchmark/src/Benchmarks.jl
@@ -0,0 +1,142 @@
+"""
+    Benchmarks
+
+Benchmark suite for `HerbInterpret`, built on `BenchmarkTools`.
+
+Call `create_benchmarks()` to assemble the complete suite.
+"""
+module Benchmarks
+
+using Base: call_composed, Callable
+using HerbInterpret: make_interpreter, execute_on_input
+using BenchmarkTools
+using HerbGrammar: @csgrammar, expr2rulenode, grammar2symboltable, rulenode2expr
+using HerbCore: HerbCore, RuleNode
+using HerbSearch: BFSIterator
+using HerbConstraints: freeze_state
+using HerbBenchmarks: PBE_BV_Track_2018 as BV
+using HerbBenchmarks: PBE_SLIA_Track_2019 as SLIA
+using HerbBenchmarks: get_problem_grammar_pair
+using RuntimeGeneratedFunctions
+RuntimeGeneratedFunctions.init(@__MODULE__)
+
+# Upper bound on the number of enumerated programs kept per benchmark problem.
+const MAX_PROGRAMS = 100
+
+"""
+    create_interpret_benchmark()
+
+Return the `BenchmarkGroup` reserved for the small arithmetic grammar.
+
+The group is currently returned empty: the benchmark over enumerated programs is
+commented out in the body.
+"""
+function create_interpret_benchmark()
+    suite = BenchmarkGroup()
+    g = @csgrammar begin
+        Var = Var + Var
+        Var = Var * Var
+        Var = Var / Var
+        Var = |(0:5)
+    end
+
+    # Disabled for now; `g` is kept so the benchmark can be re-enabled as is.
+    # rns = BFSIterator(g, :Var; max_depth=3)
+    #
+    # interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=@__MODULE__)
+    # suite["Random Expressions"] = @benchmarkable interpret.($rns)
+
+    return suite
+end
+
+"""
+    collect_rulenodes_interpreter_herbbench(benchmark_module, problem_name)
+
+Gather what is needed to benchmark `problem_name` from `benchmark_module`.
+
+Return the tuple `(grammar, symbol_table, interpret, rulenodes, spec)`. `interpret`
+is built with `make_interpreter`, and `rulenodes` holds at most the last
+`MAX_PROGRAMS` programs enumerated by a `BFSIterator` starting at `:Start` with
+`max_depth=4` and `max_size=8`.
+"""
+function collect_rulenodes_interpreter_herbbench(benchmark_module, problem_name)
+    pgp = get_problem_grammar_pair(benchmark_module, problem_name)
+    spec = pgp.problem.spec
+    g = pgp.grammar
+    st = grammar2symboltable(g, benchmark_module)
+    @info "Collecting expressions to benchmark" mod = benchmark_module prob = problem_name
+    it = BFSIterator(g, :Start; max_depth=4, max_size=8)
+    interpret = make_interpreter(g; cache_module=@__MODULE__, target_module=benchmark_module)
+
+    # `last` returns fewer elements when the enumeration is short, whereas slicing
+    # with `end-100:end` threw a `BoundsError` (and kept 101 programs, not 100).
+    rns = last([freeze_state(p) for p in it], MAX_PROGRAMS)
+    @info "Expressions collected" length(rns) type = typeof(rns) examples = rns
+    return g, st, interpret, rns, spec
+end
+
+"""
+    create_herbbench_benchmark(benchmark_module, problem_name)
+
+Build the `BenchmarkGroup` comparing both interpreters on `problem_name` from
+`benchmark_module`.
+"""
+function create_herbbench_benchmark(benchmark_module, problem_name)
+    _, st, interpret, rns, spec = collect_rulenodes_interpreter_herbbench(
+        benchmark_module, problem_name
+    )
+    return compare_generated_vs_rulenode2expr_based(st, rns, interpret, spec)
+end
+
+"""
+    compare_generated_vs_rulenode2expr_based(symbol_table, rulenodes, interpret, spec)
+
+Return a `BenchmarkGroup` with two benchmarks over `rulenodes`:
+
+- `"generated"` broadcasts `interpret` over the programs with `spec`;
+- `"rulenode2expr"` broadcasts `execute_on_input` over the programs with
+  `symbol_table` and the `in` field of every element of `spec`.
+
+In both, a `BoundsError` is swallowed and any other exception is rethrown.
+"""
+function compare_generated_vs_rulenode2expr_based(symbol_table, rulenodes, interpret, spec)
+    suite = BenchmarkGroup()
+    group = "$(length(rulenodes)) expressions"
+    inputs = [example.in for example in spec]
+
+    suite[group]["generated"] = @benchmarkable try
+        $interpret.($rulenodes, ($spec,))
+    catch e
+        # `rethrow()` keeps the original backtrace.
+        if !(e isa BoundsError)
+            rethrow()
+        end
+    end
+
+    # NOTE(review): rule nodes are passed as-is and `rulenode2expr` is never called
+    # here, despite the benchmark's name -- confirm this is the intended baseline.
+    suite[group]["rulenode2expr"] = @benchmarkable try
+        execute_on_input.(($symbol_table,), $rulenodes, ($inputs,))
+    catch e
+        if !(e isa BoundsError)
+            rethrow()
+        end
+    end
+
+    return suite
+end
+
+"""
+    create_benchmarks()
+
+Assemble the complete suite: the `"interpret"` group plus one group per
+`HerbBenchmarks` problem under `"HerbBenchmark grammars"`.
+"""
+function create_benchmarks()
+    suite = BenchmarkGroup()
+    suite["interpret"] = create_interpret_benchmark()
+    suite["HerbBenchmark grammars"]["BV"] = create_herbbench_benchmark(BV, "PRE_100_10")
+    suite["HerbBenchmark grammars"]["SLIA"] = create_herbbench_benchmark(SLIA, "11604909")
+    return suite
+end
+end
diff --git a/test/Project.toml b/test/Project.toml
index 91fab29..4b77e01 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -2,6 +2,7 @@
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 HerbCore = "2b23ba43-8213-43cb-b5ea-38c12b45bd45"
 HerbGrammar = "4ef9e186-2fe5-4b24-8de7-9f7291f24af7"
+HerbInterpret = "5bbddadd-02c5-4713-84b8-97364418cca7"
 HerbSpecification = "6d54aada-062f-46d8-85cf-a1ceaf058a06"
 ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
 RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47"