33import inspect
44import re
55from collections .abc import Callable
6+ from copy import copy
67from typing import Any , overload
78
89from mellea .backends import (
@@ -97,6 +98,9 @@ def __init__(
9798 self .validation_fn = validation_fn
9899 self .check_only = check_only
99100
101+ # Used for validation. Do not manually populate.
102+ self ._output : str | None = None
103+
100104 def validate (
101105 self ,
102106 backend : Backend ,
@@ -117,17 +121,18 @@ def validate(
117121 assert isinstance (last_output , ModelOutputThunk ), (
118122 " Context has no appropriate last output"
119123 )
120- self ._output = last_output .value # type: ignore
124+
125+ # Validate against a shallow copy of the requirement that holds the output, so
126+ # the template is populated correctly without mutating this reusable instance.
127+ req_copy = copy (self )
128+ req_copy ._output = last_output .value
121129 llm_as_a_judge_result = backend .generate_from_context (
122- self ,
130+ req_copy ,
123131 ctx ,
124132 format = format ,
125133 model_options = model_options ,
126134 generate_logs = generate_logs ,
127135 )
128- # This is crucial, because requirements can get reused;
129- # this also means requirements are not thread-safe.
130- self ._output = None
131136 return ValidationResult (
132137 result = self .output_to_bool (llm_as_a_judge_result ),
133138 reason = llm_as_a_judge_result .value ,
0 commit comments