generative-computing
diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py
Lines changed: 0 additions & 1 deletion
diff --git a/mellea/stdlib/requirement.py b/mellea/stdlib/requirement.py
Lines changed: 83 additions & 10 deletions
diff --git a/mellea/stdlib/sampling.py b/mellea/stdlib/sampling.py
Lines changed: 22 additions & 10 deletions
diff --git a/mellea/stdlib/session.py b/mellea/stdlib/session.py
Lines changed: 6 additions & 9 deletions
@@ -18,7 +18,6 @@
 from transformers.tokenization_utils import PreTrainedTokenizer
 
 import mellea.backends.model_ids as model_ids
-from cli.serve.models import ChatCompletionMessage
 from mellea.backends import BaseModelSubclass
 from mellea.backends.aloras import Alora, AloraBackendMixin
 from mellea.backends.formatter import Formatter, FormatterBackend, TemplateFormatter
 
@@ -1,8 +1,9 @@
 """Requirements are a special type of Component used as input to the "validate" step in Instruct/Validate/Repair design patterns."""
 
+import inspect
 import re
 from collections.abc import Callable
-from typing import Any
+from typing import Any, overload
 
 from mellea.backends import (
     Backend,
@@ -35,13 +36,48 @@ def default_output_to_bool(x: CBlock | str) -> bool:
     return False
 
 
+class ValidationResult:
+    """ValidationResults store the output of a Requirement's validation. They can be used to return additional info from validation functions, which is useful for sampling/repairing."""
+
+    def __init__(
+        self, result: bool, *, reason: str | None = None, score: float | None = None
+    ):
+        """The result of a requirement's validation.
+
+        A ValidationResult's result field always contains a definitive pass/fail. The other fields can be used to communicate additional information about that result.
+
+        Args:
+            result: a boolean that is true if the requirement passed
+            reason: a reason for the result
+            score: if your validator gives you a score back, you can add this as metadata
+        """
+        self._result = result
+        self._reason = reason
+        self._score = score
+
+    @property
+    def reason(self) -> str | None:
+        return self._reason
+
+    @property
+    def score(self) -> float | None:
+        return self._score
+
+    def as_bool(self) -> bool:
+        """Return the definitive pass/fail result of the validation as a boolean."""
+        return self._result
+
+    def __bool__(self) -> bool:
+        return self.as_bool()
+
+
 class Requirement(Component):
     """Requirements are a special type of Component used as input to the Validate step in Instruct/Validate/Repair patterns."""
 
     def __init__(
         self,
         description: str | None = None,
-        validation_fn: Callable[[Context], Any] | None = None,
+        validation_fn: Callable[[Context], ValidationResult] | None = None,
         *,
         output_to_bool: Callable[[CBlock | str], bool] | None = default_output_to_bool,
         check_only: bool = False,
@@ -69,12 +105,11 @@ def validate(
         format: type[BaseModelSubclass] | None = None,
         model_options: dict | None = None,
         generate_logs: list[GenerateLog] | None = None,
-    ) -> tuple[Any, bool]:
+    ) -> ValidationResult:
         """Chooses the appropriate validation strategy and applies that strategy."""
         if self.validation_fn is not None:
             # Python validation strategy
-            result = self.validation_fn(ctx)
-            return result, bool(result)
+            return self.validation_fn(ctx)
         else:
             # LLMaJ validation strategy. This includes ALora because the backend generate call will appropriately dispatch.
             assert self.output_to_bool is not None
@@ -93,7 +128,10 @@ def validate(
             # This is crucial, because requirements can get reused;
             # this also means requirements are not thread-safe.
             self._output = None
-            return llm_as_a_judge_result, self.output_to_bool(llm_as_a_judge_result)
+            return ValidationResult(
+                result=self.output_to_bool(llm_as_a_judge_result),
+                reason=llm_as_a_judge_result.value,
+            )
 
     def parts(self):
         """Returns all of the constituent parts of a Requirement."""
@@ -158,7 +196,21 @@ def check(*args, **kwargs) -> Requirement:
     return Requirement(*args, **kwargs, check_only=True)
 
 
-def simple_validate(fn: Callable[[str], bool]) -> Callable[[Context], bool]:
+@overload
+def simple_validate(
+    fn: Callable[[str], tuple[bool, str]],
+) -> Callable[[Context], ValidationResult]: ...
+
+
+@overload
+def simple_validate(
+    fn: Callable[[str], bool], *, reason: str | None = None
+) -> Callable[[Context], ValidationResult]: ...
+
+
+def simple_validate(
+    fn: Callable[[str], Any], *, reason: str | None = None
+) -> Callable[[Context], ValidationResult]:
     """Syntactic sugar for writing validation functions that only operate over the last output from the model (interpreted as a string).
 
     This is useful when your validation logic only depends upon the most recent model output. For example:
@@ -170,15 +222,36 @@ def simple_validate(fn: Callable[[str], bool]) -> Callable[[Context], bool]:
     Important notes:
      - this operates over the more recent _model output_, not the most recent message.
      - Model outputs are sometimes parsed into more complex types (eg by a `Formatter.parse` call or an OutputProcessor). This validation logic will interpret the most recent output as a string, regardless of whether it has a more complex parsed representation.
+
+    Args:
+        fn: the simple validation function that takes a string and returns either a bool or (bool, str)
+        reason: only used if the provided function runs and returns a bool; a static reason attached to the result, meant to explain a failure to the llm when repairing
     """
 
-    def validate(ctx: Context) -> bool:
+    def validate(ctx: Context) -> ValidationResult:
         o = ctx.last_output()
         if o is None or o.value is None:
             FancyLogger.get_logger().warn(
                 "Last output of context was None. That might be a problem. We return validation as False to be able to continue..."
             )
-            return False
-        return fn(o.value)
+            return ValidationResult(
+                False
+            )  # Don't pass in the static reason since the function didn't run.
+
+        result = fn(o.value)
+
+        # Only confirm that the result conforms to the fn type requirements here. Functions can
+        # declare return types and then deviate from them.
+
+        # Oneliner that checks the tuple actually contains (bool, str)
+        if isinstance(result, tuple) and list(map(type, result)) == [bool, str]:
+            return ValidationResult(result[0], reason=result[1])
+
+        elif type(result) is bool:
+            return ValidationResult(result, reason=reason)
+
+        raise ValueError(
+            f"function {fn.__name__} passed to simple_validate didn't return either bool or [bool, str]; returned {type(result)} instead"
+        )
 
     return validate
@@ -9,7 +9,7 @@
 from mellea.helpers.fancy_logger import FancyLogger
 from mellea.stdlib.base import CBlock, GenerateLog, ModelOutputThunk
 from mellea.stdlib.instruction import Instruction
-from mellea.stdlib.requirement import Requirement
+from mellea.stdlib.requirement import Requirement, ValidationResult
 
 
 class SamplingResult(CBlock):
@@ -21,15 +21,16 @@ def __init__(
         success: bool,
         *,
         sample_generations: list[ModelOutputThunk] | None = None,
-        sample_validations: list[list[tuple[Requirement, bool]]] | None = None,
+        sample_validations: list[list[tuple[Requirement, ValidationResult]]]
+        | None = None,
     ):
         """Initialize a new instance of sampling results.
 
         Args:
             result: The final output or result from applying the sampling strategy.
             success: A boolean indicating whether the operation was successful.
             sample_generations: A list containing intermediate generations produced during the process.
-            sample_validations: For each generation a list of a requirement and a boolean value indicating whether the requirement was met.
+            sample_validations: For each generation a list of tuples of a requirement and a validation result.
         """
         super().__init__(value=result.value)
         self.result = result
@@ -45,7 +46,9 @@ class SamplingStrategy(abc.ABC):
     It allows setting custom validation and generation functions through properties.
     """
 
-    validate: Callable[[list[Requirement], Any], list[bool]] | None = None
+    # the function signature here matches that of m.validate
+    validate: Callable[[list[Requirement], Any], list[ValidationResult]] | None = None
+
     generate: (
         Callable[[Instruction, list[GenerateLog] | None], ModelOutputThunk] | None
     ) = None
@@ -75,14 +78,23 @@ def __init__(
         *,
         loop_budget: int = 1,
         repair: Callable[
-            [Instruction, list[tuple[Requirement, bool]], list[Instruction]],
+            [
+                Instruction,
+                list[tuple[Requirement, ValidationResult]],
+                list[Instruction],
+            ],
             Instruction,
         ] = lambda i, r, h_i: i,
         select_from_failure: Callable[
-            [Instruction, list[ModelOutputThunk], list[list[tuple[Requirement, bool]]]],
+            [
+                Instruction,
+                list[ModelOutputThunk],
+                list[list[tuple[Requirement, ValidationResult]]],
+            ],
             ModelOutputThunk,
         ] = lambda _, results, __: results[0],
-        validate: Callable[[list[Requirement], Any], list[bool]] | None = None,
+        validate: Callable[[list[Requirement], Any], list[ValidationResult]]
+        | None = None,
         generate: (
             Callable[[Instruction, list[GenerateLog] | None], ModelOutputThunk] | None
         ) = None,
@@ -139,7 +151,7 @@ def sample(
         flog = FancyLogger.get_logger()
 
         failed_results: list[ModelOutputThunk] = []
-        failed_scores: list[list[tuple[Requirement, bool]]] = []
+        failed_scores: list[list[tuple[Requirement, ValidationResult]]] = []
         failed_instructions: list[Instruction] = []
 
         loop_count = 0
@@ -169,7 +181,7 @@ def sample(
             failed_scores.append(constraint_scores)
             failed_instructions.append(instruction)
 
-            if all(s[1] for s in constraint_scores):
+            if all(bool(s[1]) for s in constraint_scores):
                 flog.info("SUCCESS")
                 return SamplingResult(
                     result,
@@ -179,7 +191,7 @@ def sample(
                 )
 
             else:
-                count_valid = len([s for s in constraint_scores if s[1]])
+                count_valid = len([s for s in constraint_scores if bool(s[1])])
                 flog.info(f"FAILED. Valid: {count_valid}/{len(constraint_scores)}")
             # If we did not pass all constraints, update the instruction and try again.
             instruction = self.repair(
 
@@ -31,7 +31,7 @@
 from mellea.stdlib.instruction import Instruction
 from mellea.stdlib.mify import mify
 from mellea.stdlib.mobject import MObjectProtocol
-from mellea.stdlib.requirement import Requirement, check, req
+from mellea.stdlib.requirement import Requirement, ValidationResult, check, req
 from mellea.stdlib.sampling import SamplingResult, SamplingStrategy
 
 
@@ -293,11 +293,10 @@ def validate(
         reqs: Requirement | list[Requirement],
         *,
         output: CBlock | None = None,
-        return_full_validation_results: bool = False,
         format: type[BaseModelSubclass] | None = None,
         model_options: dict | None = None,
         generate_logs: list[GenerateLog] | None = None,
-    ) -> list[bool] | list[tuple[Any, bool]]:
+    ) -> list[ValidationResult]:
         """Validates a set of requirements over the output (if provided) or the current context (if the output is not provided)."""
         # Turn a solitary requirement in to a list of requirements, and then reqify if needed.
         reqs = [reqs] if not isinstance(reqs, list) else reqs
@@ -309,18 +308,16 @@ def validate(
             validation_target_ctx.insert(output)
         rvs = []
         for requirement in reqs:
-            req_v, req_satisfied = requirement.validate(
+            val_result = requirement.validate(
                 self.backend,
                 validation_target_ctx,
                 format=format,
                 model_options=model_options,
                 generate_logs=generate_logs,
             )
-            rvs.append((req_v, req_satisfied))
-        if return_full_validation_results:
-            return rvs
-        else:
-            return [b for (_, b) in rvs]
+            rvs.append(val_result)
+
+        return rvs
 
     def req(self, *args, **kwargs):
         """Shorthand for Requirement.__init__(...)."""