Fix requirements checking order; use gpt-oss as default LLM judge (#1212)

vazirim · web-flow · commit 9224e1c78592 · 2025-09-20T06:53:50.000-04:00
Signed-off-by: Mandana Vaziri <mvaziri@us.ibm.com>
diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py
@@ -512,6 +512,28 @@ def process_advance_block_retry(  # noqa: C901
             result, background, new_scope, trace = process_block_body(
                 state, scope, block, loc
             )
+
+            result = lazy_apply(id_with_set_first_use_nanos(block.pdl__timing), result)
+            add_done_callback(
+                id_with_set_first_use_nanos(block.pdl__timing), background
+            )
+            trace = trace.model_copy(update={"pdl__result": result})
+            if block.parser is not None:
+                parser_func = partial(parse_result, block.parser)
+                result = lazy_apply(parser_func, result)
+                if init_state.yield_result:
+                    yield_result(result, block.kind)
+            if block.spec is not None and not isinstance(block, FunctionBlock):
+                checker = partial(
+                    result_with_type_checking,
+                    spec=block.spec,
+                    msg="Type errors during spec checking:",
+                    loc=append(loc, "spec"),
+                    trace=trace,
+                )
+                result = lazy_apply(checker, result)
+            if block.fallback is not None:
+                result.result()
             if block.requirements != []:
                 requirements_satisfied = True
                 for req in block.requirements:
@@ -541,28 +563,6 @@ def process_advance_block_retry(  # noqa: C901
                             scope = scope | {"pdl_context": new_context}
                 if requirements_satisfied is False:
                     continue
-
-            result = lazy_apply(id_with_set_first_use_nanos(block.pdl__timing), result)
-            add_done_callback(
-                id_with_set_first_use_nanos(block.pdl__timing), background
-            )
-            trace = trace.model_copy(update={"pdl__result": result})
-            if block.parser is not None:
-                parser_func = partial(parse_result, block.parser)
-                result = lazy_apply(parser_func, result)
-                if init_state.yield_result:
-                    yield_result(result, block.kind)
-            if block.spec is not None and not isinstance(block, FunctionBlock):
-                checker = partial(
-                    result_with_type_checking,
-                    spec=block.spec,
-                    msg="Type errors during spec checking:",
-                    loc=append(loc, "spec"),
-                    trace=trace,
-                )
-                result = lazy_apply(checker, result)
-            if block.fallback is not None:
-                result.result()
             break
         except Exception as exc:
             err_msg = traceback.format_exc()
diff --git a/src/pdl/pdl_stdlib.pdl b/src/pdl/pdl_stdlib.pdl
@@ -3,10 +3,18 @@ defs:
   reward:
     function:
       response: 
+      evaluation: string
     return:
       defs:
-        top_logprobs: ${ response.choices[0].logprobs.content[0].top_logprobs}
+        contents: ${ response['choices'][0].logprobs.content}
       lastOf:
+      - for: 
+          content: ${ contents }
+        repeat: 
+          if: ${ content.token == evaluation }
+          then: 
+            def: top_logprobs
+            data: ${ content.top_logprobs }
       - for: 
           tp: ${ top_logprobs }
         repeat: 
@@ -23,6 +31,14 @@ defs:
       - lang: python  
         code: |
           import math 
+          try:
+            lp_y
+          except NameError:
+            lp_y = -10
+          try:
+            lp_n
+          except NameError:
+            lp_n = -10
           result = math.log(math.exp(lp_y) / (math.exp(lp_y) + math.exp(lp_n)))
 
   requirements:
@@ -34,19 +50,21 @@ defs:
           llm_as_judge: {optional: string}
         return:
           lastOf:
-          - model: ${ llm_as_judge | default('watsonx/meta-llama/llama-3-3-70b-instruct') }
+          - #model: ${ llm_as_judge | default('watsonx/meta-llama/llama-3-3-70b-instruct') }
+            model: ${ llm_as_judge | default('watsonx/openai/gpt-oss-120b') }
             def: evaluation
             input: |
-                  Is the following requirement satisfied in the solution below? Requirement: ${ requirement }
-                  ${ response }
-
-                  Respond with only 'Yes' or 'No'.
+              Problem: ${ requirement }
+              Solution: ${ response }
+              
+              Respond with only ('Yes'/'No')
             modelResponse: out
             parameters:
               temperature: 0
               logprobs: true
               top_logprobs: 5
-          - ${ reward(out) }
+            spec: {enum: ['Yes', 'No']}
+          - ${ reward(response=out, evaluation=evaluation) }
           
           
       transformContext: