Skip to content

Commit 9224e1c

Browse files
authored
Bug fix for requirements, gpt for llm-judge (#1212)
Signed-off-by: Mandana Vaziri <[email protected]>
1 parent aaeba0f commit 9224e1c

File tree

2 files changed

+47
-29
lines changed

2 files changed

+47
-29
lines changed

src/pdl/pdl_interpreter.py

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -512,6 +512,28 @@ def process_advance_block_retry( # noqa: C901
512512
result, background, new_scope, trace = process_block_body(
513513
state, scope, block, loc
514514
)
515+
516+
result = lazy_apply(id_with_set_first_use_nanos(block.pdl__timing), result)
517+
add_done_callback(
518+
id_with_set_first_use_nanos(block.pdl__timing), background
519+
)
520+
trace = trace.model_copy(update={"pdl__result": result})
521+
if block.parser is not None:
522+
parser_func = partial(parse_result, block.parser)
523+
result = lazy_apply(parser_func, result)
524+
if init_state.yield_result:
525+
yield_result(result, block.kind)
526+
if block.spec is not None and not isinstance(block, FunctionBlock):
527+
checker = partial(
528+
result_with_type_checking,
529+
spec=block.spec,
530+
msg="Type errors during spec checking:",
531+
loc=append(loc, "spec"),
532+
trace=trace,
533+
)
534+
result = lazy_apply(checker, result)
535+
if block.fallback is not None:
536+
result.result()
515537
if block.requirements != []:
516538
requirements_satisfied = True
517539
for req in block.requirements:
@@ -541,28 +563,6 @@ def process_advance_block_retry( # noqa: C901
541563
scope = scope | {"pdl_context": new_context}
542564
if requirements_satisfied is False:
543565
continue
544-
545-
result = lazy_apply(id_with_set_first_use_nanos(block.pdl__timing), result)
546-
add_done_callback(
547-
id_with_set_first_use_nanos(block.pdl__timing), background
548-
)
549-
trace = trace.model_copy(update={"pdl__result": result})
550-
if block.parser is not None:
551-
parser_func = partial(parse_result, block.parser)
552-
result = lazy_apply(parser_func, result)
553-
if init_state.yield_result:
554-
yield_result(result, block.kind)
555-
if block.spec is not None and not isinstance(block, FunctionBlock):
556-
checker = partial(
557-
result_with_type_checking,
558-
spec=block.spec,
559-
msg="Type errors during spec checking:",
560-
loc=append(loc, "spec"),
561-
trace=trace,
562-
)
563-
result = lazy_apply(checker, result)
564-
if block.fallback is not None:
565-
result.result()
566566
break
567567
except Exception as exc:
568568
err_msg = traceback.format_exc()

src/pdl/pdl_stdlib.pdl

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,18 @@ defs:
33
reward:
44
function:
55
response:
6+
evaluation: string
67
return:
78
defs:
8-
top_logprobs: ${ response.choices[0].logprobs.content[0].top_logprobs}
9+
contents: ${ response['choices'][0].logprobs.content}
910
lastOf:
11+
- for:
12+
content: ${ contents }
13+
repeat:
14+
if: ${ content.token == evaluation }
15+
then:
16+
def: top_logprobs
17+
data: ${ content.top_logprobs }
1018
- for:
1119
tp: ${ top_logprobs }
1220
repeat:
@@ -23,6 +31,14 @@ defs:
2331
- lang: python
2432
code: |
2533
import math
34+
try:
35+
lp_y
36+
except NameError:
37+
lp_y = -10
38+
try:
39+
lp_n
40+
except NameError:
41+
lp_n = -10
2642
result = math.log(math.exp(lp_y) / (math.exp(lp_y) + math.exp(lp_n)))
2743

2844
requirements:
@@ -34,19 +50,21 @@ defs:
3450
llm_as_judge: {optional: string}
3551
return:
3652
lastOf:
37-
- model: ${ llm_as_judge | default('watsonx/meta-llama/llama-3-3-70b-instruct') }
53+
- #model: ${ llm_as_judge | default('watsonx/meta-llama/llama-3-3-70b-instruct') }
54+
model: ${ llm_as_judge | default('watsonx/openai/gpt-oss-120b') }
3855
def: evaluation
3956
input: |
40-
Is the following requirement satisfied in the solution below? Requirement: ${ requirement }
41-
${ response }
42-
43-
Respond with only 'Yes' or 'No'.
57+
Problem: ${ requirement }
58+
Solution: ${ response }
59+
60+
Respond with only ('Yes'/'No')
4461
modelResponse: out
4562
parameters:
4663
temperature: 0
4764
logprobs: true
4865
top_logprobs: 5
49-
- ${ reward(out) }
66+
spec: {enum: ['Yes', 'No']}
67+
- ${ reward(response=out, evaluation=evaluation) }
5068

5169

5270
transformContext:

0 commit comments

Comments
 (0)