diff --git a/examples/gsm8k/gsm8.pdl b/examples/gsm8k/gsm8k-loop-fission.pdl
similarity index 95%
rename from examples/gsm8k/gsm8.pdl
rename to examples/gsm8k/gsm8k-loop-fission.pdl
index 5f43320f3..ce70d580f 100644
--- a/examples/gsm8k/gsm8.pdl
+++ b/examples/gsm8k/gsm8k-loop-fission.pdl
@@ -18,6 +18,10 @@ defs:
   # How many problems to evaluate. The entire dataset is 1319 problems.
   # MAX_ITERATIONS: 1319
   MAX_ITERATIONS: 50
+  # Which model to use
+  # MODEL: ollama/granite-code:8b
+  # MODEL: ollama/granite3.2:8b
+  MODEL: watsonx/ibm/granite-3-2-8b-instruct
 
   # PDL variables that hold statistics
   SUCCESSES: 0
@@ -29,8 +33,7 @@ text:
     TEST: ${ TESTS }
   repeat:
     # Ask the LLM for the answer
-    # - model: ollama/granite-code:8b
-    model: ollama/granite3.2:8b
+    model: ${ MODEL }
     # First, get LLM to answer the question
     input: |
       Question: ${ TEST.question }
@@ -51,8 +54,7 @@ text:
     LLM_FULL_ANSWER: ${ ALL_LLM_FULL_A }
   repeat:
     # Next, get LLM to convert its answer into a single JSON key/value
-    # - model: ollama/granite-code:8b
-    model: ollama/granite3.2:8b
+    model: ${ MODEL }
     input: |  # 'input' is the prompt
       Generate the final answer from the conclusion of this text as JSON with a single key named answer.
       ${ LLM_FULL_ANSWER }
diff --git a/examples/gsm8k/gsm8k.pdl b/examples/gsm8k/gsm8k.pdl
new file mode 100644
index 000000000..01d2cb19e
--- /dev/null
+++ b/examples/gsm8k/gsm8k.pdl
@@ -0,0 +1,105 @@
+#!/usr/bin/env pdl
+
+# Grade School Math https://github.com/openai/grade-school-math is an
+# open source AI dataset from 2021.
+#
+# https://github.com/openai/grade-school-math/blob/master/grade_school_math/data/test.jsonl
+# is a file with 1319 questions and answers.
+#
+#
+
+description: Grade School Math
+defs:
+  # The Grade School Math Dataset
+  ALL_TESTS:
+    read: ./test.jsonl
+    parser: jsonl
+
+  # How many problems to evaluate. The entire dataset is 1319 problems.
+  # MAX_ITERATIONS: 1319
+  MAX_ITERATIONS: 5
+  # Which model to use
+  # MODEL: ollama/granite-code:8b
+  # MODEL: ollama/granite3.2:8b
+  MODEL: watsonx/ibm/granite-3-2-8b-instruct
+
+  # PDL variables that hold statistics
+  SUCCESSES: 0
+  FAILURES: 0
+  TESTS: ${ ALL_TESTS[:MAX_ITERATIONS] }
+lastOf:
+- def: SOLUTIONS
+  contribute: []
+  defs:
+    stats:
+      function:
+        r1: { success: integer, text: string}
+        r2: { success: integer, text: string}
+      return:
+        data:
+          success: ${ r1.success + r2.success }
+          text: ${ r1.text + "\n\n" + r2.text }
+  for:
+    TEST: ${ TESTS }
+  maxWorkers: 5
+  map:
+    lastOf:
+    # First phase: ask LLM the Grade School Math questions
+    - def: LLM_FULL_ANSWER
+      model: ${ MODEL }
+      input: |
+        Question: ${ TEST.question }
+        Answer:
+    # Next, get LLM to convert its answer into a single JSON key/value
+    - def: SIMPLIFIED_LLM_ANSWER
+      model: ${ MODEL }
+      input: |
+        Generate the final answer from the conclusion of this text as JSON with a single key named answer.
+        ${ LLM_FULL_ANSWER }
+    # Third phase: Compare with Grade School Math ground truth
+    - lastOf:
+      # Convert the JSON string to JSON. (We do this in a separate step so
+      # we have access to the original for debugging.)
+      - def: JSON_SIMPLIFIED_LLM_ANSWER
+        data: ${ SIMPLIFIED_LLM_ANSWER }
+        parser: json
+
+      # Strip any prefix or suffix from the number (dollar signs, units, etc.)
+      # and place it in the JSON format { "answer": ... }
+      - def: EXTRACTED_SIMPLIFIED_LLM_ANSWER
+        data: ${ JSON_SIMPLIFIED_LLM_ANSWER.answer|string if 'answer' in JSON_SIMPLIFIED_LLM_ANSWER else ("MISSING 'answer' in " + LLM_FULL_ANSWER) }
+        parser:
+          regex: "[^0-9]*(?P<answer>[0-9]+).*$"
+          spec:
+            answer: string
+      # (In case the simplified answer did not contain digits.)
+      - if: ${ EXTRACTED_SIMPLIFIED_LLM_ANSWER == None }
+        then:
+          def: EXTRACTED_SIMPLIFIED_LLM_ANSWER
+          data:
+            answer: "none"
+
+      # Extract the expected answer, which in this test data always follows "#### ",
+      # into { "answer": ... }
+      - data: ${ TEST.answer }
+        parser:
+          regex: "(.|\n)*#### (?P<answer>([0-9])*)\n*"
+          spec:
+            answer: string
+        def: EXTRACTED_GROUND_TRUTH
+
+      # Did we get the expected answer?
+      - if: ${ EXTRACTED_SIMPLIFIED_LLM_ANSWER.answer == EXTRACTED_GROUND_TRUTH.answer }
+        then:
+          object:
+            success: 1
+            text: |
+              LLM got right answer for '${ LLM_FULL_ANSWER }' which was simplified to '${ SIMPLIFIED_LLM_ANSWER }' which was extracted to '${ EXTRACTED_SIMPLIFIED_LLM_ANSWER.answer }'
+        else:
+          object:
+            success: 0
+            text: |
+              WRONG! Wanted ${ EXTRACTED_GROUND_TRUTH.answer } / LLM said '${ LLM_FULL_ANSWER }' which was simplified to '${ SIMPLIFIED_LLM_ANSWER }' which was extracted to '${ EXTRACTED_SIMPLIFIED_LLM_ANSWER.answer }'
+  join:
+    reduce: ${ stats }
+- Finished, ${ SOLUTIONS.success } successes on ${ MAX_ITERATIONS } tests
diff --git a/src/pdl/pdl_dumper.py b/src/pdl/pdl_dumper.py
index 52d087c71..54effbf3c 100644
--- a/src/pdl/pdl_dumper.py
+++ b/src/pdl/pdl_dumper.py
@@ -259,6 +259,8 @@ def block_to_dict(  # noqa: C901
             d["for"] = expr_to_dict(block.for_, json_compatible)
             if block.index is not None:
                 d["index"] = block.index
+            if block.maxWorkers is not None:
+                d["maxWorkers"] = expr_to_dict(block.maxWorkers, json_compatible)
             d["map"] = block_to_dict(block.map, json_compatible)
             if block.maxIterations is not None:
                 d["maxIterations"] = expr_to_dict(block.maxIterations, json_compatible)
diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py
index e47a388d3..407f8b3be 100644
--- a/src/pdl/pdl_interpreter.py
+++ b/src/pdl/pdl_interpreter.py
@@ -193,17 +193,15 @@ def with_role(self: "InterpreterState", role: RoleType) -> "InterpreterState":
         return self.model_copy(update={"role": role})
 
     def with_id(self: "InterpreterState", n: str) -> "InterpreterState":
-        stack = self.id_stack.copy() if self.id_stack is not None else []
-        stack.append(n)
-        return self.model_copy(update={"id_stack": stack})
+        stack = self.id_stack if self.id_stack is not None else []
+        return self.model_copy(update={"id_stack": stack + [n]})
 
     def with_iter(self: "InterpreterState", i: int) -> "InterpreterState":
         return self.with_id(str(i))
 
     def with_pop(self: "InterpreterState") -> "InterpreterState":
-        stack = self.id_stack.copy() if self.id_stack is not None else []
-        stack.pop()
-        return self.model_copy(update={"id_stack": stack})
+        stack = self.id_stack if self.id_stack is not None else []
+        return self.model_copy(update={"id_stack": stack[:-1]})
 
 
 class ClosureBlock(FunctionBlock):
@@ -961,7 +959,6 @@ def process_block_body(
             block, max_iterations = _evaluate_max_iterations_field(scope, block, loc)
             block = _evaluate_join_field(scope, block, loc)
             map_loc = append(loc, "map")
-            iidx = 0
             try:
                 if max_iterations is not None:
                     index_iterator: Any = range(max_iterations)
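For reviewers who want to sanity-check the scoring logic in the new `examples/gsm8k/gsm8k.pdl` outside the interpreter, here is a minimal Python sketch that mirrors its two regex parsers and the `stats` function used with `join: reduce:`. It is illustrative only: the sample strings are invented, and it assumes the PDL `regex` parser behaves like Python's `re` module with named groups (as the `(?P<answer>...)` syntax suggests); it is not a statement about the interpreter's exact matching mode.

```python
import re
from functools import reduce

# Mirrors: regex: "[^0-9]*(?P<answer>[0-9]+).*$"
# Pulls the first run of digits out of the simplified LLM answer.
SIMPLIFIED_RE = re.compile(r"[^0-9]*(?P<answer>[0-9]+).*$")

# Mirrors: regex: "(.|\n)*#### (?P<answer>([0-9])*)\n*"
# GSM8K ground-truth answers end with "#### <number>".
GROUND_TRUTH_RE = re.compile(r"(.|\n)*#### (?P<answer>([0-9])*)\n*")


def extract_simplified(text: str) -> str:
    m = SIMPLIFIED_RE.match(text)
    # "none" mirrors the fallback block used when no digits are found.
    return m.group("answer") if m else "none"


def extract_ground_truth(text: str) -> str:
    # The PDL file assumes the "#### " marker is always present in test.jsonl.
    m = GROUND_TRUTH_RE.match(text)
    return m.group("answer") if m else ""


def stats(r1: dict, r2: dict) -> dict:
    # Mirrors the PDL `stats` function: pairwise merge of per-test records.
    return {
        "success": r1["success"] + r2["success"],
        "text": r1["text"] + "\n\n" + r2["text"],
    }


if __name__ == "__main__":
    # Hypothetical sample data, not taken from test.jsonl.
    llm_answer = "The cafeteria has $18 left."
    ground_truth = "18 - 6 = 12 muffins were eaten.\n#### 18"
    assert extract_simplified(llm_answer) == "18"
    assert extract_ground_truth(ground_truth) == "18"

    # The parallel map produces one {success, text} record per test;
    # `join: reduce: ${ stats }` folds them into a single summary.
    results = [{"success": 1, "text": "ok"}, {"success": 0, "text": "wrong"}]
    print(reduce(stats, results))  # {'success': 1, 'text': 'ok\n\nwrong'}
```

Because `stats` is associative over the `success` counts, the order in which the `maxWorkers: 5` map finishes its per-test records should not affect the final success total, only the concatenation order of the `text` field.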