gsm8k multi-plan, tree-of-thought, tree-of-thought with few shots (#881)

vazirim · web-flow · commit 618b83853a9d · 2025-04-08T16:14:18.000-04:00
Signed-off-by: Mandana Vaziri &lt;mvaziri@us.ibm.com&gt;
diff --git a/examples/gsm8k/gsm8k-tot-few-shot.pdl b/examples/gsm8k/gsm8k-tot-few-shot.pdl
@@ -0,0 +1,221 @@
+description: Grade School Math -- for every problem we generate a plan, then exectute and evaluate it.
+defs:
+  problems:
+    read: ./test.jsonl
+    parser: jsonl
+
+  MAX_ITERATIONS: 10
+  N: 3
+
+
+  majority_vote:
+    function:
+      numbers: [float]
+    return:
+      lang: python
+      code: |
+        from collections import Counter
+        frequency = Counter( ${ numbers })
+        most_frequent = max(frequency, key=frequency.get)
+        result = most_frequent
+
+  majority_vote_json:
+    function:
+      results: [{ "result": float }]
+    return:  
+      lastOf: 
+      - lang: python
+        def: numbers
+        code: |
+          result = [o["result"] for o in ${ results }]
+      - call: ${ majority_vote }
+        args:
+          numbers: ${ numbers }
+      
+  planning:
+    function:
+      problem: str
+      demos: [str]
+    return:
+      text:
+      - |
+        Please generate a high-level plan for solving the following question. 
+        As the first step, just say what method and idea you will use to solve the question. 
+        You can reorganize the information in the question. Do not do the actual calculation. 
+        Keep your response concise and within 80 words. 
+
+      - for: 
+          demo: ${ demos } 
+        repeat: 
+          ${ demo }
+        join:
+          with: "\n"
+      - text:
+        - "\nProblem:\n"
+        - ${ problem }
+        - "\n"
+        - model: ollama/granite3.2:8b
+          parameters:
+            temperature: 0.7
+            top_p: 0.85
+  
+  solve:
+    function:
+      plan: str
+    return:
+      text:
+      - ${ plan }
+      - |
+
+        The plan looks good! Now, use real numbers and do the calculation. Please solve the question 
+        step-by-step according to the high-level plan. Give me the final answer. Make your response short.
+      - "\nThe answer is:\n"
+      - model: ollama/granite3.2:8b
+        parameters:
+          temperature: 0.7
+          top_p: 0.85
+
+  extract_final_answer:
+    function:
+      solution: str
+    return:
+      lastOf:
+      - ${ solution }
+      - Extract the result from the above solution into a JSON object with field "result" and a float as value. Remove any dollar signs or other symbols.
+      - model: ollama/granite3.2:8b
+        parser: json
+        def: result
+        spec: { "result": float }
+        fallback:
+          data:
+            result: 0
+
+  compare_to_ground_truth:
+    function:
+      result: float
+      truth: str
+    return:
+      lastOf:
+      - data: ${ truth }
+        parser:
+          regex: "(.|\n)*#### (?P<answer>([0-9])*)\n*"
+          spec:
+            answer: str
+        def: ground_truth
+      - if: ${ result|float == ground_truth.answer|float}
+        then:
+          1
+        else:
+          0
+
+text:
+- defs:
+    demos:
+      read: demos.yaml
+      parser: yaml
+  for:
+    problem: ${ problems }
+  repeat:
+    repeat:
+      call: ${ planning }
+      args:
+        pdl_context: []
+        problem: ${ problem.question }
+        demos: ${ demos }
+    max_iterations: ${ N }
+    join:
+      as: array
+  max_iterations: ${ MAX_ITERATIONS }
+  def: plans
+  join:
+    as: array
+
+- for:
+    plans_for_problem: ${ plans }
+  repeat:
+    for: 
+      plan: ${ plans_for_problem }
+    repeat: 
+      repeat:
+        call: ${ solve }
+        args:
+          pdl_context: []
+          plan: ${ plan }
+      max_iterations: ${ N }
+      join:
+        as: array
+    join:
+      as: array
+  max_iterations: ${ MAX_ITERATIONS }
+  def: solutions
+  join:
+    as: array
+
+- for:
+    solution: ${ solutions }
+  repeat:
+    for: 
+      solutions_for_problem: ${ solution }
+    repeat:
+      for: 
+        solution_for_problem: ${ solutions_for_problem }
+      repeat:
+        call: ${ extract_final_answer }
+        args:
+          pdl_context: []
+          solution: ${ solution_for_problem }
+      max_iterations: ${ N }
+      join:
+        as: array
+    join:
+      as: array
+  max_iterations: ${ MAX_ITERATIONS }
+  def: results
+  join:
+    as: array
+
+- for:
+    all_results_for_problem: ${ results }
+  repeat: 
+    for: 
+      results_for_problem: ${ all_results_for_problem } 
+    repeat:
+      call: ${ majority_vote_json }
+      args: 
+        pdl_context: []
+        results: ${ results_for_problem }
+    max_iterations: ${ N }
+    join:
+      as: array
+  max_iterations: ${ MAX_ITERATIONS }
+  def: per_plan_votes
+  join:
+    as: array
+
+- for:
+    votes: ${ per_plan_votes }
+  repeat: 
+    call: ${ majority_vote }
+    args: 
+      pdl_context: []
+      numbers: ${ votes }
+  max_iterations: ${ MAX_ITERATIONS }
+  join:
+    as: array
+  def: results
+
+- for:
+    result: ${ results }
+    problem: ${ problems[:MAX_ITERATIONS] }
+  repeat:
+    call: ${ compare_to_ground_truth }
+    args:
+      pdl_context: []
+      result: ${ result }
+      truth: ${ problem.answer }
+  max_iterations: ${ MAX_ITERATIONS }
+  def: stats
+  join:
+    as: array
+
+- "\nAccuracy: ${ stats|sum / MAX_ITERATIONS * 100}% "
diff --git a/examples/gsm8k/gsm8k-tot-multiplan.pdl b/examples/gsm8k/gsm8k-tot-multiplan.pdl
@@ -0,0 +1,167 @@
+description: Grade School Math -- for every problem we generate a plan, then exectute and evaluate it.
+defs:
+  problems:
+    read: ./test.jsonl
+    parser: jsonl
+
+  MAX_ITERATIONS: 2
+  N: 3
+
+
+  majority_vote:
+    function:
+      results: [{ "result": float }]
+    return:   
+      lang: python
+      code: |
+        from collections import Counter
+        numbers = [o["result"] for o in ${ results }]
+        frequency = Counter(numbers)
+        most_frequent = max(frequency, key=frequency.get)
+        result = most_frequent
+  planning:
+    function:
+      problem: str
+    return:
+      text:
+      - |
+        Please generate a high-level plan for solving the following question. 
+        As the first step, just say what method and idea you will use to solve the question. 
+        You can reorganize the information in the question. Do not do the actual calculation. 
+        Keep your response concise and within 80 words. 
+
+      - "\nProblem:\n"
+      - ${ problem }
+      - "\n"
+      - model: ollama/granite3.2:8b
+        parameters:
+          temperature: 0.7
+          top_p: 0.85
+  
+  solve:
+    function:
+      plan: str
+    return:
+      text:
+      - ${ plan }
+      - |
+
+        The plan looks good! Now, use real numbers and do the calculation. Please solve the question 
+        step-by-step according to the high-level plan. Give me the final answer. Make your response short.
+      - "\nThe answer is:\n"
+      - model: ollama/granite3.2:8b
+        parameters:
+          temperature: 0.7
+          top_p: 0.85
+
+  extract_final_answer:
+    function:
+      solution: str
+    return:
+      lastOf:
+      - ${ solution }
+      - Extract the result from the above solution into a JSON object with field "result" and a float as value. Remove any dollar signs or other symbols.
+      - model: ollama/granite3.2:8b
+        parser: json
+        def: result
+        spec: { "result": float }
+        fallback:
+          data:
+            result: 0
+
+  compare_to_ground_truth:
+    function:
+      result: float
+      truth: str
+    return:
+      lastOf:
+      - data: ${ truth }
+        parser:
+          regex: "(.|\n)*#### (?P<answer>([0-9])*)\n*"
+          spec:
+            answer: str
+        def: ground_truth
+      - if: ${ result|float == ground_truth.answer|float}
+        then:
+          1
+        else:
+          0
+
+text:
+- for:
+    problem: ${ problems }
+  repeat:
+    repeat:
+      call: ${ planning }
+      args:
+        pdl_context: []
+        problem: ${ problem.question }
+    max_iterations: ${ N }
+    join:
+      as: array
+  max_iterations: ${ MAX_ITERATIONS }
+  def: plans
+  join:
+    as: array
+
+- for:
+    plans_for_problem: ${ plans }
+  repeat:
+    for: 
+      plan: ${ plans_for_problem }
+    repeat: 
+      call: ${ solve }
+      args:
+        pdl_context: []
+        plan: ${ plan }
+    join:
+      as: array
+  max_iterations: ${ MAX_ITERATIONS }
+  def: solutions
+  join:
+    as: array
+
+- for:
+    solution: ${ solutions }
+  repeat:
+    for: 
+      solution_for_problem: ${ solution }
+    repeat:
+      call: ${ extract_final_answer }
+      args:
+        pdl_context: []
+        solution: ${ solution_for_problem }
+    join:
+      as: array
+  max_iterations: ${ MAX_ITERATIONS }
+  def: results
+  join:
+    as: array
+
+- for:
+    results_for_problem: ${ results }
+  repeat: 
+    call: ${ majority_vote }
+    args: 
+      pdl_context: []
+      results: ${ results_for_problem }
+  max_iterations: ${ MAX_ITERATIONS }
+  def: votes
+  join:
+    as: array
+
+- for:
+    result: ${ votes }
+    problem: ${ problems[:MAX_ITERATIONS] }
+  repeat:
+    call: ${ compare_to_ground_truth }
+    args:
+      pdl_context: []
+      result: ${ result }
+      truth: ${ problem.answer }
+  max_iterations: ${ MAX_ITERATIONS }
+  def: stats
+  join:
+    as: array
+
+- "\nAccuracy: ${ stats|sum / MAX_ITERATIONS * 100}% "
diff --git a/examples/gsm8k/gsm8k-tot.pdl b/examples/gsm8k/gsm8k-tot.pdl