IBM · claudiosv · May 19, 2025 · Apr 10, 2025 · Apr 10, 2025 · Apr 10, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -56,6 +56,7 @@ repos:
     rev: 'v1.15.0'
     hooks:
       - id: mypy
+        args: [--explicit-package-bases]
         verbose: true
         additional_dependencies: ['types-PyYAML']
   # type check the Python code using pyright

diff --git a/contrib/prompt_library/ReAct.pdl b/contrib/prompt_library/ReAct.pdl
@@ -10,26 +10,28 @@ defs:
             trajectory: ${ trajectory }
           repeat:
             text:
-              - def: type
-                text: ${ trajectory.keys()|first }
-                contribute: []
-              - if: ${ type == 'question'}
-                then: |
-                  Question: ${ trajectory[type]|trim }
-              - if: ${ type == 'task'}
-                then: |
-                  Task: ${ trajectory[type]|trim }
-              - if: ${ type == 'thought'}
-                then: |
-                  Tho: ${ trajectory[type]|trim }
-              - if: ${ type == 'action'}
-                then: |
-                  Act: ${ trajectory[type]|trim }
-              - if: ${ type == 'observation'}
-                then: |
-                  Obs: ${ trajectory[type]|trim }
-              - if: ${ type not in ['question', 'task', 'thought', 'action', 'observation'] }
-                then: "${ type }: ${ trajectory[type]|trim }"
+              - defs:
+                  type:
+                    text: ${ trajectory.keys()|first }
+              - match: ${ type }
+                with:
+                  - case: question
+                    then: |
+                      Question: ${ trajectory[type]|trim }
+                  - case: task
+                    then: |
+                      Task: ${ trajectory[type]|trim }
+                  - case: thought
+                    then: |
+                      Tho: ${ trajectory[type]|trim }
+                  - case: action
+                    then: |
+                      Act: ${ trajectory[type]|trim }
+                  - case: observation
+                    then: |
+                      Obs: ${ trajectory[type]|trim }
+                  - if: ${ type not in ['question', 'task', 'thought', 'action', 'observation'] }
+                    then: "${ type }: ${ trajectory[type]|trim }"
         - "\n"
 
   react:
@@ -101,13 +103,12 @@ defs:
                   then:
                     text:
                       - "\nObs: "
-                      - if: ${ action.name in tools }
+                      - if: ${ action.name.lower() in tools }
                         then:
-                          call: ${ tools[action.name] }
+                          call: ${ tools[action.name.lower()] }
                           args:
                             arguments: ${ action.arguments }
                         else: "Invalid action. Valid actions are ${ tool_names[:-1]|join(', ') }, and ${ tool_names[-1] }."
-                      # - "\n"
                 else:
                   def: exit
                   contribute: []

diff --git a/contrib/prompt_library/ReWoo.pdl b/contrib/prompt_library/ReWoo.pdl
@@ -20,24 +20,29 @@ defs:
                     text: ${ trajectory.keys()|first }
                   content:
                     text: ${ trajectory.values()|first }
-              - if: ${ type in ['task', 'question'] }
-                then: |-
-                  Task: ${ content|trim }
-              - if: ${ type == 'thought'}
-                then: |-
+              - match: ${ type }
+                with:
+                  - case: task
+                    then: |-
+                      Task: ${ content|trim }
+                  - case: question
+                    then: |-
+                      Task: ${ content|trim }
+                  - case: thought
+                    then: |-
 
-                  Plan: ${ content|trim }
-              - if: ${ type == 'action'}
-                then:
-                  text:
-                    - " #E${ i } = ${ content|trim }"
-                    - defs:
-                        i:
-                          data: ${ i+1 }
-              - if: ${ type == 'observation'}
-                then: ""
-              - if: ${ type not in ['question', 'task', 'thought', 'action', 'observation'] }
-                then: "${ type }: ${ content|trim }\n"
+                      Plan: ${ content|trim }
+                  - case: action
+                    then:
+                      text:
+                        - " #E${ i } = ${ content|trim }"
+                        - defs:
+                            i:
+                              data: ${ i+1 }
+                  - case: observation
+                    then: ""
+                  - if: ${ type not in ['question', 'task', 'thought', 'action', 'observation'] }
+                    then: "${ type }: ${ content|trim }\n"
         - "\n"
 
   rewoo:
@@ -120,9 +125,9 @@ defs:
                               ACTION_RAW = ACTION_RAW.replace(k, v)
                           result = ACTION_RAW
                       tool_output:
-                        if: ${ ACTION.name in tools }
+                        if: ${ ACTION.name.lower() in tools }
                         then:
-                          call: ${ tools[ACTION.name] }
+                          call: ${ tools[ACTION.name.lower()] }
                           args:
                             arguments: ${ ACTION.arguments }
                         else: "Invalid action. Valid actions are ${ tools.keys() }"

diff --git a/docs/autopdl.md b/docs/autopdl.md
@@ -0,0 +1,114 @@
+---
+hide:
+  - navigation
+  - toc
+  - footer
+---
+
+# AutoPDL Tutorial
+
+The following sections show how to use the AutoPDL optimizer to produce optimized PDL programs for specific tasks.
+
+To optimize a PDL program, we need the program itself, an optimizer configuration, a dataset, and an _evaluator_. An evaluator is a Python subclass of `OptimizerEvaluator` that evaluates a candidate, i.e. a generated configuration instance (for example, a particular set of few-shot examples). The evaluator class follows this structure:
+
+```python title="src/pdl/optimize/optimizer_evaluator.py" linenums="1"
+class OptimizerEvaluator(Thread):
+    """Evaluates a candidate (configuration, i.e. fewshots, style) against **one** test example."""
+
+    def __init__(
+        self,
+        pdl_program: Program,
+        example: dict,
+        candidate: dict,
+        index: int,
+        timeout: int,
+        yield_output: bool,
+        config: OptimizationConfig,
+        cwd: Path,
+        answer_key: str = "answer",
+    ) -> None:
+        super().__init__()
+        self.pdl_program = pdl_program
+        ...
+
+    def get_scope(self) -> ScopeType:
+        """
+        Constructs a PDL scope for the candidate,
+        can take self.candidate and self.config into account
+        """
+
+    def extract_answer(self, document: str) -> Any:
+        """
+        Extracts the final answer from the PDL result document,
+        i.e. the string the PDL program returns
+        """
+
+    def answer_correct(self, document: str, answer: Any, truth: Any) -> bool:
+        """
+        Checks the extracted answer against the groundtruth value,
+        in self.example[self.answer_key]
+        """
+```
+
+Let's go through an example for `GSM8K`. Our PDL program uses different prompt patterns from the prompt library, and the variables `prompt_pattern`, `question`, `model`, and `demonstrations` are inserted at runtime by the evaluator.
+
+
+```yaml title="examples/optimizer/gsm8k.pdl" linenums="1"
+--8<-- "./examples/optimizer/gsm8k.pdl"
+```
+
+We write a configuration file for the optimizer (see `src/pdl/optimize/config_parser.py` for all fields):
+
+``` { .yaml .copy .annotate title="gsm8k_optimizer_config.yml" linenums="1" }
+benchmark: gsm8k # Name our benchmark
+budget: null # Set a budget, can be number of iterations, or a duration string e.g. "2h"
+budget_growth: double # double validation set size each iteration
+# or to_max: reach max_test_set_size by final iteration
+initial_test_set_size: 2 # size of test set in first iteration
+max_test_set_size: 10 # maximum test set size
+num_candidates: 100 # how many candidates to evaluate
+num_demonstrations: 5 # how many demonstrations to include per candidate
+parallelism: 1 # how many threads to run evaluations across
+shuffle_test: false # shuffling of test set
+test_set_name: test # name of test set
+train_set_name: train # name of train set
+validation_set_name: validation # name of validation set
+demonstrations_variable_name: demonstrations # variable name to insert demonstrations into
+variables: # define discrete options to sample from
+  model: # set ${ model } variable
+    - watsonx/meta-llama/llama-3-1-8b-instruct
+  prompt_pattern: # set ${ prompt_pattern } variable to one of these
+    - cot
+    - react
+    - rewoo
+  num_demonstrations: # overrides num demonstrations above
+    - 0
+    - 3
+    - 5
+```
+
+
+```python title="examples/optimizer/gsm8k_evaluator.py" linenums="1"
+--8<-- "./examples/optimizer/gsm8k_evaluator.py"
+```
+
+An example script that runs the optimization process is provided in `examples/optimizer/optimize.py`.
+Usage:
+
+```
+python optimize.py optimize -h
+usage: optimize.py optimize [-h] --config CONFIG --dataset-path DATASET_PATH [--experiments-path EXPERIMENTS_PATH]
+                            [--yield_output | --no-yield_output] [--dry | --no-dry]
+                            pdl_file
+```
+
+We also need a dataset to optimize against, with `train`, `test`, and `validation` splits. To produce such a dataset, we can use the Hugging Face Datasets functions `load_dataset` and `save_to_disk`. This example requires the dataset to have the columns `question`, `reasoning`, and `answer`, which can be derived from the original `openai/gsm8k` dataset. Processing scripts are under development and will follow shortly.
+
+We can run the example with the `config.yml` file in `examples/optimizer` like so:
+
+```
+cd examples/optimizer
+python optimize.py optimize --config config.yml --dataset-path datasets/gsm8k gsm8k.pdl
+```
+
+Once the process is complete, a file `optimized_gsm8k.pdl` is written. This file contains the optimal configuration and is directly executable by the standard PDL interpreter.
diff --git a/examples/optimizer/__init__.py b/examples/optimizer/__init__.py
diff --git a/examples/optimizer/config.yml b/examples/optimizer/config.yml
@@ -0,0 +1,18 @@
+benchmark: "gsm8k"  # name of the benchmark
+initial_test_set_size: 1  # size of the test set in the first iteration
+max_test_set_size: 1  # maximum test set size
+num_candidates: 5  # how many candidates to evaluate
+num_demonstrations: 3  # how many demonstrations to include per candidate
+parallelism: 1  # how many threads to run evaluations across
+shuffle_test: false  # whether to shuffle the test set
+test_set_name: "test"  # name of the test split
+train_set_name: "train"  # name of the train split
+timeout: 120  # NOTE(review): unit not shown here, presumably seconds per evaluation; confirm in config_parser.py
+experiment_prefix: "granite_3_8b_instruct_gsm8k_3_shot_"  # presumably prepended to experiment output names; TODO confirm
+variables:  # discrete options to sample candidates from
+  model:  # values for the ${ model } variable
+  - "watsonx_text/ibm/granite-3-8b-instruct"
+  prompt_pattern:  # values for the ${ prompt_pattern } variable
+  - "cot"
+  num_demonstrations:  # overrides num_demonstrations above
+  - 3
diff --git a/examples/optimizer/fever.pdl b/examples/optimizer/fever.pdl
@@ -0,0 +1,63 @@
+description: Demo of ReAct template fever
+defs:  # definitions referenced by the match block at the bottom of this file
+  cot:  # chain-of-thought module from the prompt library
+    import: ../../contrib/prompt_library/CoT
+  react:  # ReAct module from the prompt library
+    import: ../../contrib/prompt_library/ReAct
+  rewoo:  # ReWOO module from the prompt library
+    import: ../../contrib/prompt_library/ReWoo
+  tools:  # tools module; its `tools` member is passed to the ReAct and ReWOO calls
+    import: ../../contrib/prompt_library/tools
+
+  search_tools:  # tool schema passed as `tool_schema` to the ReAct and ReWOO calls
+    data:
+      - name: Search
+        description: Search Wikipedia for a summary
+        parameters:
+          type: object
+          properties:
+            topic:
+              type: string
+              description: The topic of interest
+          required:
+            - topic
+
+  task: |-  # prompt text shared by all three patterns; `claim` is not defined in this file
+    Task: On June 2017, the following claim was made: ${ claim }
+    Q: Was this claim true or false?
+match: ${ prompt_pattern }  # dispatch on the pattern name; prompt_pattern, model and demonstrations are presumably injected at runtime (TODO confirm)
+with:
+  # CoT: call the chain-of-thought template, passing the demonstrations as its examples
+  - case: cot
+    then:
+      text:
+        call: ${ cot.chain_of_thought }
+        args:
+          examples: "${ demonstrations }"
+          question: "${ task }"
+          model: "${ model }"
+
+  # ReAct: call the ReAct template with the Search tool schema, passing the demonstrations as trajectories
+  - case: react
+    then:
+      text:
+        call: ${ react.react }
+        args:
+          task: ${ task }
+          model: ${ model }
+          tool_schema: ${ search_tools }
+          tools: ${ tools.tools }
+          trajectories: ${ demonstrations }
+
+  # ReWOO: same arguments as the ReAct case, plus show_plans set to false
+  - case: rewoo
+    then:
+      text:
+        call: ${ rewoo.rewoo }
+        args:
+          task: ${ task }
+          model: ${ model }
+          tool_schema: ${ search_tools }
+          tools: ${ tools.tools }
+          trajectories: ${ demonstrations }
+          show_plans: false