Add config and expand docs

claudiosv · claudiosv · commit 91408e0274e2 · 2025-05-16T15:25:40.000-07:00
Signed-off-by: Claudio Spiess <claudiosv@users.noreply.github.com>
diff --git a/docs/autopdl.md b/docs/autopdl.md
@@ -91,3 +91,24 @@ variables: # define discrete options to sample from
 ```python title="examples/optimizer/gsm8k_evaluator.py" linenums="1"
 --8<-- "./examples/optimizer/gsm8k_evaluator.py"
 ```
+
+An example script that runs the optimization process is provided in `examples/optimizer/optimize.py`.
+Usage:
+
+```
+python optimize.py optimize -h
+usage: optimize.py optimize [-h] --config CONFIG --dataset-path DATASET_PATH [--experiments-path EXPERIMENTS_PATH]
+                            [--yield_output | --no-yield_output] [--dry | --no-dry]
+                            pdl_file
+```
+
+We also need a dataset to optimize against, with `train`, `test`, and `validation` splits. To produce such a dataset, we can use the `load_dataset` function and the `save_to_disk` method from Hugging Face Datasets. This example requires the dataset to have the columns `question`, `reasoning`, and `answer`, which can be derived from the original `openai/gsm8k` dataset. Processing scripts are under development and will follow shortly.
+
+We can run an example like so:
+
+```
+cd examples/optimizer
+python optimize.py optimize --config config.yml --dataset-path datasets/gsm8k gsm8k.pdl
+```
+
+Once the process is complete, a file `optimized_gsm8k.pdl` is written. This file contains the optimal configuration and is directly executable by the standard PDL interpreter.
diff --git a/examples/optimizer/config.yml b/examples/optimizer/config.yml
@@ -0,0 +1,18 @@
+benchmark: "gsm8k"
+initial_test_set_size: 1
+max_test_set_size: 1
+num_candidates: 5
+num_demonstrations: 3
+parallelism: 1
+shuffle_test: false
+test_set_name: "test"
+train_set_name: "train"
+timeout: 120
+experiment_prefix: "granite_3_8b_instruct_gsm8k_3_shot_"
+variables:
+  model:
+  - "watsonx_text/ibm/granite-3-8b-instruct"
+  prompt_pattern:
+  - "cot"
+  num_demonstrations:
+  - 3
diff --git a/examples/optimizer/optimize.py b/examples/optimizer/optimize.py
@@ -6,16 +6,15 @@
 
 import yaml
 from datasets import load_from_disk
+from fever_evaluator import FEVEREvaluator
+from gsm8k_evaluator import Gsm8kEvaluator
+from gsmhard_evaluator import GsmHardEvaluator
+from mbpp_dataset import MBPPDataset
+from mbpp_evaluator import MBPPEvaluator
 
 from pdl.optimize.config_parser import OptimizationConfig
 from pdl.optimize.pdl_optimizer import PDLOptimizer
 
-from .fever_evaluator import FEVEREvaluator
-from .gsm8k_evaluator import Gsm8kEvaluator
-from .gsmhard_evaluator import GsmHardEvaluator
-from .mbpp_dataset import MBPPDataset
-from .mbpp_evaluator import MBPPEvaluator
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="PDL optimization and benchmarking tool",
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -47,7 +47,7 @@ nav:
   - API Reference: api_reference.md
   - Contribute: contrib.md
   - Viewer: viewer.md
-  - AutoPDL: autopdl.md
+  # - AutoPDL: autopdl.md # Hide documentation for now
 
 # Define some IBM colors
 extra_css: