From a97cce61b224730d031032ae42ccbb3d248329c3 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 20 Jun 2025 06:05:26 -0400 Subject: [PATCH 1/7] tests: disable non-deterministic granite-io tests Signed-off-by: Louis Mandel --- tests/test_examples_run.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml index 5e9ff3585..db022e87b 100644 --- a/tests/test_examples_run.yaml +++ b/tests/test_examples_run.yaml @@ -20,6 +20,9 @@ skip: - pdl-live-react/demos/demo2.pdl - pdl-live-react/src-tauri/tests/cli/read-stdin.pdl - examples/granite-io/granite_io_transformers.pdl + - examples/granite-io/granite_io_thinking.pdl + - examples/granite-io/granite_io_object.pdl + - examples/granite-io/granite_io_hallucinations.pdl - examples/optimizer/mbpp.pdl - examples/optimizer/fever.pdl - examples/optimizer/gsm8k.pdl From 86d862544955fee2d31a54a2a53105ba74f5afe5 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 20 Jun 2025 10:29:22 -0400 Subject: [PATCH 2/7] Add tests results Signed-off-by: Louis Mandel --- .../results/examples/react/react_call.3.result | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/results/examples/react/react_call.3.result diff --git a/tests/results/examples/react/react_call.3.result b/tests/results/examples/react/react_call.3.result new file mode 100644 index 000000000..48e5efe3f --- /dev/null +++ b/tests/results/examples/react/react_call.3.result @@ -0,0 +1,18 @@ + How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025. When searching avoid using the words discovery or birthday. + +To find out how many years ago Henry Hudson was born without using the words 'discovery' or 'birthday', I will use the Search tool. + +Action: +[{"name": "Search", "arguments": {"topic": "Henry Hudson"}}] +Observation: Henry Hudson (c. 1565 – disappeared 23 June 1611) was an English sea explorer and navigator during the early 17th century, best known for his explorations of present-day Canada and parts of the Northeastern United States. +In 1607 and 1608, Hudson made two attempts on behalf of English merchants to find a rumoured Northeast Passage to Cathay via a route above the Arctic Circle. In 1609, he landed in North America on behalf of the Dutch East India Company and explored the region around the modern New York metropolitan area. Looking for a Northwest Passage to Asia on his ship Halve Maen ("Half Moon"), he sailed up the Hudson River, which was later named after him, and thereby laid the foundation for Dutch colonization of the region. His contributions to the exploration of the New World were significant and lasting. His voyages helped to establish European contact with the native peoples of North America and contributed to the development of trade and commerce. +On his final expedition, while still searching for the Northwest Passage, Hudson became the first European to see Hudson Strait and the immense Hudson Bay. In 1611, after wintering on the shore of James Bay, Hudson wanted to press on to the west, but most of his crew mutinied. The mutineers cast Hudson, his son, and six others adrift; what then happened to the Hudsons and their companions is unknown. +According to this information, Henry Hudson was born around 1565. As we are in 2025, we can calculate how many years ago that was by subtracting 1565 from 2025. + +Action: +[{"name": "Calc", "arguments": {"expr": "2025 - 1565"}}] +Observation: 460 +Henry Hudson was born approximately 460 years ago, as calculated from the year 2025. + +Action: +[{"name": "Finish", "arguments": {}}] \ No newline at end of file From c7c293fe3c6f6497f259df0790a0931764508ed3 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 20 Jun 2025 15:08:59 -0400 Subject: [PATCH 3/7] Change model used in prompt_library examples Signed-off-by: Louis Mandel --- examples/prompt_library/gsm8k_cot.pdl | 2 +- examples/prompt_library/gsm8k_react.pdl | 2 +- examples/prompt_library/gsm8k_rewoo.pdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/prompt_library/gsm8k_cot.pdl b/examples/prompt_library/gsm8k_cot.pdl index 834e5266a..2c430309f 100644 --- a/examples/prompt_library/gsm8k_cot.pdl +++ b/examples/prompt_library/gsm8k_cot.pdl @@ -2,7 +2,7 @@ description: Demo of CoT pattern defs: cot: import: ../../contrib/prompt_library/CoT - model: watsonx_text/ibm/granite-3-8b-instruct + model: ollama/granite3.2:8b demonstrations: data: - question: |- diff --git a/examples/prompt_library/gsm8k_react.pdl b/examples/prompt_library/gsm8k_react.pdl index 90648dca5..1a9b711f7 100644 --- a/examples/prompt_library/gsm8k_react.pdl +++ b/examples/prompt_library/gsm8k_react.pdl @@ -4,7 +4,7 @@ defs: import: ../../contrib/prompt_library/ReAct tools: import: ../../contrib/prompt_library/tools - model: watsonx_text/ibm/granite-3-8b-instruct + model: ollama/granite3.2:8b demonstrations: data: - - question: Noah charges $60 for a large painting and $30 for a small painting. Last month he sold eight large paintings and four small paintings. If he sold twice as much this month, how much is his sales for this month? diff --git a/examples/prompt_library/gsm8k_rewoo.pdl b/examples/prompt_library/gsm8k_rewoo.pdl index 38fbf5433..1fd6233bd 100644 --- a/examples/prompt_library/gsm8k_rewoo.pdl +++ b/examples/prompt_library/gsm8k_rewoo.pdl @@ -4,7 +4,7 @@ defs: import: ../../contrib/prompt_library/ReWoo tools: import: ../../contrib/prompt_library/tools - model: watsonx_text/ibm/granite-3-8b-instruct + model: ollama/granite3.2:8b demonstrations: data: - - question: Noah charges $60 for a large painting and $30 for a small painting. Last month he sold eight large paintings and four small paintings. If he sold twice as much this month, how much is his sales for this month? From a6e479a4b93098b66b171290c0234d6fb1e397b0 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 20 Jun 2025 15:34:15 -0400 Subject: [PATCH 4/7] Update results Signed-off-by: Louis Mandel --- tests/results/examples/prompt_library/gsm8k_cot.1.result | 3 +++ tests/results/examples/prompt_library/gsm8k_rewoo.1.result | 1 + tests/test_examples_run.yaml | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/results/examples/prompt_library/gsm8k_cot.1.result create mode 100644 tests/results/examples/prompt_library/gsm8k_rewoo.1.result diff --git a/tests/results/examples/prompt_library/gsm8k_cot.1.result b/tests/results/examples/prompt_library/gsm8k_cot.1.result new file mode 100644 index 000000000..560db4e70 --- /dev/null +++ b/tests/results/examples/prompt_library/gsm8k_cot.1.result @@ -0,0 +1,3 @@ +Answer the questions to the best of your abilities. + +Result: {'answer': "Jacob earns $6 per hour. \n\nJake earns thrice what Jacob does, so Jake earns 3 * $6 = $18 per hour.\n\nJake works 8 hours a day for 5 days, so he works a total of 8 * 5 = 40 hours in those 5 days.\n\nTherefore, Jake's earnings for those 5 days are 40 * $18 = $720.\n\nThe answer is $720."} \ No newline at end of file diff --git a/tests/results/examples/prompt_library/gsm8k_rewoo.1.result b/tests/results/examples/prompt_library/gsm8k_rewoo.1.result new file mode 100644 index 000000000..6d977009f --- /dev/null +++ b/tests/results/examples/prompt_library/gsm8k_rewoo.1.result @@ -0,0 +1 @@ +Result: {'answer': "Jake earns $18 per hour (3 times Jacob's $6 per hour). In a day, working 8 hours, Jake earns $144 ($18/hour * 8 hours). Over 5 days, Jake earns $720 ($144/day * 5 days)."} \ No newline at end of file diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml index db022e87b..35da38b77 100644 --- a/tests/test_examples_run.yaml +++ b/tests/test_examples_run.yaml @@ -1,5 +1,8 @@ update_results: false -check: [] +check: + - examples/prompt_library/gsm8k_cot.pdl + - examples/prompt_library/gsm8k_react.pdl + - examples/prompt_library/gsm8k_rewoo.pdl skip: - examples/cldk/cldk-assistant.pdl - examples/gsm8k/gsm8.pdl From 1d01f0cc706bf3c6392673df396ce45d1075df5d Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 20 Jun 2025 15:47:11 -0400 Subject: [PATCH 5/7] Update results Signed-off-by: Louis Mandel --- tests/results/examples/prompt_library/gsm8k_rewoo.2.result | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/results/examples/prompt_library/gsm8k_rewoo.2.result diff --git a/tests/results/examples/prompt_library/gsm8k_rewoo.2.result b/tests/results/examples/prompt_library/gsm8k_rewoo.2.result new file mode 100644 index 000000000..740505d30 --- /dev/null +++ b/tests/results/examples/prompt_library/gsm8k_rewoo.2.result @@ -0,0 +1 @@ +Result: {'answer': '720 dollars.'} \ No newline at end of file From e36cba425dc88ed254a9b101f66edaac0dbc1dd1 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 20 Jun 2025 16:03:02 -0400 Subject: [PATCH 6/7] Update results Signed-off-by: Louis Mandel --- tests/results/examples/prompt_library/gsm8k_react.1.result | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/results/examples/prompt_library/gsm8k_react.1.result diff --git a/tests/results/examples/prompt_library/gsm8k_react.1.result b/tests/results/examples/prompt_library/gsm8k_react.1.result new file mode 100644 index 000000000..372e1c0e6 --- /dev/null +++ b/tests/results/examples/prompt_library/gsm8k_react.1.result @@ -0,0 +1 @@ +Result: {'answer': 'Jake earns $720 in 5 days working 8 hours a day.'} \ No newline at end of file From d28cb323421eac54309eb32c75a74c865a181981 Mon Sep 17 00:00:00 2001 From: Louis Mandel Date: Fri, 20 Jun 2025 16:29:04 -0400 Subject: [PATCH 7/7] Fix run_examples config Signed-off-by: Louis Mandel --- tests/test_examples_run.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml index 35da38b77..db022e87b 100644 --- a/tests/test_examples_run.yaml +++ b/tests/test_examples_run.yaml @@ -1,8 +1,5 @@ update_results: false -check: - - examples/prompt_library/gsm8k_cot.pdl - - examples/prompt_library/gsm8k_react.pdl - - examples/prompt_library/gsm8k_rewoo.pdl +check: [] skip: - examples/cldk/cldk-assistant.pdl - examples/gsm8k/gsm8.pdl