From d6d916bcefc3069d90d65e5487df8101de89f924 Mon Sep 17 00:00:00 2001 From: Jing Chen Date: Thu, 3 Apr 2025 20:45:35 -0400 Subject: [PATCH 1/7] Update pipeline to do a pre-example run on push and PRs Signed-off-by: Jing Chen --- .github/workflows/run-examples-modified.yml | 106 ++++++++++ tests/test_examples_run.py | 217 +++++++++++--------- 2 files changed, 224 insertions(+), 99 deletions(-) create mode 100644 .github/workflows/run-examples-modified.yml diff --git a/.github/workflows/run-examples-modified.yml b/.github/workflows/run-examples-modified.yml new file mode 100644 index 000000000..a8079a7b2 --- /dev/null +++ b/.github/workflows/run-examples-modified.yml @@ -0,0 +1,106 @@ +--- +name: Run examples on modified PDL files + +on: [push, pull_request] + + +jobs: + tests: + name: Execution tests + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # python-version: ['3.11', '3.12', '3.13'] + python-version: ['3.11'] + + steps: + # Free up some disk space + - name: Remove unnecessary files + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + + # Set up Ollama + - name: Install Ollama and start server + shell: bash + run: | + curl -fsSL https://ollama.com/install.sh | sudo -E sh + + - name: Pull models in examples/ + shell: bash + run: | + ollama pull granite3.2:2b + ollama pull granite3.2:8b + ollama pull mxbai-embed-large + ollama list + + - name: Check that all required models are available + shell: bash + run: | + models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b") + missing=0 + for model in "${models[@]}"; do + if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then + echo "❌ Model $model (or substring) is missing!" + missing=1 + fi + done + + if [ "$missing" -eq 1 ]; then + exit 1 + else + echo "✅ All expected models are available." 
+          fi
+
+      - name: Wait for Ollama server
+        shell: bash
+        run: |
+          sleep 10
+          time curl -i http://localhost:11434
+
+      # Run tests
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Detect all PDL files that were changed or added
+        id: changed-pdl-files
+        uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46
+        with:
+          files: |
+            **.pdl
+      - name: List PDL files that were modified or added
+        env:
+          MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
+        run: echo "$MODIFIED_PDL_FILES"
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Cache pip
+        uses: actions/cache@v4
+        with:
+          # This path is specific to Ubuntu
+          path: ${{ env.pythonLocation }}
+          # Look to see if there is a cache hit for the setup file
+          key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-new3
+            ${{ runner.os }}-new3
+      - name: Install dependencies
+        run: pip install --upgrade --upgrade-strategy eager .[all]
+      - name: pip list packages
+        run: pip list
+      - name: show pip dependencies
+        run: |
+          pip install pipdeptree
+          pipdeptree -fl
+      - name: run tests
+        env:
+          WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
+          WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
+          WATSONX_URL: ${{ secrets.WATSONX_URL }}
+          REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
+          OLLAMA_GHACTIONS_RESULTS: true
+          MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
+        run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py
diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py
index d3e08689a..2d811baeb 100644
--- a/tests/test_examples_run.py
+++ b/tests/test_examples_run.py
@@ -3,7 +3,7 @@
 import pathlib
 import random
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Tuple
 
 from pytest import CaptureFixture, MonkeyPatch
 
@@ -131,6 +131,100 @@ class InputsType:
     pathlib.Path("tests") / "data" / "line" / "hello9.pdl",
 ]
 
+# ACTUAL_NO_ERROR indicates there was no error when running pdl.exec_file
+ACTUAL_NO_ERROR = 0
+# ACTUAL_PARSE_ERROR_CODE indicates there was a PDLParseError when running pdl.exec_file
+ACTUAL_PARSE_ERROR_CODE = 1
+# ACTUAL_RUNTIME_ERROR_CODE indicates there was a runtime error when running pdl.exec_file
+ACTUAL_RUNTIME_ERROR_CODE = 2
+
+def run_single_file(pdl_file_name: str, monkeypatch: MonkeyPatch) -> Tuple[bool, str, int]:
+    """
+    Tests a single file
+    Returns:
+    - bool: True if it runs successfully and False otherwise
+    - str: "" if it runs successfully and the actual results otherwise
+    - int: a code to indicate what kind of error occurred.
+      0 for no error, 1 for parse error, and 2 for runtime error
+    """
+    if pdl_file_name in TO_SKIP:
+        print(f"File {pdl_file_name} is part of TO_SKIP, skipping test...")
+        return True, "", ACTUAL_NO_ERROR
+
+    path_obj = pathlib.Path(pdl_file_name)
+    scope: ScopeType = PdlDict({})
+
+    if pdl_file_name in TESTS_WITH_INPUT:
+        inputs = TESTS_WITH_INPUT[pdl_file_name]
+        if inputs.stdin is not None:
+            monkeypatch.setattr(
+                "sys.stdin",
+                io.StringIO(inputs.stdin),
+            )
+        if inputs.scope is not None:
+            scope = inputs.scope
+
+    try:
+        random.seed(11)
+        output = pdl.exec_file(
+            path_obj,
+            scope=scope,
+            output="all",
+            config=pdl.InterpreterConfig(batch=0),
+        )
+
+        actual_result = output["result"]
+        block_to_dict(output["trace"], json_compatible=True)
+        result_dir_name = (
+            pathlib.Path(".") / "tests" / "results" / path_obj.parent
+        )
+
+        print(actual_result)
+
+        # Find and compare results
+        if not __find_and_compare_results(path_obj, str(actual_result)):
+            if OLLAMA_GHACTIONS_RESULTS:
+                print(
+                    f"Program {pdl_file_name} requires updating its result on GitHub Actions"
+                )
+                print(f"Actual results: {str(actual_result)}")
+                result_file_name = f"{path_obj.stem}.ollama_ghactions.result"
+                __write_to_results_file(result_dir_name, result_file_name, str(actual_result))
+
+                # Evaluate the results again. If it fails again, then consider this program as failing
+                if not __find_and_compare_results(
+                    path_obj, str(actual_result)
+                ):
+                    print(
+                        f"Program {str(pdl_file_name)} failed a second time even after generating results from GitHub Actions. Consider this failing!"
+                    )
+
+                    return False, str(actual_result), ACTUAL_NO_ERROR
+                else:
+                    return True, "", ACTUAL_NO_ERROR
+
+            if UPDATE_RESULTS:
+                result_file_name = (
+                    f"{path_obj.stem}.{str(RESULTS_VERSION)}.result"
+                )
+                __write_to_results_file(
+                    result_dir_name, result_file_name, str(actual_result)
+                )
+
+            return False, str(actual_result), ACTUAL_NO_ERROR
+
+    except PDLParseError:
+        expected_parse_errors = set(str(p) for p in EXPECTED_PARSE_ERROR)
+        if pdl_file_name in expected_parse_errors:
+            return True, "", ACTUAL_PARSE_ERROR_CODE
+        return False, "", ACTUAL_PARSE_ERROR_CODE
+
+    except Exception:
+        expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
+        if pdl_file_name in expected_runtime_error:
+            return True, "", ACTUAL_RUNTIME_ERROR_CODE
+        return False, "", ACTUAL_RUNTIME_ERROR_CODE
+
+    return True, "", ACTUAL_NO_ERROR
 
 def __write_to_results_file(
     dir_name: pathlib.Path, filename: str, content: str
@@ -162,112 +256,37 @@ def __find_and_compare_results(
             return True
     return False
 
+def test_all_pdl_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
-def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
-    actual_parse_error: set[str] = set()
-    actual_runtime_error: set[str] = set()
+    unexpected_parse_error: set[str] = set()
+    unexpected_runtime_error: set[str] = set()
     wrong_results = {}
     files = pathlib.Path(".").glob("**/*.pdl")
+    files = [str(f) for f in files]
 
-    for pdl_file_name in files:
-
-        scope: ScopeType = PdlDict({})
-        if str(pdl_file_name) in TO_SKIP:
-            continue
-        if str(pdl_file_name) in TESTS_WITH_INPUT:
-            inputs = TESTS_WITH_INPUT[str(pdl_file_name)]
-            if inputs.stdin is not None:
-                monkeypatch.setattr(
-                    "sys.stdin",
-                    io.StringIO(inputs.stdin),
-                )
-            if inputs.scope is not None:
-                scope = inputs.scope
-        try:
-            random.seed(11)
-            output = pdl.exec_file(
-                pdl_file_name,
-                scope=scope,
-                output="all",
-                config=pdl.InterpreterConfig(batch=0),
-            )
-            actual_result = output["result"]
+
+    # Check if we only want to test a subset of PDL programs
+    # MODIFIED_PDL_FILES_ENV_VAR is a comma-separated string of PDL files;
+    # it is empty when no PDL file was modified, in which case all files are tested
+    MODIFIED_PDL_FILES_ENV_VAR = os.getenv("MODIFIED_PDL_FILES", "")
+    MODIFIED_PDL_FILES = [item.strip() for item in MODIFIED_PDL_FILES_ENV_VAR.split(",") if item.strip()]
 
-            block_to_dict(output["trace"], json_compatible=True)
-            result_dir_name = (
-                pathlib.Path(".") / "tests" / "results" / pdl_file_name.parent
-            )
+    if len(MODIFIED_PDL_FILES) > 0:
+        print("Only testing a subset of PDL programs, particularly newly added examples or PDL files that were modified.")
+        files = MODIFIED_PDL_FILES
 
-            if not __find_and_compare_results(pdl_file_name, str(actual_result)):
+    for pdl_file_name in files:
 
-                if OLLAMA_GHACTIONS_RESULTS:
-                    print(
-                        f"Program {str(pdl_file_name)} requries updating its result on GitHub Actions"
-                    )
-                    print(f"Actual results: {str(actual_result)}")
-                    result_file_name = f"{pdl_file_name.stem}.ollama_ghactions.result"
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
+        pdl_file_name_str = str(pdl_file_name)
+        successful, actual_results, error_code = run_single_file(pdl_file_name_str, monkeypatch)
 
-                    # Evaluate the results again. If fails again, then consider this program as failing
-                    if not __find_and_compare_results(
-                        pdl_file_name, str(actual_result)
-                    ):
-                        print(
-                            f"Program {str(pdl_file_name)} failed second time even after generating results from Github Actions. Consider this failing!"
-                        )
-                        wrong_results[str(pdl_file_name)] = {
-                            "actual": str(actual_result),
-                        }
-                    # If evaluating results produces correct result, then this is considered passing
-                    else:
-                        continue
-
-                if UPDATE_RESULTS:
-                    result_file_name = (
-                        f"{pdl_file_name.stem}.{str(RESULTS_VERSION)}.result"
-                    )
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
+        if not successful:
+            if error_code == ACTUAL_PARSE_ERROR_CODE:
+                unexpected_parse_error |= {pdl_file_name_str}
+            elif error_code == ACTUAL_RUNTIME_ERROR_CODE:
+                unexpected_runtime_error |= {pdl_file_name_str}
+            else:
+                wrong_results[pdl_file_name_str] = actual_results
 
-                wrong_results[str(pdl_file_name)] = {
-                    "actual": str(actual_result),
-                }
-        except PDLParseError:
-            actual_parse_error |= {str(pdl_file_name)}
-        except Exception as exc:
-            if str(pdl_file_name) not in set(str(p) for p in EXPECTED_RUNTIME_ERROR):
-                print(f"{pdl_file_name}: {exc}")  # unexpected error: breakpoint
-            actual_runtime_error |= {str(pdl_file_name)}
-            print(exc)
-
-    # Parse errors
-    expected_parse_error = set(str(p) for p in EXPECTED_PARSE_ERROR)
-    unexpected_parse_error = sorted(list(actual_parse_error - expected_parse_error))
-    assert (
-        len(unexpected_parse_error) == 0
-    ), f"Unexpected parse error: {unexpected_parse_error}"
-
-    # Runtime errors
-    expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
-    unexpected_runtime_error = sorted(
-        list(actual_runtime_error - expected_runtime_error)
-    )
-    assert (
-        len(unexpected_runtime_error) == 0
-    ), f"Unexpected runtime error: {unexpected_runtime_error}"
-
-    # Unexpected valid
-    unexpected_valid = sorted(
-        list(
-            (expected_parse_error - actual_parse_error).union(
-                expected_runtime_error - actual_runtime_error
-            )
-        )
-    )
-    assert len(unexpected_valid) == 0, f"Unexpected valid: {unexpected_valid}"
-    # Unexpected results
+    assert len(unexpected_parse_error) == 0, f"Unexpected parse error: {unexpected_parse_error}"
+    assert len(unexpected_runtime_error) == 0, f"Unexpected runtime error: {unexpected_runtime_error}"
+    assert len(wrong_results) == 0, f"Wrong results: {wrong_results}"

From 1a90386f8805658cc94a0eb4f1a388999b271396 Mon Sep 17 00:00:00 2001
From: Jing Chen
Date: Thu, 3 Apr 2025 20:47:20 -0400
Subject: [PATCH 2/7] Modify a PDL file

Signed-off-by: Jing Chen
---
 examples/demo/1-hello.pdl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/demo/1-hello.pdl b/examples/demo/1-hello.pdl
index 8f0a85019..f17fc4144 100644
--- a/examples/demo/1-hello.pdl
+++ b/examples/demo/1-hello.pdl
@@ -1,6 +1,6 @@
 description: Model call
 text:
-- "Hello\n"
+- "Hello!\n"
 - model: ollama_chat/granite3.2:2b
   parameters:
     stop: ["!"]

From abbb79440126c79245ff0b7caf922f4849b7c87f Mon Sep 17 00:00:00 2001
From: Jing Chen
Date: Fri, 4 Apr 2025 19:23:24 -0400
Subject: [PATCH 3/7] Test with new pipeline

Signed-off-by: Jing Chen
---
 .github/workflows/run-examples-modified.yml | 150 +++++-----
 examples/chatbot/chatbot.pdl                |   2 +-
 tests/test_examples_run.yaml                | 113 ++++++++
 tests/test_examples_run_new.py              | 302 ++++++++++++++++++++
 4 files changed, 496 insertions(+), 71 deletions(-)
 create mode 100644 tests/test_examples_run.yaml
 create mode 100644 tests/test_examples_run_new.py

diff --git a/.github/workflows/run-examples-modified.yml b/.github/workflows/run-examples-modified.yml
index a8079a7b2..74d4b2cb2 100644
--- a/.github/workflows/run-examples-modified.yml
+++ b/.github/workflows/run-examples-modified.yml
@@ -3,7 +3,6 @@
 name: Run examples on modified PDL files
 
 on: [push, pull_request]
 
-
 jobs:
   tests:
     name: Execution tests
@@ -15,49 +14,49 @@ jobs:
         python-version: ['3.11']
 
     steps:
-      # Free up some disk space
-      - name: Remove unnecessary files
-        run: |
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+      # # Free up some disk space
+      # - name: Remove unnecessary files
+      #   run: |
+      #     sudo rm -rf /usr/share/dotnet
+      #     sudo rm -rf "$AGENT_TOOLSDIRECTORY"
 
-      # Set up Ollama
-      - name: Install Ollama and start server
-        shell: bash
-        run: |
-          curl -fsSL https://ollama.com/install.sh | sudo -E sh
+      # # Set up Ollama
+      # - name: Install Ollama and start server
+      #   shell: bash
+      #   run: |
+      #     curl -fsSL https://ollama.com/install.sh | sudo -E sh
 
-      - name: Pull models in examples/
-        shell: bash
-        run: |
-          ollama pull granite3.2:2b
-          ollama pull granite3.2:8b
-          ollama pull mxbai-embed-large
-          ollama list
+      # - name: Pull models in examples/
+      #   shell: bash
+      #   run: |
+      #     ollama pull granite3.2:2b
+      #     ollama pull granite3.2:8b
+      #     ollama pull mxbai-embed-large
+      #     ollama list
 
-      - name: Check that all required models are available
-        shell: bash
-        run: |
-          models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
-          missing=0
-          for model in "${models[@]}"; do
-            if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
-              echo "❌ Model $model (or substring) is missing!"
-              missing=1
-            fi
-          done
+      # - name: Check that all required models are available
+      #   shell: bash
+      #   run: |
+      #     models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
+      #     missing=0
+      #     for model in "${models[@]}"; do
+      #       if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
+      #         echo "❌ Model $model (or substring) is missing!"
+      #         missing=1
+      #       fi
+      #     done
 
-          if [ "$missing" -eq 1 ]; then
-            exit 1
-          else
-            echo "✅ All expected models are available."
-          fi
+      #     if [ "$missing" -eq 1 ]; then
+      #       exit 1
+      #     else
+      #       echo "✅ All expected models are available."
+ # fi - - name: Wait for Ollama server - shell: bash - run: | - sleep 10 - time curl -i http://localhost:11434 + # - name: Wait for Ollama server + # shell: bash + # run: | + # sleep 10 + # time curl -i http://localhost:11434 # Run tests - uses: actions/checkout@v4 @@ -69,38 +68,49 @@ jobs: with: files: | **.pdl - - name: List PDL files that were modified or added + json: 'true' + - name: List PDL files that were modified or added and append to test_examples_run env: MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }} run: echo "$MODIFIED_PDL_FILES" - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Cache pip - uses: actions/cache@v4 + - name: Update tests/test_example_run.yaml + uses: fjogeleit/yaml-update-action@main with: - # This path is specific to Ubuntu - path: ${{ env.pythonLocation }} - # Look to see if there is a cache hit for the setup file - key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip-new3 - ${{ runner.os }}-new3 - - name: Install dependencies - run: pip install --upgrade --upgrade-strategy eager .[all] - - name: pip list packages - run: pip list - - name: show pip dependencies - run: | - pip install pipdeptree - pipdeptree -fl - - name: run tests - env: - WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} - WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }} - WATSONX_URL: ${{ secrets.WATSONX_URL }} - REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }} - OLLAMA_GHACTIONS_RESULTS: true - MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }} - run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py + valueFile: 'tests/test_example_run.yaml' + propertyPath: 'check' + value: "f${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}" + commitChange: false + + - name: print yaml config + run: cat tests/test_example_run.yaml + + + # - name: Set up Python ${{ matrix.python-version }} + # uses: actions/setup-python@v5 + # with: + # python-version: ${{ matrix.python-version }} + # - name: Cache pip + # uses: actions/cache@v4 + # with: + # # This path is specific to Ubuntu + # path: ${{ env.pythonLocation }} + # # Look to see if there is a cache hit for the setup file + # key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }} + # restore-keys: | + # ${{ runner.os }}-pip-new3 + # ${{ runner.os }}-new3 + # - name: Install dependencies + # run: pip install --upgrade --upgrade-strategy eager .[all] + # - name: pip list packages + # run: pip list + # - name: show pip dependencies + # run: | + # pip install pipdeptree + # pipdeptree -fl + # - name: run tests + # env: + # WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }} + # WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }} + # WATSONX_URL: ${{ secrets.WATSONX_URL }} + # REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }} + # run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py diff --git a/examples/chatbot/chatbot.pdl b/examples/chatbot/chatbot.pdl index a53f68efd..7a53a9b35 100644 --- a/examples/chatbot/chatbot.pdl +++ b/examples/chatbot/chatbot.pdl @@ -2,7 +2,7 @@ description: Chatbot text: # Allow the user to type any question, implicitly adding the question to the context. 
 - read:
-    message: "What is your query?\n"
+    message: "What is your query??\n"
 - repeat:
     text:
       # Send context to Granite model hosted at ollama
diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml
new file mode 100644
index 000000000..4ef1cc8d7
--- /dev/null
+++ b/tests/test_examples_run.yaml
@@ -0,0 +1,113 @@
+# Configuration file for test_examples_run.py
+update_results: False
+results_version: 1
+
+# A subset of files to check, useful for local dev
+# If empty, check all files
+# check accepts a list of strings
+check:
+  # - examples/chatbot/chatbot.pdl
+  # - examples/tutorial/programs/chatbot.pdl  # with inputs
+  # - examples/rag/pdf_index.pdl  # skip
+  # - tests/data/line/hello3.pdl  # parse error
+  # - examples/callback/repair_prompt.pdl  # runtime error
+
+# Files to skip
+skip:
+  # Requires dataset dependency
+  - examples/cldk/cldk-assistant.pdl
+  - examples/gsm8k/gsm8.pdl
+  - examples/gsm8k/gsm8k-plan.pdl
+
+  # Requires installation dependencies
+  - examples/intrinsics/demo-hallucination.pdl
+  - examples/tutorial/programs/demo-hallucination.pdl
+
+  # Skip RAG examples
+  - examples/rag/pdf_index.pdl
+  - examples/rag/pdf_query.pdl
+  - examples/rag/rag_library1.pdl
+
+  # Skip structured decoding examples for now
+  - examples/tutorial/structured_decoding.pdl
+
+  # Output result includes the trace (and thus timing) for some reason. Investigate why
+  - examples/react/react_call.pdl
+
+  # UI examples
+  - pdl-live-react/demos/error.pdl
+  - pdl-live-react/demos/demo1.pdl
+  - pdl-live-react/demos/demo2.pdl
+
+  # Granite-io examples
+  - examples/granite-io/granite_io_hallucinations.pdl
+  - examples/granite-io/granite_io_openai.pdl
+  - examples/granite-io/granite_io_thinking.pdl
+  - examples/granite-io/granite_io_transformers.pdl
+
+# Files that require input
+with_inputs:
+  examples/tutorial/programs/chatbot.pdl:
+    stdin: "What is APR?\nyes\n"
+    scope: null
+  examples/chatbot/chatbot.pdl:
+    stdin: "What is APR?\nyes\n"
+    scope: null
+  examples/demo/7-chatbot-roles.pdl:
+    stdin: "What is APR?\nquit\n"
+    scope: null
+  examples/tutorial/input_stdin.pdl:
+    stdin: "What is APR?\nyes\n"
+    scope: null
+  examples/tutorial/input_stdin_multiline.pdl:
+    stdin: "Hello\nBye\n"
+    scope: null
+  examples/input/input_test1.pdl:
+    stdin: "Hello\n"
+    scope: null
+  examples/input/input_test2.pdl:
+    stdin: "Hello\n"
+    scope: null
+  examples/tutorial/free_variables.pdl:
+    stdin: null
+    scope: {
+      "something": "ABC"
+    }
+
+# Files expecting PDL parse error
+expected_parse_error:
+  - tests/data/line/hello.pdl
+  - tests/data/line/hello1.pdl
+  - tests/data/line/hello4.pdl
+  - tests/data/line/hello7.pdl
+  - tests/data/line/hello8.pdl
+  - tests/data/line/hello10.pdl
+  - tests/data/line/hello11.pdl
+  - tests/data/line/hello31.pdl
+
+# Files expecting runtime error
+expected_runtime_error:
+  - examples/callback/repair_prompt.pdl
+  - examples/tutorial/type_list.pdl
+  - examples/tutorial/type_checking.pdl
+  - tests/data/line/hello3.pdl
+  - tests/data/line/hello9.pdl
+  - tests/data/line/hello12.pdl
+  - tests/data/line/hello13.pdl
+  - tests/data/line/hello14.pdl
+  - tests/data/line/hello15.pdl
+  - tests/data/line/hello16.pdl
+  - tests/data/line/hello17.pdl
+  - tests/data/line/hello18.pdl
+  - tests/data/line/hello19.pdl
+  - tests/data/line/hello20.pdl
+  - tests/data/line/hello21.pdl
+  - tests/data/line/hello22.pdl
+  - tests/data/line/hello23.pdl
+  - tests/data/line/hello24.pdl
+  - tests/data/line/hello25.pdl
+  - tests/data/line/hello26.pdl
+  - tests/data/line/hello27.pdl
+  - tests/data/line/hello28.pdl
+  - 
tests/data/line/hello29.pdl + - tests/data/line/hello30.pdl diff --git a/tests/test_examples_run_new.py b/tests/test_examples_run_new.py new file mode 100644 index 000000000..6ecbee905 --- /dev/null +++ b/tests/test_examples_run_new.py @@ -0,0 +1,302 @@ +from enum import Enum +import io +import os +import pathlib +import random +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Tuple + +from pytest import CaptureFixture, MonkeyPatch +import yaml + +from pdl import pdl +from pdl.pdl_ast import ScopeType +from pdl.pdl_dumper import block_to_dict +from pdl.pdl_lazy import PdlDict +from pdl.pdl_parser import PDLParseError + +EXAMPLES_RUN_FILE = os.getenv("EXAMPLES_RUN_FILE", "tests/test_examples_run.yaml") + +@dataclass +class InputsType: + """ + Inputs to the PDL program for testing + """ + stdin: Optional[str] = None + scope: Optional[ScopeType] = None + +class ExecutionErrorCode(Enum): + """ + PDLExecutionErrorCode describes the execution result of the PDL file + """ + NO_ERROR = 1 + PARSE_ERROR = 2 + RUNTIME_ERROR = 3 + +@dataclass +class ExecutionResult: + """ + ExecutionResult captures the execution result of a PDL file + """ + result: str | None = None + error_code: ExecutionErrorCode | None = None + +@dataclass +class ExpectedResult: + """ + ExpectedResult captures the expected result of a PDL file. + Non-deterministic programs contain more than one valid result + """ + results: List[str] | None = None + error_code: ExecutionErrorCode | None = None + + def compare_to_execution(self, execution_result: ExecutionResult) -> bool: + """ + Returns true if execution matches to expected results and false otherwise + """ + + # ExecutionErrorCode codes must match + if execution_result.error_code != self.error_code: + return False + + # Check if parse or runtime error + if self.error_code == ExecutionErrorCode.PARSE_ERROR: + return execution_result.error_code == ExecutionErrorCode.PARSE_ERROR + elif self.error_code == ExecutionErrorCode.RUNTIME_ERROR: + return execution_result.error_code == ExecutionErrorCode.RUNTIME_ERROR + + # At this point, it's NO_ERROR, so check for results + actual_result = execution_result.result + if actual_result is not None and self.results is not None: + actual_result_stripped = actual_result.strip() + for expected_result in self.results: + expected_result_stripped = expected_result.strip() + if actual_result_stripped == expected_result_stripped: + return True + return False + +@dataclass +class FailedResults: + """ + FailedResults are all the files that failed + """ + + wrong_results: Dict[str, str] = field(default_factory=lambda: {}) + unexpected_parse_error: List[str] = field(default_factory=lambda: []) + unexpected_runtime_error: List[str] = field(default_factory=lambda: []) + + + +class ExamplesRun: + """ + ExamplesRun outlines PDL files + - to skip + - requires inputs + - expects parse error + - expects runtime error + by loading the configuration from EXAMPLES_RUN_FILE + and runs the test + """ + def __init__(self, monkeypatch: MonkeyPatch) -> None: + # Pytest + self.monkeypatch = monkeypatch + + # Configuration + self.update_results: bool = False + self.results_version: int = 0 + + # File manipulation + self.check = [str(f) for f in pathlib.Path(".").glob("**/*.pdl")] + self.skip: List[str] = [] + self.with_inputs: Dict[str, InputsType] = {} + self.expected_parse_error: List[str] = [] + self.expected_runtime_error: List[str] = [] + + # Load content from EXAMPLES_RUN_FILE + with open(EXAMPLES_RUN_FILE, 'r') as file: + content = 
yaml.safe_load(file) + self.update_results = content['update_results'] + self.results_version = content['results_version'] + + # Update files to check iff check is specified + if content['check'] is not None: + self.check = content['check'] + + self.skip = content['skip'] + self.expected_parse_error = content['expected_parse_error'] + self.expected_runtime_error = content['expected_runtime_error'] + + for filename, inputs_type in content['with_inputs'].items(): + stdin = inputs_type['stdin'] + scope = inputs_type['scope'] + self.with_inputs[filename] = InputsType( + stdin=stdin, + scope=PdlDict(scope) if scope is not None else None + ) + + # Inits expected results + self.expected_results: Dict[str, ExpectedResult] = {} + self.__collect_expected_results() + + # Inits execution results for each PDL file + self.execution_results: Dict[str, ExecutionResult] = {} + + # Init failed results + self.failed_results = FailedResults() + + def __get_results_dir(self) -> pathlib.Path: + return pathlib.Path(".") / "tests" / "results" + + def __collect_expected_results(self) -> None: + """ + Collects possible results for programs in self.check + """ + + for file in self.check: + expected_result = ExpectedResult() + + if file in self.expected_parse_error: + expected_result.error_code = ExecutionErrorCode.PARSE_ERROR + elif file in self.expected_runtime_error: + expected_result.error_code = ExecutionErrorCode.RUNTIME_ERROR + else: + + # Collect possible results + res_list = [] + file_path: pathlib.Path = pathlib.Path(file) + result_dir_name = self.__get_results_dir() / file_path.parent + expected_files = result_dir_name.glob(file_path.stem + ".*.result") + + for expected_file in expected_files: + with open(expected_file, "r", encoding="utf-8") as truth_file: + content = truth_file.read() + res_list.append(content) + + expected_result.error_code = ExecutionErrorCode.NO_ERROR + expected_result.results = res_list + + self.expected_results[file] = expected_result + + def __execute_file(self, pdl_file_name: str) -> None: + """ + Tests the result of a single file and returns the result output and the error code + """ + + exec_result = ExecutionResult() + + pdl_file_path = pathlib.Path(pdl_file_name) + scope: ScopeType = PdlDict({}) + + # Patch with inputs + if pdl_file_name in self.with_inputs: + inputs = self.with_inputs[pdl_file_name] + if inputs.stdin is not None: + self.monkeypatch.setattr( + "sys.stdin", + io.StringIO(inputs.stdin)) + if inputs.scope is not None: + scope = inputs.scope + + try: + # Execute file + output = pdl.exec_file( + pdl_file_path, + scope=scope, + output="all", + config=pdl.InterpreterConfig(batch=0), + ) + + exec_result.result = str(output["result"]) + exec_result.error_code = ExecutionErrorCode.NO_ERROR + + except PDLParseError: + exec_result.error_code = ExecutionErrorCode.PARSE_ERROR + except Exception: + exec_result.error_code = ExecutionErrorCode.RUNTIME_ERROR + + self.execution_results[pdl_file_name] = exec_result + + def populate_exec_result_for_checks(self) -> None: + """ + Populates the execution result for all files in self.checks + """ + + for file in self.check: + if file not in self.skip: + self.__execute_file(file) + + def validate_expected_and_actual(self) -> None: + """ + Validates the expected result to actual result + Must be run after populate_exec_result_for_checks + """ + + wrong_result: Dict[str, str] = {} + unexpected_parse_error: List[str] = [] + unexpected_runtime_error: List[str] = [] + + for file in self.check: + if file not in self.skip: + expected_result = 
self.expected_results[file]
+                actual_result = self.execution_results[file]
+
+                match = expected_result.compare_to_execution(actual_result)
+
+                if not match:
+                    # Check if actual results caused any error
+                    if actual_result.error_code == ExecutionErrorCode.PARSE_ERROR:
+                        unexpected_parse_error.append(file)
+                    elif actual_result.error_code == ExecutionErrorCode.RUNTIME_ERROR:
+                        unexpected_runtime_error.append(file)
+                    # If no error, then the results are wrong
+                    else:
+                        if actual_result.result is not None:
+                            wrong_result[file] = actual_result.result
+
+                    # print(f"{actual_result.result=}")
+                    # print(f"{expected_result.results=}")
+
+        self.failed_results.wrong_results = wrong_result
+        self.failed_results.unexpected_parse_error = unexpected_parse_error
+        self.failed_results.unexpected_runtime_error = unexpected_runtime_error
+
+    def write_results(self) -> None:
+        """
+        Writes new results for failed files
+        """
+
+        results_dir = self.__get_results_dir()
+        for file in self.failed_results.wrong_results:
+            actual_result = self.failed_results.wrong_results[file]
+            file_path = results_dir / pathlib.Path(file)
+
+            # Mkdir if not exist
+            file_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Write to new file, next to the other .result files for this example
+            write_file_name = file_path.parent / f"{file_path.stem}.{str(self.results_version)}.result"
+
+            with open(write_file_name, 'w', encoding="utf-8") as f:
+                f.write(actual_result)
+
+def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
+    """
+    Runs the test
+    """
+
+    random.seed(11)
+    background = ExamplesRun(monkeypatch)
+
+    background.populate_exec_result_for_checks()
+    background.validate_expected_and_actual()
+
+    if background.update_results:
+        background.write_results()
+
+    # Print the wrong results
+    for file, actual in background.failed_results.wrong_results.items():
+        print(f"File that produced wrong result: {file}")
+        print(f"Actual:\n{actual}\n")
+
+    assert len(background.failed_results.unexpected_parse_error) == 0, f"Unexpected parse error: {background.failed_results.unexpected_parse_error}"
+    assert len(background.failed_results.unexpected_runtime_error) == 0, f"Unexpected runtime error: {background.failed_results.unexpected_runtime_error}"
+    assert len(background.failed_results.wrong_results) == 0, f"Wrong results: {background.failed_results.wrong_results}"

From 1ba2149ae43dfb8f72f4951b8e6ea23494716d6e Mon Sep 17 00:00:00 2001
From: Jing Chen
Date: Fri, 4 Apr 2025 19:24:54 -0400
Subject: [PATCH 4/7] Typo

Signed-off-by: Jing Chen
---
 .github/workflows/run-examples-modified.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-examples-modified.yml b/.github/workflows/run-examples-modified.yml
index 74d4b2cb2..fad85cd0c 100644
--- a/.github/workflows/run-examples-modified.yml
+++ b/.github/workflows/run-examples-modified.yml
@@ -78,7 +78,7 @@ jobs:
         with:
           valueFile: 'tests/test_example_run.yaml'
           propertyPath: 'check'
-          value: "f${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}"
+          value: "${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}"
           commitChange: false
 
       - name: print yaml config

From e16106fae5651d8607f18aa76d6f904035f04ca8 Mon Sep 17 00:00:00 2001
From: Jing Chen
Date: Fri, 4 Apr 2025 19:26:44 -0400
Subject: [PATCH 5/7] Fix syntax

Signed-off-by: Jing Chen
---
 .github/workflows/run-examples-modified.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-examples-modified.yml b/.github/workflows/run-examples-modified.yml
index fad85cd0c..2d5437477 100644
--- a/.github/workflows/run-examples-modified.yml
+++ b/.github/workflows/run-examples-modified.yml
@@ -78,7 +78,7 @@ jobs:
         with:
           valueFile: 'tests/test_example_run.yaml'
           propertyPath: 'check'
-          value: "${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}"
+          value: ${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}
           commitChange: false
 
       - name: print yaml config

From a33f3d6a85094201139189235e9af91ee53555b5 Mon Sep 17 00:00:00 2001
From: Jing Chen
Date: Fri, 4 Apr 2025 19:30:04 -0400
Subject: [PATCH 6/7] Fix 2

Signed-off-by: Jing Chen
---
 .github/workflows/run-examples-modified.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/run-examples-modified.yml b/.github/workflows/run-examples-modified.yml
index 2d5437477..e1fe6b34c 100644
--- a/.github/workflows/run-examples-modified.yml
+++ b/.github/workflows/run-examples-modified.yml
@@ -77,10 +77,13 @@ jobs:
         uses: fjogeleit/yaml-update-action@main
         with:
           valueFile: 'tests/test_example_run.yaml'
-          propertyPath: 'check'
-          value: ${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}
+          changes: |
+            {
+              "check": "${{ steps.changed-pdl-files.outputs.all_changed_files }}"
+            }
+          # propertyPath: 'check'
+          # value: ${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}
           commitChange: false
-
       - name: print yaml config
         run: cat tests/test_example_run.yaml

From 8a5f11375d29d4ea08731ce327194a35b754859d Mon Sep 17 00:00:00 2001
From: Jing Chen
Date: Fri, 4 Apr 2025 19:31:35 -0400
Subject: [PATCH 7/7] Fix 3

Signed-off-by: Jing Chen
---
 .github/workflows/run-examples-modified.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/run-examples-modified.yml b/.github/workflows/run-examples-modified.yml
index e1fe6b34c..6d982410c 100644
--- a/.github/workflows/run-examples-modified.yml
+++ b/.github/workflows/run-examples-modified.yml
@@ -73,14 +73,14 @@ jobs:
         env:
           MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
         run: echo "$MODIFIED_PDL_FILES"
-      - name: Update tests/test_example_run.yaml
+      - name: Update tests/test_examples_run.yaml
         uses: fjogeleit/yaml-update-action@main
         with:
-          valueFile: 'tests/test_example_run.yaml'
+          valueFile: 'tests/test_examples_run.yaml'
           changes: |
             {
              "check": "${{ steps.changed-pdl-files.outputs.all_changed_files }}"
            }
           # propertyPath: 'check'
           # value: ${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}
           commitChange: false
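
After this series, the subset-run flow is driven entirely by tests/test_examples_run.yaml: CI writes the changed-files list into its `check` key, and the new test module reads the config path from the EXAMPLES_RUN_FILE environment variable. To exercise the same flow locally without the yaml-update-action step, a driver along the following lines could work. This is a minimal sketch, not part of the series: the script name is hypothetical and its use of PyYAML is an assumption, while the config keys, the environment variable, and the pytest invocation all come from the patches above. Note that PyYAML's dump will drop the comments in the config file, so this is best run on a scratch copy.

    #!/usr/bin/env python3
    # run_modified_examples.py -- hypothetical local driver (not part of this PR).
    # Mirrors the CI flow: write the modified PDL files into the `check` key of
    # the YAML config, then run the new test module against that config.
    import os
    import subprocess
    import sys

    import yaml  # PyYAML, an assumed dependency of this sketch

    CONFIG = "tests/test_examples_run.yaml"


    def main(modified_files: list[str]) -> int:
        with open(CONFIG, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)

        # An empty/absent `check` means "test every PDL file"; a non-empty list
        # narrows the run to the named files, which is what CI does for a PR.
        config["check"] = modified_files or None

        with open(CONFIG, "w", encoding="utf-8") as f:
            yaml.safe_dump(config, f, sort_keys=False)

        # test_examples_run_new.py picks the config up via EXAMPLES_RUN_FILE.
        env = dict(os.environ, EXAMPLES_RUN_FILE=CONFIG)
        return subprocess.call(
            ["py.test", "-v", "--capture=tee-sys", "-rfE", "-s",
             "tests/test_examples_run_new.py"],
            env=env,
        )


    if __name__ == "__main__":
        # Example: python run_modified_examples.py examples/demo/1-hello.pdl
        sys.exit(main(sys.argv[1:]))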