119 changes: 119 additions & 0 deletions .github/workflows/run-examples-modified.yml
@@ -0,0 +1,119 @@
---
name: Run examples on modified PDL files

on: [push, pull_request]

jobs:
  tests:
    name: Execution tests
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # python-version: ['3.11', '3.12', '3.13']
        python-version: ['3.11']

    steps:
      # # Free up some disk space
      # - name: Remove unnecessary files
      #   run: |
      #     sudo rm -rf /usr/share/dotnet
      #     sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      # # Set up Ollama
      # - name: Install Ollama and start server
      #   shell: bash
      #   run: |
      #     curl -fsSL https://ollama.com/install.sh | sudo -E sh

      # - name: Pull models in examples/
      #   shell: bash
      #   run: |
      #     ollama pull granite3.2:2b
      #     ollama pull granite3.2:8b
      #     ollama pull mxbai-embed-large
      #     ollama list

      # - name: Check that all required models are available
      #   shell: bash
      #   run: |
      #     models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
      #     missing=0
      #     for model in "${models[@]}"; do
      #       if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
      #         echo "❌ Model $model (or substring) is missing!"
      #         missing=1
      #       fi
      #     done

      #     if [ "$missing" -eq 1 ]; then
      #       exit 1
      #     else
      #       echo "✅ All expected models are available."
      #     fi

      # - name: Wait for Ollama server
      #   shell: bash
      #   run: |
      #     sleep 10
      #     time curl -i http://localhost:11434

      # Run tests
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Detect all PDL files that were changed or added
        id: changed-pdl-files
        uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46
        with:
          files: |
            **.pdl
          json: 'true'
      - name: List PDL files that were modified or added
        env:
          MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
        run: echo "$MODIFIED_PDL_FILES"
      - name: Update tests/test_examples_run.yaml
        uses: fjogeleit/yaml-update-action@main
        with:
          valueFile: 'tests/test_examples_run.yaml'
          changes: |
            {
              "check": "${{ steps.changed-pdl-files.outputs.all_changed_files }}"
            }
          # propertyPath: 'check'
          # value: ${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}
          commitChange: false
      - name: Print YAML config
        run: cat tests/test_examples_run.yaml


      # - name: Set up Python ${{ matrix.python-version }}
      #   uses: actions/setup-python@v5
      #   with:
      #     python-version: ${{ matrix.python-version }}
      # - name: Cache pip
      #   uses: actions/cache@v4
      #   with:
      #     # This path is specific to Ubuntu
      #     path: ${{ env.pythonLocation }}
      #     # Look to see if there is a cache hit for the setup file
      #     key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
      #     restore-keys: |
      #       ${{ runner.os }}-pip-new3
      #       ${{ runner.os }}-new3
      # - name: Install dependencies
      #   run: pip install --upgrade --upgrade-strategy eager .[all]
      # - name: pip list packages
      #   run: pip list
      # - name: show pip dependencies
      #   run: |
      #     pip install pipdeptree
      #     pipdeptree -fl
      # - name: run tests
      #   env:
      #     WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
      #     WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
      #     WATSONX_URL: ${{ secrets.WATSONX_URL }}
      #     REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
      #   run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py
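Note on data flow: the changed-files step (with json: 'true') emits a JSON array string, while the test suite below consumes a comma-separated list from the MODIFIED_PDL_FILES environment variable. A minimal sketch of one way to bridge the two, assuming the documented output shape of tj-actions/changed-files; the file names are hypothetical:

import json
import os

# Hypothetical output of the changed-pdl-files step when json: 'true' is set.
all_changed_files = '["examples/demo/1-hello.pdl", "examples/chatbot/chatbot.pdl"]'

# Join the JSON array into the comma-separated form that
# tests/test_examples_run.py splits on.
os.environ["MODIFIED_PDL_FILES"] = ",".join(json.loads(all_changed_files))
print(os.environ["MODIFIED_PDL_FILES"])
# -> examples/demo/1-hello.pdl,examples/chatbot/chatbot.pdl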
2 changes: 1 addition & 1 deletion examples/chatbot/chatbot.pdl
@@ -2,7 +2,7 @@ description: Chatbot
text:
# Allow the user to type any question, implicitly adding the question to the context.
- read:
message: "What is your query?\n"
message: "What is your query??\n"
- repeat:
text:
# Send context to Granite model hosted at ollama
2 changes: 1 addition & 1 deletion examples/demo/1-hello.pdl
@@ -1,6 +1,6 @@
description: Model call
text:
- "Hello\n"
- "Hello!\n"
- model: ollama_chat/granite3.2:2b
parameters:
stop: ["!"]
217 changes: 118 additions & 99 deletions tests/test_examples_run.py
@@ -3,7 +3,7 @@
import pathlib
import random
from dataclasses import dataclass
from typing import Optional
from typing import Optional, Tuple

from pytest import CaptureFixture, MonkeyPatch

@@ -131,6 +131,100 @@ class InputsType:
pathlib.Path("tests") / "data" / "line" / "hello9.pdl",
]

# ACTUAL_NO_ERROR indicates there was no error when running pdl.exec_file
ACTUAL_NO_ERROR = 0
# ACTUAL_PARSE_ERROR_CODE indicates there was a PDLParseError when running pdl.exec_file
ACTUAL_PARSE_ERROR_CODE = 1
# ACTUAL_RUNTIME_ERROR_CODE indicates there was a runtime error when running pdl.exec_file
ACTUAL_RUNTIME_ERROR_CODE = 2

def run_single_file(pdl_file_name: str, monkeypatch: MonkeyPatch) -> Tuple[bool, str, int]:
    """
    Tests a single file.

    Returns:
        - bool: True if the file runs successfully, False otherwise
        - str: "" if it runs successfully, the actual result otherwise
        - int: a code indicating the kind of error that occurred: 0 for no
          error, 1 for a parse error, and 2 for a runtime error
    """
    if pdl_file_name in TO_SKIP:
        print(f"File {pdl_file_name} is part of TO_SKIP, skipping test...")
        return True, "", ACTUAL_NO_ERROR

    path_obj = pathlib.Path(pdl_file_name)
    scope: ScopeType = PdlDict({})

    if pdl_file_name in TESTS_WITH_INPUT:
        inputs = TESTS_WITH_INPUT[pdl_file_name]
        if inputs.stdin is not None:
            monkeypatch.setattr(
                "sys.stdin",
                io.StringIO(inputs.stdin),
            )
        if inputs.scope is not None:
            scope = inputs.scope

    try:
        random.seed(11)
        output = pdl.exec_file(
            path_obj,
            scope=scope,
            output="all",
            config=pdl.InterpreterConfig(batch=0),
        )

        actual_result = output["result"]
        block_to_dict(output["trace"], json_compatible=True)
        result_dir_name = pathlib.Path(".") / "tests" / "results" / path_obj.parent

        print(actual_result)

        # Find and compare results
        if not __find_and_compare_results(path_obj, str(actual_result)):
            if OLLAMA_GHACTIONS_RESULTS:
                print(
                    f"Program {pdl_file_name} requires updating its result on GitHub Actions"
                )
                print(f"Actual results: {str(actual_result)}")
                result_file_name = f"{path_obj.stem}.ollama_ghactions.result"
                __write_to_results_file(result_dir_name, result_file_name, str(actual_result))

                # Evaluate the results again. If it fails again, consider this program as failing.
                if not __find_and_compare_results(path_obj, str(actual_result)):
                    print(
                        f"Program {str(pdl_file_name)} failed a second time even after "
                        "generating results on GitHub Actions. Considering this as failing!"
                    )
                    return False, str(actual_result), ACTUAL_NO_ERROR
                else:
                    return True, "", ACTUAL_NO_ERROR

            if UPDATE_RESULTS:
                result_file_name = f"{path_obj.stem}.{str(RESULTS_VERSION)}.result"
                __write_to_results_file(
                    result_dir_name, result_file_name, str(actual_result)
                )

            return False, str(actual_result), ACTUAL_NO_ERROR

    except PDLParseError:
        expected_parse_errors = set(str(p) for p in EXPECTED_PARSE_ERROR)
        if pdl_file_name in expected_parse_errors:
            return True, "", ACTUAL_PARSE_ERROR_CODE
        return False, "", ACTUAL_PARSE_ERROR_CODE

    except Exception:
        expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
        if pdl_file_name in expected_runtime_error:
            return True, "", ACTUAL_RUNTIME_ERROR_CODE
        return False, "", ACTUAL_RUNTIME_ERROR_CODE

    return True, "", ACTUAL_NO_ERROR
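For illustration, a hedged sketch of how the new helper can be exercised from a standalone pytest case (the example path is hypothetical; monkeypatch is pytest's standard fixture):

def test_single_example(monkeypatch: MonkeyPatch) -> None:
    # Hypothetical example; any .pdl file tracked by the repository would do.
    successful, actual_result, error_code = run_single_file(
        "examples/demo/1-hello.pdl", monkeypatch
    )
    assert successful, f"error code {error_code}, actual result: {actual_result}"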

def __write_to_results_file(
dir_name: pathlib.Path, filename: str, content: str
@@ -162,112 +256,37 @@ def __find_and_compare_results(
return True
return False

def test_all_pdl_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:

def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
actual_parse_error: set[str] = set()
actual_runtime_error: set[str] = set()
unexpected_parse_error: set[str] = set()
unexpected_runtime_error: set[str] = set()
wrong_results = {}

files = pathlib.Path(".").glob("**/*.pdl")
files = [str(f) for f in files]

for pdl_file_name in files:

scope: ScopeType = PdlDict({})
if str(pdl_file_name) in TO_SKIP:
continue
if str(pdl_file_name) in TESTS_WITH_INPUT:
inputs = TESTS_WITH_INPUT[str(pdl_file_name)]
if inputs.stdin is not None:
monkeypatch.setattr(
"sys.stdin",
io.StringIO(inputs.stdin),
)
if inputs.scope is not None:
scope = inputs.scope
try:
random.seed(11)
output = pdl.exec_file(
pdl_file_name,
scope=scope,
output="all",
config=pdl.InterpreterConfig(batch=0),
)
actual_result = output["result"]
# Check if we only want to test a subset of PDL programs.
# MODIFIED_PDL_FILES_ENV_VAR is a comma-separated string of PDL files.
MODIFIED_PDL_FILES_ENV_VAR = os.getenv("MODIFIED_PDL_FILES", "")
# Filter out empty items so that an unset variable yields [] rather than [""].
MODIFIED_PDL_FILES = [item.strip() for item in MODIFIED_PDL_FILES_ENV_VAR.split(",") if item.strip()]

block_to_dict(output["trace"], json_compatible=True)
result_dir_name = (
pathlib.Path(".") / "tests" / "results" / pdl_file_name.parent
)
if len(MODIFIED_PDL_FILES) > 0:
print("Only testing a subset of PDL programs, particularly newly added examples or PDL files that were modified.")
files = MODIFIED_PDL_FILES
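A quick illustration of why empty items must be filtered when parsing the environment variable (plain Python string semantics, nothing project-specific): splitting an empty string on a comma still yields one element, which would make the len(...) > 0 check above succeed even when no files were modified.

# "".split(",") returns [''], not []; the filter removes the empty item.
assert "".split(",") == [""]
assert [item.strip() for item in "".split(",") if item.strip()] == []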

if not __find_and_compare_results(pdl_file_name, str(actual_result)):
for pdl_file_name in files:

if OLLAMA_GHACTIONS_RESULTS:
print(
f"Program {str(pdl_file_name)} requries updating its result on GitHub Actions"
)
print(f"Actual results: {str(actual_result)}")
result_file_name = f"{pdl_file_name.stem}.ollama_ghactions.result"
__write_to_results_file(
result_dir_name, result_file_name, str(actual_result)
)
pdl_file_name_str = str(pdl_file_name)
successful, actual_results, error_code = run_single_file(pdl_file_name_str, monkeypatch)

# Evaluate the results again. If fails again, then consider this program as failing
if not __find_and_compare_results(
pdl_file_name, str(actual_result)
):
print(
f"Program {str(pdl_file_name)} failed second time even after generating results from Github Actions. Consider this failing!"
)
wrong_results[str(pdl_file_name)] = {
"actual": str(actual_result),
}
# If evaluating results produces correct result, then this is considered passing
else:
continue

if UPDATE_RESULTS:
result_file_name = (
f"{pdl_file_name.stem}.{str(RESULTS_VERSION)}.result"
)
__write_to_results_file(
result_dir_name, result_file_name, str(actual_result)
)
if not successful:
if error_code == ACTUAL_PARSE_ERROR_CODE:
unexpected_parse_error |= {pdl_file_name_str}
elif error_code == ACTUAL_RUNTIME_ERROR_CODE:
unexpected_runtime_error |= {pdl_file_name_str}
else:
wrong_results[pdl_file_name_str] = actual_results

wrong_results[str(pdl_file_name)] = {
"actual": str(actual_result),
}
except PDLParseError:
actual_parse_error |= {str(pdl_file_name)}
except Exception as exc:
if str(pdl_file_name) not in set(str(p) for p in EXPECTED_RUNTIME_ERROR):
print(f"{pdl_file_name}: {exc}") # unexpected error: breakpoint
actual_runtime_error |= {str(pdl_file_name)}
print(exc)

# Parse errors
expected_parse_error = set(str(p) for p in EXPECTED_PARSE_ERROR)
unexpected_parse_error = sorted(list(actual_parse_error - expected_parse_error))
assert (
len(unexpected_parse_error) == 0
), f"Unexpected parse error: {unexpected_parse_error}"

# Runtime errors
expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
unexpected_runtime_error = sorted(
list(actual_runtime_error - expected_runtime_error)
)
assert (
len(unexpected_runtime_error) == 0
), f"Unexpected runtime error: {unexpected_runtime_error}"

# Unexpected valid
unexpected_valid = sorted(
list(
(expected_parse_error - actual_parse_error).union(
expected_runtime_error - actual_runtime_error
)
)
)
assert len(unexpected_valid) == 0, f"Unexpected valid: {unexpected_valid}"
# Unexpected results
assert len(unexpected_parse_error) == 0, f"Unexpected parse error: {unexpected_parse_error}"
assert len(unexpected_runtime_error) == 0, f"Unexpected runtime error: {unexpected_runtime_error}"
assert len(wrong_results) == 0, f"Wrong results: {wrong_results}"
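To reproduce the subset run locally, a sketch (the file list is illustrative; the pytest invocation mirrors the one commented out in the workflow above):

import os
import subprocess

# Illustrative subset; any comma-separated list of .pdl paths works.
os.environ["MODIFIED_PDL_FILES"] = (
    "examples/demo/1-hello.pdl,examples/chatbot/chatbot.pdl"
)
subprocess.run(
    ["py.test", "-v", "--capture=tee-sys", "-rfE", "-s", "tests/test_examples_run.py"],
    check=True,
)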