119 changes: 119 additions & 0 deletions .github/workflows/run-examples-modified.yml
@@ -0,0 +1,119 @@
---
name: Run examples on modified PDL files

on: [push, pull_request]

jobs:
  tests:
    name: Execution tests
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # python-version: ['3.11', '3.12', '3.13']
        python-version: ['3.11']

    steps:
      # # Free up some disk space
      # - name: Remove unnecessary files
      #   run: |
      #     sudo rm -rf /usr/share/dotnet
      #     sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      # # Set up Ollama
      # - name: Install Ollama and start server
      #   shell: bash
      #   run: |
      #     curl -fsSL https://ollama.com/install.sh | sudo -E sh

      # - name: Pull models in examples/
      #   shell: bash
      #   run: |
      #     ollama pull granite3.2:2b
      #     ollama pull granite3.2:8b
      #     ollama pull mxbai-embed-large
      #     ollama list

      # - name: Check that all required models are available
      #   shell: bash
      #   run: |
      #     models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
      #     missing=0
      #     for model in "${models[@]}"; do
      #       if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
      #         echo "❌ Model $model (or substring) is missing!"
      #         missing=1
      #       fi
      #     done

      #     if [ "$missing" -eq 1 ]; then
      #       exit 1
      #     else
      #       echo "✅ All expected models are available."
      #     fi

      # - name: Wait for Ollama server
      #   shell: bash
      #   run: |
      #     sleep 10
      #     time curl -i http://localhost:11434

      # Run tests
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Detect all PDL files that were changed or added
        id: changed-pdl-files
        uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46
        with:
          files: |
            **.pdl
          json: 'true'
      - name: List PDL files that were modified or added
        env:
          MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
        run: echo "$MODIFIED_PDL_FILES"
      - name: Update tests/test_examples_run.yaml
        uses: fjogeleit/yaml-update-action@main
        with:
          valueFile: 'tests/test_examples_run.yaml'
          changes: |
            {
              "check": "${{ steps.changed-pdl-files.outputs.all_changed_files }}"
            }
          # propertyPath: 'check'
          # value: ${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}
          commitChange: false
      - name: Print YAML config
        run: cat tests/test_examples_run.yaml


      # - name: Set up Python ${{ matrix.python-version }}
      #   uses: actions/setup-python@v5
      #   with:
      #     python-version: ${{ matrix.python-version }}
      # - name: Cache pip
      #   uses: actions/cache@v4
      #   with:
      #     # This path is specific to Ubuntu
      #     path: ${{ env.pythonLocation }}
      #     # Look to see if there is a cache hit for the setup file
      #     key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
      #     restore-keys: |
      #       ${{ runner.os }}-pip-new3
      #       ${{ runner.os }}-new3
      # - name: Install dependencies
      #   run: pip install --upgrade --upgrade-strategy eager .[all]
      # - name: pip list packages
      #   run: pip list
      # - name: show pip dependencies
      #   run: |
      #     pip install pipdeptree
      #     pipdeptree -fl
      # - name: run tests
      #   env:
      #     WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
      #     WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
      #     WATSONX_URL: ${{ secrets.WATSONX_URL }}
      #     REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
      #   run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py
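Note on data flow: the changed-files step (with json: 'true') emits a JSON array string, while the test suite below consumes a comma-separated list from the MODIFIED_PDL_FILES environment variable. A minimal sketch of one way to bridge the two, assuming the documented output shape of tj-actions/changed-files; the file names are hypothetical:

import json
import os

# Hypothetical output of the changed-pdl-files step when json: 'true' is set.
all_changed_files = '["examples/demo/1-hello.pdl", "examples/chatbot/chatbot.pdl"]'

# Join the JSON array into the comma-separated form that
# tests/test_examples_run.py splits on.
os.environ["MODIFIED_PDL_FILES"] = ",".join(json.loads(all_changed_files))
print(os.environ["MODIFIED_PDL_FILES"])
# -> examples/demo/1-hello.pdl,examples/chatbot/chatbot.pdl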
2 changes: 1 addition & 1 deletion examples/chatbot/chatbot.pdl
@@ -2,7 +2,7 @@ description: Chatbot
text:
# Allow the user to type any question, implicitly adding the question to the context.
- read:
message: "What is your query?\n"
message: "What is your query??\n"
- repeat:
text:
# Send context to Granite model hosted at ollama
2 changes: 1 addition & 1 deletion examples/demo/1-hello.pdl
@@ -1,6 +1,6 @@
description: Model call
text:
- "Hello\n"
- "Hello!\n"
- model: ollama_chat/granite3.2:2b
parameters:
stop: ["!"]
217 changes: 118 additions & 99 deletions tests/test_examples_run.py
@@ -3,7 +3,7 @@
import pathlib
import random
from dataclasses import dataclass
from typing import Optional
from typing import Optional, Tuple

from pytest import CaptureFixture, MonkeyPatch

@@ -131,6 +131,100 @@ class InputsType:
pathlib.Path("tests") / "data" / "line" / "hello9.pdl",
]

# ACTUAL_NO_ERROR indicates there was no error when running pdl.exec_file
ACTUAL_NO_ERROR = 0
# ACTUAL_PARSE_ERROR_CODE indicates there was a PDLParseError when running pdl.exec_file
ACTUAL_PARSE_ERROR_CODE = 1
# ACTUAL_RUNTIME_ERROR_CODE indicates there was a runtime error when running pdl.exec_file
ACTUAL_RUNTIME_ERROR_CODE = 2

def run_single_file(pdl_file_name: str, monkeypatch: MonkeyPatch) -> Tuple[bool, str, int]:
    """
    Tests a single file.

    Returns:
        - bool: True if the file runs successfully, False otherwise
        - str: "" if it runs successfully, the actual result otherwise
        - int: a code indicating the kind of error that occurred: 0 for no
          error, 1 for a parse error, and 2 for a runtime error
    """
    if pdl_file_name in TO_SKIP:
        print(f"File {pdl_file_name} is part of TO_SKIP, skipping test...")
        return True, "", ACTUAL_NO_ERROR

    path_obj = pathlib.Path(pdl_file_name)
    scope: ScopeType = PdlDict({})

    if pdl_file_name in TESTS_WITH_INPUT:
        inputs = TESTS_WITH_INPUT[pdl_file_name]
        if inputs.stdin is not None:
            monkeypatch.setattr(
                "sys.stdin",
                io.StringIO(inputs.stdin),
            )
        if inputs.scope is not None:
            scope = inputs.scope

    try:
        random.seed(11)
        output = pdl.exec_file(
            path_obj,
            scope=scope,
            output="all",
            config=pdl.InterpreterConfig(batch=0),
        )

        actual_result = output["result"]
        block_to_dict(output["trace"], json_compatible=True)
        result_dir_name = pathlib.Path(".") / "tests" / "results" / path_obj.parent

        print(actual_result)

        # Find and compare results
        if not __find_and_compare_results(path_obj, str(actual_result)):
            if OLLAMA_GHACTIONS_RESULTS:
                print(
                    f"Program {pdl_file_name} requires updating its result on GitHub Actions"
                )
                print(f"Actual results: {str(actual_result)}")
                result_file_name = f"{path_obj.stem}.ollama_ghactions.result"
                __write_to_results_file(result_dir_name, result_file_name, str(actual_result))

                # Evaluate the results again. If it fails again, consider this program as failing.
                if not __find_and_compare_results(path_obj, str(actual_result)):
                    print(
                        f"Program {str(pdl_file_name)} failed a second time even after "
                        "generating results on GitHub Actions. Considering this as failing!"
                    )
                    return False, str(actual_result), ACTUAL_NO_ERROR
                else:
                    return True, "", ACTUAL_NO_ERROR

            if UPDATE_RESULTS:
                result_file_name = f"{path_obj.stem}.{str(RESULTS_VERSION)}.result"
                __write_to_results_file(
                    result_dir_name, result_file_name, str(actual_result)
                )

            return False, str(actual_result), ACTUAL_NO_ERROR

    except PDLParseError:
        expected_parse_errors = set(str(p) for p in EXPECTED_PARSE_ERROR)
        if pdl_file_name in expected_parse_errors:
            return True, "", ACTUAL_PARSE_ERROR_CODE
        return False, "", ACTUAL_PARSE_ERROR_CODE

    except Exception:
        expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
        if pdl_file_name in expected_runtime_error:
            return True, "", ACTUAL_RUNTIME_ERROR_CODE
        return False, "", ACTUAL_RUNTIME_ERROR_CODE

    return True, "", ACTUAL_NO_ERROR
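For illustration, a hedged sketch of how the new helper can be exercised from a standalone pytest case (the example path is hypothetical; monkeypatch is pytest's standard fixture):

def test_single_example(monkeypatch: MonkeyPatch) -> None:
    # Hypothetical example; any .pdl file tracked by the repository would do.
    successful, actual_result, error_code = run_single_file(
        "examples/demo/1-hello.pdl", monkeypatch
    )
    assert successful, f"error code {error_code}, actual result: {actual_result}"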

def __write_to_results_file(
dir_name: pathlib.Path, filename: str, content: str
@@ -162,112 +256,37 @@ def __find_and_compare_results(
return True
return False

def test_all_pdl_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:

def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
actual_parse_error: set[str] = set()
actual_runtime_error: set[str] = set()
unexpected_parse_error: set[str] = set()
unexpected_runtime_error: set[str] = set()
wrong_results = {}

files = pathlib.Path(".").glob("**/*.pdl")
files = [str(f) for f in files]

for pdl_file_name in files:

scope: ScopeType = PdlDict({})
if str(pdl_file_name) in TO_SKIP:
continue
if str(pdl_file_name) in TESTS_WITH_INPUT:
inputs = TESTS_WITH_INPUT[str(pdl_file_name)]
if inputs.stdin is not None:
monkeypatch.setattr(
"sys.stdin",
io.StringIO(inputs.stdin),
)
if inputs.scope is not None:
scope = inputs.scope
try:
random.seed(11)
output = pdl.exec_file(
pdl_file_name,
scope=scope,
output="all",
config=pdl.InterpreterConfig(batch=0),
)
actual_result = output["result"]
# Check if we only want to test a subset of PDL programs.
# MODIFIED_PDL_FILES_ENV_VAR is a comma-separated string of PDL files.
MODIFIED_PDL_FILES_ENV_VAR = os.getenv("MODIFIED_PDL_FILES", "")
# Filter out empty items so that an unset variable yields [] rather than [""].
MODIFIED_PDL_FILES = [item.strip() for item in MODIFIED_PDL_FILES_ENV_VAR.split(",") if item.strip()]

block_to_dict(output["trace"], json_compatible=True)
result_dir_name = (
pathlib.Path(".") / "tests" / "results" / pdl_file_name.parent
)
if len(MODIFIED_PDL_FILES) > 0:
print("Only testing a subset of PDL programs, particularly newly added examples or PDL files that were modified.")
files = MODIFIED_PDL_FILES
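A quick illustration of why empty items must be filtered when parsing the environment variable (plain Python string semantics, nothing project-specific): splitting an empty string on a comma still yields one element, which would make the len(...) > 0 check above succeed even when no files were modified.

# "".split(",") returns [''], not []; the filter removes the empty item.
assert "".split(",") == [""]
assert [item.strip() for item in "".split(",") if item.strip()] == []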

if not __find_and_compare_results(pdl_file_name, str(actual_result)):
for pdl_file_name in files:

if OLLAMA_GHACTIONS_RESULTS:
print(
f"Program {str(pdl_file_name)} requries updating its result on GitHub Actions"
)
print(f"Actual results: {str(actual_result)}")
result_file_name = f"{pdl_file_name.stem}.ollama_ghactions.result"
__write_to_results_file(
result_dir_name, result_file_name, str(actual_result)
)
pdl_file_name_str = str(pdl_file_name)
successful, actual_results, error_code = run_single_file(pdl_file_name_str, monkeypatch)

# Evaluate the results again. If fails again, then consider this program as failing
if not __find_and_compare_results(
pdl_file_name, str(actual_result)
):
print(
f"Program {str(pdl_file_name)} failed second time even after generating results from Github Actions. Consider this failing!"
)
wrong_results[str(pdl_file_name)] = {
"actual": str(actual_result),
}
# If evaluating results produces correct result, then this is considered passing
else:
continue

if UPDATE_RESULTS:
result_file_name = (
f"{pdl_file_name.stem}.{str(RESULTS_VERSION)}.result"
)
__write_to_results_file(
result_dir_name, result_file_name, str(actual_result)
)
if not successful:
if error_code == ACTUAL_PARSE_ERROR_CODE:
unexpected_parse_error |= {pdl_file_name_str}
elif error_code == ACTUAL_RUNTIME_ERROR_CODE:
unexpected_runtime_error |= {pdl_file_name_str}
else:
wrong_results[pdl_file_name_str] = actual_results

wrong_results[str(pdl_file_name)] = {
"actual": str(actual_result),
}
except PDLParseError:
actual_parse_error |= {str(pdl_file_name)}
except Exception as exc:
if str(pdl_file_name) not in set(str(p) for p in EXPECTED_RUNTIME_ERROR):
print(f"{pdl_file_name}: {exc}") # unexpected error: breakpoint
actual_runtime_error |= {str(pdl_file_name)}
print(exc)

# Parse errors
expected_parse_error = set(str(p) for p in EXPECTED_PARSE_ERROR)
unexpected_parse_error = sorted(list(actual_parse_error - expected_parse_error))
assert (
len(unexpected_parse_error) == 0
), f"Unexpected parse error: {unexpected_parse_error}"

# Runtime errors
expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
unexpected_runtime_error = sorted(
list(actual_runtime_error - expected_runtime_error)
)
assert (
len(unexpected_runtime_error) == 0
), f"Unexpected runtime error: {unexpected_runtime_error}"

# Unexpected valid
unexpected_valid = sorted(
list(
(expected_parse_error - actual_parse_error).union(
expected_runtime_error - actual_runtime_error
)
)
)
assert len(unexpected_valid) == 0, f"Unexpected valid: {unexpected_valid}"
# Unexpected results
assert len(unexpected_parse_error) == 0, f"Unexpected parse error: {unexpected_parse_error}"
assert len(unexpected_runtime_error) == 0, f"Unexpected runtime error: {unexpected_runtime_error}"
assert len(wrong_results) == 0, f"Wrong results: {wrong_results}"
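To reproduce the subset run locally, a sketch (the file list is illustrative; the pytest invocation mirrors the one commented out in the workflow above):

import os
import subprocess

# Illustrative subset; any comma-separated list of .pdl paths works.
os.environ["MODIFIED_PDL_FILES"] = (
    "examples/demo/1-hello.pdl,examples/chatbot/chatbot.pdl"
)
subprocess.run(
    ["py.test", "-v", "--capture=tee-sys", "-rfE", "-s", "tests/test_examples_run.py"],
    check=True,
)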