Commit d6d916b

Update pipeline to do a pre-example run on push and PRs
Signed-off-by: Jing Chen <[email protected]>
1 parent 2e660ef commit d6d916b

2 files changed: +224 -99 lines changed

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
+---
+name: Run examples on modified PDL files
+
+on: [push, pull_request]
+
+
+jobs:
+  tests:
+    name: Execution tests
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        # python-version: ['3.11', '3.12', '3.13']
+        python-version: ['3.11']
+
+    steps:
+      # Free up some disk space
+      - name: Remove unnecessary files
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      # Set up Ollama
+      - name: Install Ollama and start server
+        shell: bash
+        run: |
+          curl -fsSL https://ollama.com/install.sh | sudo -E sh
+
+      - name: Pull models in examples/
+        shell: bash
+        run: |
+          ollama pull granite3.2:2b
+          ollama pull granite3.2:8b
+          ollama pull mxbai-embed-large
+          ollama list
+
+      - name: Check that all required models are available
+        shell: bash
+        run: |
+          models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
+          missing=0
+          for model in "${models[@]}"; do
+            if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
+              echo "❌ Model $model (or substring) is missing!"
+              missing=1
+            fi
+          done
+
+          if [ "$missing" -eq 1 ]; then
+            exit 1
+          else
+            echo "✅ All expected models are available."
+          fi
+
+      - name: Wait for Ollama server
+        shell: bash
+        run: |
+          sleep 10
+          time curl -i http://localhost:11434
+
+      # Run tests
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Detect all PDL files that were changed or added
+        id: changed-pdl-files
+        uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46
+        with:
+          files: |
+            **.pdl
+      - name: List PDL files that were modified or added
+        env:
+          MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
+        run: echo "$MODIFIED_PDL_FILES"
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Cache pip
+        uses: actions/cache@v4
+        with:
+          # This path is specific to Ubuntu
+          path: ${{ env.pythonLocation }}
+          # Look to see if there is a cache hit for the setup file
+          key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-new3
+            ${{ runner.os }}-new3
+      - name: Install dependencies
+        run: pip install --upgrade --upgrade-strategy eager .[all]
+      - name: pip list packages
+        run: pip list
+      - name: show pip dependencies
+        run: |
+          pip install pipdeptree
+          pipdeptree -fl
+      - name: run tests
+        env:
+          WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
+          WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
+          WATSONX_URL: ${{ secrets.WATSONX_URL }}
+          REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
+          OLLAMA_GHACTIONS_RESULTS: true
+          MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
+        run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py
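
The final step drives everything through pytest, with the changed-file list exported as an environment variable. For local debugging, the sketch below is a minimal, hypothetical stand-in for that step outside GitHub Actions; it assumes the repository root is the working directory, `pip install .[all]` has been run, and an Ollama server is already listening on localhost. The two PDL paths are placeholders, not files referenced by this commit.

# Hypothetical local stand-in for the workflow's "run tests" step (not part of the commit).
import os
import sys

import pytest

# Mirror the environment the workflow exports; the values here are illustrative only.
os.environ["OLLAMA_GHACTIONS_RESULTS"] = "true"
os.environ["MODIFIED_PDL_FILES"] = "examples/demo/hello.pdl, examples/demo/chatbot.pdl"  # placeholder paths

# Same pytest flags as the CI step; exit with pytest's status code.
sys.exit(pytest.main(["-v", "--capture=tee-sys", "-rfE", "-s", "tests/test_examples_run.py"]))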

tests/test_examples_run.py

Lines changed: 118 additions & 99 deletions
@@ -3,7 +3,7 @@
 import pathlib
 import random
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Tuple

 from pytest import CaptureFixture, MonkeyPatch

@@ -131,6 +131,100 @@ class InputsType:
     pathlib.Path("tests") / "data" / "line" / "hello9.pdl",
 ]

+# ACTUAL_NO_ERROR indicates there was no error when running pdl.exec_file
+ACTUAL_NO_ERROR = 0
+# ACTUAL_PARSE_ERROR_CODE indicates there was a PDLParseError when running pdl.exec_file
+ACTUAL_PARSE_ERROR_CODE = 1
+# ACTUAL_RUNTIME_ERROR_CODE indicates there was a runtime error when running pdl.exec_file
+ACTUAL_RUNTIME_ERROR_CODE = 2
+
+def run_single_file(pdl_file_name: str, monkeypatch: MonkeyPatch) -> Tuple[bool, str, int]:
+    """
+    Tests a single file
+    Returns:
+    - bool: True if it runs successfully and False otherwise
+    - str: "" if it runs successfully and the actual results otherwise
+    - int: a code to indicate what kind of error occurred. 0 for no error, 1 for parse error, and 2 for runtime error
+    """
+    if pdl_file_name in TO_SKIP:
+        print(f"File {pdl_file_name} is part of TO_SKIP, skipping test...")
+        return True, "", ACTUAL_NO_ERROR
+
+    path_obj = pathlib.Path(pdl_file_name)
+    scope: ScopeType = PdlDict({})
+
+    if pdl_file_name in TESTS_WITH_INPUT:
+        inputs = TESTS_WITH_INPUT[pdl_file_name]
+        if inputs.stdin is not None:
+            monkeypatch.setattr(
+                "sys.stdin",
+                io.StringIO(inputs.stdin),
+            )
+        if inputs.scope is not None:
+            scope = inputs.scope
+
+    try:
+        random.seed(11)
+        output = pdl.exec_file(
+            path_obj,
+            scope=scope,
+            output="all",
+            config=pdl.InterpreterConfig(batch=0),
+        )
+
+        actual_result = output["result"]
+        block_to_dict(output["trace"], json_compatible=True)
+        result_dir_name = (
+            pathlib.Path(".") / "tests" / "results" / path_obj.parent
+        )
+
+        print(actual_result)
+
+        # Find and compare results
+        if not __find_and_compare_results(path_obj, str(actual_result)):
+            if OLLAMA_GHACTIONS_RESULTS:
+                print(
+                    f"Program {pdl_file_name} requires updating its result on GitHub Actions"
+                )
+                print(f"Actual results: {str(actual_result)}")
+                result_file_name = f"{path_obj.stem}.ollama_ghactions.result"
+                __write_to_results_file(result_dir_name, result_file_name, str(actual_result))
+
+                # Evaluate the results again. If it fails again, then consider this program as failing
+                if not __find_and_compare_results(
+                    path_obj, str(actual_result)
+                ):
+                    print(
+                        f"Program {str(pdl_file_name)} failed a second time even after generating results from GitHub Actions. Consider this failing!"
+                    )
+
+                    return False, str(actual_result), ACTUAL_NO_ERROR
+                else:
+                    return True, "", ACTUAL_NO_ERROR
+
+            if UPDATE_RESULTS:
+                result_file_name = (
+                    f"{path_obj.stem}.{str(RESULTS_VERSION)}.result"
+                )
+                __write_to_results_file(
+                    result_dir_name, result_file_name, str(actual_result)
+                )
+
+            return False, str(actual_result), ACTUAL_NO_ERROR
+
+    except PDLParseError:
+        expected_parse_errors = set(str(p) for p in EXPECTED_PARSE_ERROR)
+        if pdl_file_name in expected_parse_errors:
+            return True, "", ACTUAL_PARSE_ERROR_CODE
+        return False, "", ACTUAL_PARSE_ERROR_CODE
+
+    except Exception:
+        expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
+        if pdl_file_name in expected_runtime_error:
+            return True, "", ACTUAL_RUNTIME_ERROR_CODE
+        return False, "", ACTUAL_RUNTIME_ERROR_CODE
+
+    return True, "", ACTUAL_NO_ERROR

 def __write_to_results_file(
     dir_name: pathlib.Path, filename: str, content: str
@@ -162,112 +256,37 @@ def __find_and_compare_results(
            return True
    return False

+def test_all_pdl_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:

-def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
-    actual_parse_error: set[str] = set()
-    actual_runtime_error: set[str] = set()
+    unexpected_parse_error: set[str] = set()
+    unexpected_runtime_error: set[str] = set()
     wrong_results = {}

     files = pathlib.Path(".").glob("**/*.pdl")
+    files = [str(f) for f in files]

-    for pdl_file_name in files:
-
-        scope: ScopeType = PdlDict({})
-        if str(pdl_file_name) in TO_SKIP:
-            continue
-        if str(pdl_file_name) in TESTS_WITH_INPUT:
-            inputs = TESTS_WITH_INPUT[str(pdl_file_name)]
-            if inputs.stdin is not None:
-                monkeypatch.setattr(
-                    "sys.stdin",
-                    io.StringIO(inputs.stdin),
-                )
-            if inputs.scope is not None:
-                scope = inputs.scope
-        try:
-            random.seed(11)
-            output = pdl.exec_file(
-                pdl_file_name,
-                scope=scope,
-                output="all",
-                config=pdl.InterpreterConfig(batch=0),
-            )
-            actual_result = output["result"]
+    # Check if we only want to test a subset of PDL programs
+    # MODIFIED_PDL_FILES_ENV_VAR is a string of PDL files, comma separated
+    MODIFIED_PDL_FILES_ENV_VAR = os.getenv("MODIFIED_PDL_FILES", "")
+    MODIFIED_PDL_FILES = [item.strip() for item in MODIFIED_PDL_FILES_ENV_VAR.split(",") if item.strip()]

-            block_to_dict(output["trace"], json_compatible=True)
-            result_dir_name = (
-                pathlib.Path(".") / "tests" / "results" / pdl_file_name.parent
-            )
+    if len(MODIFIED_PDL_FILES) > 0:
+        print("Only testing a subset of PDL programs, particularly newly added examples or PDL files that were modified.")
+        files = MODIFIED_PDL_FILES

-            if not __find_and_compare_results(pdl_file_name, str(actual_result)):
+    for pdl_file_name in files:

-                if OLLAMA_GHACTIONS_RESULTS:
-                    print(
-                        f"Program {str(pdl_file_name)} requries updating its result on GitHub Actions"
-                    )
-                    print(f"Actual results: {str(actual_result)}")
-                    result_file_name = f"{pdl_file_name.stem}.ollama_ghactions.result"
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
+        pdl_file_name_str = str(pdl_file_name)
+        successful, actual_results, error_code = run_single_file(pdl_file_name_str, monkeypatch)

-                    # Evaluate the results again. If fails again, then consider this program as failing
-                    if not __find_and_compare_results(
-                        pdl_file_name, str(actual_result)
-                    ):
-                        print(
-                            f"Program {str(pdl_file_name)} failed second time even after generating results from Github Actions. Consider this failing!"
-                        )
-                        wrong_results[str(pdl_file_name)] = {
-                            "actual": str(actual_result),
-                        }
-                    # If evaluating results produces correct result, then this is considered passing
-                    else:
-                        continue
-
-                if UPDATE_RESULTS:
-                    result_file_name = (
-                        f"{pdl_file_name.stem}.{str(RESULTS_VERSION)}.result"
-                    )
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
+        if not successful:
+            if error_code == ACTUAL_PARSE_ERROR_CODE:
+                unexpected_parse_error |= {pdl_file_name_str}
+            elif error_code == ACTUAL_RUNTIME_ERROR_CODE:
+                unexpected_runtime_error |= {pdl_file_name_str}
+            else:
+                wrong_results[pdl_file_name_str] = actual_results

-                wrong_results[str(pdl_file_name)] = {
-                    "actual": str(actual_result),
-                }
-        except PDLParseError:
-            actual_parse_error |= {str(pdl_file_name)}
-        except Exception as exc:
-            if str(pdl_file_name) not in set(str(p) for p in EXPECTED_RUNTIME_ERROR):
-                print(f"{pdl_file_name}: {exc}")  # unexpected error: breakpoint
-            actual_runtime_error |= {str(pdl_file_name)}
-            print(exc)
-
-    # Parse errors
-    expected_parse_error = set(str(p) for p in EXPECTED_PARSE_ERROR)
-    unexpected_parse_error = sorted(list(actual_parse_error - expected_parse_error))
-    assert (
-        len(unexpected_parse_error) == 0
-    ), f"Unexpected parse error: {unexpected_parse_error}"
-
-    # Runtime errors
-    expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
-    unexpected_runtime_error = sorted(
-        list(actual_runtime_error - expected_runtime_error)
-    )
-    assert (
-        len(unexpected_runtime_error) == 0
-    ), f"Unexpected runtime error: {unexpected_runtime_error}"
-
-    # Unexpected valid
-    unexpected_valid = sorted(
-        list(
-            (expected_parse_error - actual_parse_error).union(
-                expected_runtime_error - actual_runtime_error
-            )
-        )
-    )
-    assert len(unexpected_valid) == 0, f"Unexpected valid: {unexpected_valid}"
-    # Unexpected results
+    assert len(unexpected_parse_error) == 0, f"Unexpected parse error: {unexpected_parse_error}"
+    assert len(unexpected_runtime_error) == 0, f"Unexpected runtime error: {unexpected_runtime_error}"
     assert len(wrong_results) == 0, f"Wrong results: {wrong_results}"