diff --git a/.github/workflows/run-examples-modified.yml b/.github/workflows/run-examples-modified.yml
new file mode 100644
index 000000000..6d982410c
--- /dev/null
+++ b/.github/workflows/run-examples-modified.yml
@@ -0,0 +1,119 @@
+---
+name: Run examples on modified PDL files
+
+on: [push, pull_request]
+
+jobs:
+  tests:
+    name: Execution tests
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        # python-version: ['3.11', '3.12', '3.13']
+        python-version: ['3.11']
+
+    steps:
+      # # Free up some disk space
+      # - name: Remove unnecessary files
+      #   run: |
+      #     sudo rm -rf /usr/share/dotnet
+      #     sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      # # Set up Ollama
+      # - name: Install Ollama and start server
+      #   shell: bash
+      #   run: |
+      #     curl -fsSL https://ollama.com/install.sh | sudo -E sh
+
+      # - name: Pull models in examples/
+      #   shell: bash
+      #   run: |
+      #     ollama pull granite3.2:2b
+      #     ollama pull granite3.2:8b
+      #     ollama pull mxbai-embed-large
+      #     ollama list
+
+      # - name: Check that all required models are available
+      #   shell: bash
+      #   run: |
+      #     models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
+      #     missing=0
+      #     for model in "${models[@]}"; do
+      #       if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
+      #         echo "❌ Model $model (or substring) is missing!"
+      #         missing=1
+      #       fi
+      #     done
+
+      #     if [ "$missing" -eq 1 ]; then
+      #       exit 1
+      #     else
+      #       echo "✅ All expected models are available."
+      #     fi
+
+      # - name: Wait for Ollama server
+      #   shell: bash
+      #   run: |
+      #     sleep 10
+      #     time curl -i http://localhost:11434
+
+      # Run tests
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Detect all PDL files that were changed or added
+        id: changed-pdl-files
+        uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46
+        with:
+          files: |
+            **.pdl
+          json: 'true'
+      - name: List PDL files that were modified or added
+        env:
+          MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
+        run: echo "$MODIFIED_PDL_FILES"
+      - name: Update tests/test_examples_run.yaml
+        uses: fjogeleit/yaml-update-action@main
+        with:
+          valueFile: 'tests/test_examples_run.yaml'
+          changes: |
+            {
+              "check": "${{ steps.changed-pdl-files.outputs.all_changed_files }}"
+            }
+          # propertyPath: 'check'
+          # value: ${{ fromJSON(steps.changed-pdl-files.outputs.all_changed_files) }}
+          commitChange: false
+      - name: Print YAML config
+        run: cat tests/test_examples_run.yaml
+
+      # - name: Set up Python ${{ matrix.python-version }}
+      #   uses: actions/setup-python@v5
+      #   with:
+      #     python-version: ${{ matrix.python-version }}
+      # - name: Cache pip
+      #   uses: actions/cache@v4
+      #   with:
+      #     # This path is specific to Ubuntu
+      #     path: ${{ env.pythonLocation }}
+      #     # Look to see if there is a cache hit for the setup file
+      #     key: ${{ runner.os }}-pip-new3-${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}
+      #     restore-keys: |
+      #       ${{ runner.os }}-pip-new3
+      #       ${{ runner.os }}-new3
+      # - name: Install dependencies
+      #   run: pip install --upgrade --upgrade-strategy eager .[all]
+      # - name: pip list packages
+      #   run: pip list
+      # - name: show pip dependencies
+      #   run: |
+      #     pip install pipdeptree
+      #     pipdeptree -fl
+      # - name: run tests
+      #   env:
+      #     WATSONX_PROJECT_ID: ${{ secrets.WATSONX_PROJECT_ID }}
+      #     WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
+      #     WATSONX_URL: ${{ secrets.WATSONX_URL }}
+      #     REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
+      #   run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py
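+      # A possible follow-up, kept commented out like the steps above: pass the changed
+      # files straight to pytest through the MODIFIED_PDL_FILES environment variable
+      # that tests/test_examples_run.py reads, instead of (or in addition to) rewriting
+      # tests/test_examples_run.yaml. This is only a sketch; the output of the
+      # changed-files step may need massaging into a comma-separated list first.
+      # - name: Run tests on modified PDL files only
+      #   env:
+      #     MODIFIED_PDL_FILES: ${{ steps.changed-pdl-files.outputs.all_changed_files }}
+      #   run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py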
diff --git a/examples/chatbot/chatbot.pdl b/examples/chatbot/chatbot.pdl
index a53f68efd..7a53a9b35 100644
--- a/examples/chatbot/chatbot.pdl
+++ b/examples/chatbot/chatbot.pdl
@@ -2,7 +2,7 @@ description: Chatbot
 text:
 # Allow the user to type any question, implicitly adding the question to the context.
 - read:
-    message: "What is your query?\n"
+    message: "What is your query??\n"
 - repeat:
     text:
     # Send context to Granite model hosted at ollama
diff --git a/examples/demo/1-hello.pdl b/examples/demo/1-hello.pdl
index 8f0a85019..f17fc4144 100644
--- a/examples/demo/1-hello.pdl
+++ b/examples/demo/1-hello.pdl
@@ -1,6 +1,6 @@
 description: Model call
 text:
-- "Hello\n"
+- "Hello!\n"
 - model: ollama_chat/granite3.2:2b
   parameters:
     stop: ["!"]
diff --git a/tests/test_examples_run.py b/tests/test_examples_run.py
index d3e08689a..2d811baeb 100644
--- a/tests/test_examples_run.py
+++ b/tests/test_examples_run.py
@@ -3,7 +3,7 @@
 import pathlib
 import random
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Tuple
 
 from pytest import CaptureFixture, MonkeyPatch
 
@@ -131,6 +131,100 @@ class InputsType:
     pathlib.Path("tests") / "data" / "line" / "hello9.pdl",
 ]
 
+# ACTUAL_NO_ERROR indicates there was no error when running pdl.exec_file
+ACTUAL_NO_ERROR = 0
+# ACTUAL_PARSE_ERROR_CODE indicates there was a PDLParseError when running pdl.exec_file
+ACTUAL_PARSE_ERROR_CODE = 1
+# ACTUAL_RUNTIME_ERROR_CODE indicates there was a runtime error when running pdl.exec_file
+ACTUAL_RUNTIME_ERROR_CODE = 2
+
+
+def run_single_file(pdl_file_name: str, monkeypatch: MonkeyPatch) -> Tuple[bool, str, int]:
+    """
+    Tests a single PDL file.
+
+    Returns:
+    - bool: True if the file runs as expected and False otherwise
+    - str: "" if the file runs as expected and the actual result otherwise
+    - int: a code indicating the kind of error that occurred: 0 for no error,
+      1 for a parse error, and 2 for a runtime error
+    """
+    if pdl_file_name in TO_SKIP:
+        print(f"File {pdl_file_name} is part of TO_SKIP, skipping test...")
+        return True, "", ACTUAL_NO_ERROR
+
+    path_obj = pathlib.Path(pdl_file_name)
+    scope: ScopeType = PdlDict({})
+
+    if pdl_file_name in TESTS_WITH_INPUT:
+        inputs = TESTS_WITH_INPUT[pdl_file_name]
+        if inputs.stdin is not None:
+            monkeypatch.setattr(
+                "sys.stdin",
+                io.StringIO(inputs.stdin),
+            )
+        if inputs.scope is not None:
+            scope = inputs.scope
+
+    try:
+        random.seed(11)
+        output = pdl.exec_file(
+            path_obj,
+            scope=scope,
+            output="all",
+            config=pdl.InterpreterConfig(batch=0),
+        )
+
+        actual_result = output["result"]
+        block_to_dict(output["trace"], json_compatible=True)
+        result_dir_name = (
+            pathlib.Path(".") / "tests" / "results" / path_obj.parent
+        )
+
+        print(actual_result)
+
+        # Find and compare results
+        if not __find_and_compare_results(path_obj, str(actual_result)):
+            if OLLAMA_GHACTIONS_RESULTS:
+                print(
+                    f"Program {pdl_file_name} requires updating its result on GitHub Actions"
+                )
+                print(f"Actual results: {str(actual_result)}")
+                result_file_name = f"{path_obj.stem}.ollama_ghactions.result"
+                __write_to_results_file(result_dir_name, result_file_name, str(actual_result))
+
+                # Evaluate the results again. If it fails again, consider this program as failing.
+                if not __find_and_compare_results(
+                    path_obj, str(actual_result)
+                ):
+                    print(
+                        f"Program {str(pdl_file_name)} failed a second time even after generating results from GitHub Actions. Consider this failing!"
+                    )
+
+                    return False, str(actual_result), ACTUAL_NO_ERROR
+                else:
+                    return True, "", ACTUAL_NO_ERROR
+
+            if UPDATE_RESULTS:
+                result_file_name = (
+                    f"{path_obj.stem}.{str(RESULTS_VERSION)}.result"
+                )
+                __write_to_results_file(
+                    result_dir_name, result_file_name, str(actual_result)
+                )
+
+            return False, str(actual_result), ACTUAL_NO_ERROR
+
+    except PDLParseError:
+        expected_parse_errors = set(str(p) for p in EXPECTED_PARSE_ERROR)
+        if pdl_file_name in expected_parse_errors:
+            return True, "", ACTUAL_PARSE_ERROR_CODE
+        return False, "", ACTUAL_PARSE_ERROR_CODE
+
+    except Exception:
+        expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
+        if pdl_file_name in expected_runtime_error:
+            return True, "", ACTUAL_RUNTIME_ERROR_CODE
+        return False, "", ACTUAL_RUNTIME_ERROR_CODE
+
+    return True, "", ACTUAL_NO_ERROR
+
 
 def __write_to_results_file(
     dir_name: pathlib.Path, filename: str, content: str
@@ -162,112 +256,37 @@ def __find_and_compare_results(
             return True
     return False
 
+def test_all_pdl_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
 
-def test_valid_programs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
-    actual_parse_error: set[str] = set()
-    actual_runtime_error: set[str] = set()
+    unexpected_parse_error: set[str] = set()
+    unexpected_runtime_error: set[str] = set()
     wrong_results = {}
     files = pathlib.Path(".").glob("**/*.pdl")
+    files = [str(f) for f in files]
 
-    for pdl_file_name in files:
-
-        scope: ScopeType = PdlDict({})
-        if str(pdl_file_name) in TO_SKIP:
-            continue
-        if str(pdl_file_name) in TESTS_WITH_INPUT:
-            inputs = TESTS_WITH_INPUT[str(pdl_file_name)]
-            if inputs.stdin is not None:
-                monkeypatch.setattr(
-                    "sys.stdin",
-                    io.StringIO(inputs.stdin),
-                )
-            if inputs.scope is not None:
-                scope = inputs.scope
-        try:
-            random.seed(11)
-            output = pdl.exec_file(
-                pdl_file_name,
-                scope=scope,
-                output="all",
-                config=pdl.InterpreterConfig(batch=0),
-            )
-            actual_result = output["result"]
+    # Check if we only want to test a subset of PDL programs
+    # MODIFIED_PDL_FILES_ENV_VAR is a comma-separated string of PDL files
+    MODIFIED_PDL_FILES_ENV_VAR = os.getenv("MODIFIED_PDL_FILES", "")
+    MODIFIED_PDL_FILES = [
+        item.strip() for item in MODIFIED_PDL_FILES_ENV_VAR.split(",") if item.strip()
+    ]
 
-            block_to_dict(output["trace"], json_compatible=True)
-            result_dir_name = (
-                pathlib.Path(".") / "tests" / "results" / pdl_file_name.parent
-            )
+    if len(MODIFIED_PDL_FILES) > 0:
+        print("Only testing a subset of PDL programs, particularly newly added examples or PDL files that were modified.")
+        files = MODIFIED_PDL_FILES
 
-            if not __find_and_compare_results(pdl_file_name, str(actual_result)):
+    for pdl_file_name in files:
 
-                if OLLAMA_GHACTIONS_RESULTS:
-                    print(
-                        f"Program {str(pdl_file_name)} requries updating its result on GitHub Actions"
-                    )
-                    print(f"Actual results: {str(actual_result)}")
-                    result_file_name = f"{pdl_file_name.stem}.ollama_ghactions.result"
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
+        pdl_file_name_str = str(pdl_file_name)
+        successful, actual_results, error_code = run_single_file(pdl_file_name_str, monkeypatch)
 
-                    # Evaluate the results again. If fails again, then consider this program as failing
-                    if not __find_and_compare_results(
-                        pdl_file_name, str(actual_result)
-                    ):
-                        print(
-                            f"Program {str(pdl_file_name)} failed second time even after generating results from Github Actions. Consider this failing!"
-                        )
-                        wrong_results[str(pdl_file_name)] = {
-                            "actual": str(actual_result),
-                        }
-                    # If evaluating results produces correct result, then this is considered passing
-                    else:
-                        continue
-
-                if UPDATE_RESULTS:
-                    result_file_name = (
-                        f"{pdl_file_name.stem}.{str(RESULTS_VERSION)}.result"
-                    )
-                    __write_to_results_file(
-                        result_dir_name, result_file_name, str(actual_result)
-                    )
+        if not successful:
+            if error_code == ACTUAL_PARSE_ERROR_CODE:
+                unexpected_parse_error |= {pdl_file_name_str}
+            elif error_code == ACTUAL_RUNTIME_ERROR_CODE:
+                unexpected_runtime_error |= {pdl_file_name_str}
+            else:
+                wrong_results[pdl_file_name_str] = actual_results
 
-                wrong_results[str(pdl_file_name)] = {
-                    "actual": str(actual_result),
-                }
-        except PDLParseError:
-            actual_parse_error |= {str(pdl_file_name)}
-        except Exception as exc:
-            if str(pdl_file_name) not in set(str(p) for p in EXPECTED_RUNTIME_ERROR):
-                print(f"{pdl_file_name}: {exc}")  # unexpected error: breakpoint
-            actual_runtime_error |= {str(pdl_file_name)}
-            print(exc)
-
-    # Parse errors
-    expected_parse_error = set(str(p) for p in EXPECTED_PARSE_ERROR)
-    unexpected_parse_error = sorted(list(actual_parse_error - expected_parse_error))
-    assert (
-        len(unexpected_parse_error) == 0
-    ), f"Unexpected parse error: {unexpected_parse_error}"
-
-    # Runtime errors
-    expected_runtime_error = set(str(p) for p in EXPECTED_RUNTIME_ERROR)
-    unexpected_runtime_error = sorted(
-        list(actual_runtime_error - expected_runtime_error)
-    )
-    assert (
-        len(unexpected_runtime_error) == 0
-    ), f"Unexpected runtime error: {unexpected_runtime_error}"
-
-    # Unexpected valid
-    unexpected_valid = sorted(
-        list(
-            (expected_parse_error - actual_parse_error).union(
-                expected_runtime_error - actual_runtime_error
-            )
-        )
-    )
-    assert len(unexpected_valid) == 0, f"Unexpected valid: {unexpected_valid}"
-    # Unexpected results
+    assert len(unexpected_parse_error) == 0, f"Unexpected parse error: {unexpected_parse_error}"
+    assert len(unexpected_runtime_error) == 0, f"Unexpected runtime error: {unexpected_runtime_error}"
     assert len(wrong_results) == 0, f"Wrong results: {wrong_results}"
diff --git a/tests/test_examples_run.yaml b/tests/test_examples_run.yaml
new file mode 100644
index 000000000..4ef1cc8d7
--- /dev/null
+++ b/tests/test_examples_run.yaml
@@ -0,0 +1,113 @@
+# Configuration file for tests/test_examples_run_new.py
+update_results: False
+results_version: 1
+
+# A subset of files to check, useful for local dev
+# If empty, check all files
+# check accepts a list of strings
+check:
+  # - examples/chatbot/chatbot.pdl
+  # - examples/tutorial/programs/chatbot.pdl  # with inputs
+  # - examples/rag/pdf_index.pdl  # skip
+  # - tests/data/line/hello3.pdl  # parse error
+  # - examples/callback/repair_prompt.pdl  # runtime error
+
+# Files to skip
+skip:
+  # Requires a dataset dependency
+  - examples/cldk/cldk-assistant.pdl
+  - examples/gsm8k/gsm8.pdl
+  - examples/gsm8k/gsm8k-plan.pdl
+
+  # Requires extra installation dependencies
+  - examples/intrinsics/demo-hallucination.pdl
+  - examples/tutorial/programs/demo-hallucination.pdl
+
+  # Skip RAG examples
+  - examples/rag/pdf_index.pdl
+  - examples/rag/pdf_query.pdl
+  - examples/rag/rag_library1.pdl
+
+  # Skip structured decoding examples for now
+  - examples/tutorial/structured_decoding.pdl
+
+  # Output result includes the trace (and thus timing) for some reason; investigate why
+  - examples/react/react_call.pdl
+
+  # UI examples
+  - pdl-live-react/demos/error.pdl
+  - pdl-live-react/demos/demo1.pdl
+  - pdl-live-react/demos/demo2.pdl
+
+  # Granite-io examples
+  - examples/granite-io/granite_io_hallucinations.pdl
+  - examples/granite-io/granite_io_openai.pdl
+  - examples/granite-io/granite_io_thinking.pdl
+  - examples/granite-io/granite_io_transformers.pdl
+
+# Files that require input
+with_inputs:
+  examples/tutorial/programs/chatbot.pdl:
+    stdin: "What is APR?\nyes\n"
+    scope: null
+  examples/chatbot/chatbot.pdl:
+    stdin: "What is APR?\nyes\n"
+    scope: null
+  examples/demo/7-chatbot-roles.pdl:
+    stdin: "What is APR?\nquit\n"
+    scope: null
+  examples/tutorial/input_stdin.pdl:
+    stdin: "What is APR?\nyes\n"
+    scope: null
+  examples/tutorial/input_stdin_multiline.pdl:
+    stdin: "Hello\nBye\n"
+    scope: null
+  examples/input/input_test1.pdl:
+    stdin: "Hello\n"
+    scope: null
+  examples/input/input_test2.pdl:
+    stdin: "Hello\n"
+    scope: null
+  examples/tutorial/free_variables.pdl:
+    stdin: null
+    scope: {
+      "something": "ABC"
+    }
+
+# Files expecting a PDL parse error
+expected_parse_error:
+  - tests/data/line/hello.pdl
+  - tests/data/line/hello1.pdl
+  - tests/data/line/hello4.pdl
+  - tests/data/line/hello7.pdl
+  - tests/data/line/hello8.pdl
+  - tests/data/line/hello10.pdl
+  - tests/data/line/hello11.pdl
+  - tests/data/line/hello31.pdl
+
+# Files expecting a runtime error
+expected_runtime_error:
+  - examples/callback/repair_prompt.pdl
+  - examples/tutorial/type_list.pdl
+  - examples/tutorial/type_checking.pdl
+  - tests/data/line/hello3.pdl
+  - tests/data/line/hello9.pdl
+  - tests/data/line/hello12.pdl
+  - tests/data/line/hello13.pdl
+  - tests/data/line/hello14.pdl
+  - tests/data/line/hello15.pdl
+  - tests/data/line/hello16.pdl
+  - tests/data/line/hello17.pdl
+  - tests/data/line/hello18.pdl
+  - tests/data/line/hello19.pdl
+  - tests/data/line/hello20.pdl
+  - tests/data/line/hello21.pdl
+  - tests/data/line/hello22.pdl
+  - tests/data/line/hello23.pdl
+  - tests/data/line/hello24.pdl
+  - tests/data/line/hello25.pdl
+  - tests/data/line/hello26.pdl
+  - tests/data/line/hello27.pdl
+  - tests/data/line/hello28.pdl
+  - tests/data/line/hello29.pdl
+  - tests/data/line/hello30.pdl
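+
+# Note on the `check` field above: the run-examples-modified workflow is meant to
+# overwrite it with the PDL files changed in a pull request before the tests run.
+# After that step, this file would contain something along these lines
+# (hypothetical values, for illustration only):
+#
+# check:
+#   - examples/chatbot/chatbot.pdl
+#   - examples/demo/1-hello.pdl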
diff --git a/tests/test_examples_run_new.py b/tests/test_examples_run_new.py
new file mode 100644
index 000000000..6ecbee905
--- /dev/null
+++ b/tests/test_examples_run_new.py
@@ -0,0 +1,302 @@
+import io
+import os
+import pathlib
+import random
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Dict, List, Optional
+
+import yaml
+from pytest import CaptureFixture, MonkeyPatch
+
+from pdl import pdl
+from pdl.pdl_ast import ScopeType
+from pdl.pdl_dumper import block_to_dict
+from pdl.pdl_lazy import PdlDict
+from pdl.pdl_parser import PDLParseError
+
+EXAMPLES_RUN_FILE = os.getenv("EXAMPLES_RUN_FILE", "tests/test_examples_run.yaml")
+
+
+@dataclass
+class InputsType:
+    """
+    Inputs to the PDL program for testing
+    """
+
+    stdin: Optional[str] = None
+    scope: Optional[ScopeType] = None
+
+
+class ExecutionErrorCode(Enum):
+    """
+    ExecutionErrorCode describes the execution result of a PDL file
+    """
+
+    NO_ERROR = 1
+    PARSE_ERROR = 2
+    RUNTIME_ERROR = 3
+
+
+@dataclass
+class ExecutionResult:
+    """
+    ExecutionResult captures the execution result of a PDL file
+    """
+
+    result: str | None = None
+    error_code: ExecutionErrorCode | None = None
+
+
+@dataclass
+class ExpectedResult:
+    """
+    ExpectedResult captures the expected result of a PDL file.
+    Non-deterministic programs may have more than one valid result.
+    """
+
+    results: List[str] | None = None
+    error_code: ExecutionErrorCode | None = None
+
+    def compare_to_execution(self, execution_result: ExecutionResult) -> bool:
+        """
+        Returns True if the execution matches the expected result and False otherwise
+        """
+
+        # Error codes must match
+        if execution_result.error_code != self.error_code:
+            return False
+
+        # Expected parse or runtime errors need no result comparison
+        if self.error_code == ExecutionErrorCode.PARSE_ERROR:
+            return execution_result.error_code == ExecutionErrorCode.PARSE_ERROR
+        if self.error_code == ExecutionErrorCode.RUNTIME_ERROR:
+            return execution_result.error_code == ExecutionErrorCode.RUNTIME_ERROR
+
+        # At this point it's NO_ERROR, so compare the results
+        actual_result = execution_result.result
+        if actual_result is not None and self.results is not None:
+            actual_result_stripped = actual_result.strip()
+            for expected_result in self.results:
+                expected_result_stripped = expected_result.strip()
+                if actual_result_stripped == expected_result_stripped:
+                    return True
+        return False
+
+
+@dataclass
+class FailedResults:
+    """
+    FailedResults collects all the files that failed
+    """
+
+    wrong_results: Dict[str, str] = field(default_factory=dict)
+    unexpected_parse_error: List[str] = field(default_factory=list)
+    unexpected_runtime_error: List[str] = field(default_factory=list)
+
+
+class ExamplesRun:
+    """
+    ExamplesRun loads the configuration from EXAMPLES_RUN_FILE, which lists the PDL files
+    - to skip
+    - that require inputs
+    - that expect a parse error
+    - that expect a runtime error
+    and then runs the test over the files to check.
+    """
+
+    def __init__(self, monkeypatch: MonkeyPatch) -> None:
+        # Pytest
+        self.monkeypatch = monkeypatch
+
+        # Configuration
+        self.update_results: bool = False
+        self.results_version: int = 0
+
+        # File manipulation
+        self.check = [str(f) for f in pathlib.Path(".").glob("**/*.pdl")]
+        self.skip: List[str] = []
+        self.with_inputs: Dict[str, InputsType] = {}
+        self.expected_parse_error: List[str] = []
+        self.expected_runtime_error: List[str] = []
+
+        # Load content from EXAMPLES_RUN_FILE
+        with open(EXAMPLES_RUN_FILE, 'r') as file:
+            content = yaml.safe_load(file)
+
+        self.update_results = content['update_results']
+        self.results_version = content['results_version']
+
+        # Update files to check iff check is specified
+        if content['check'] is not None:
+            self.check = content['check']
+
+        self.skip = content['skip']
+        self.expected_parse_error = content['expected_parse_error']
+        self.expected_runtime_error = content['expected_runtime_error']
+
+        for filename, inputs_type in content['with_inputs'].items():
+            stdin = inputs_type['stdin']
+            scope = inputs_type['scope']
+            self.with_inputs[filename] = InputsType(
+                stdin=stdin,
+                scope=PdlDict(scope) if scope is not None else None
+            )
+
+        # Init expected results
+        self.expected_results: Dict[str, ExpectedResult] = {}
+        self.__collect_expected_results()
+
+        # Init execution results for each PDL file
+        self.execution_results: Dict[str, ExecutionResult] = {}
+
+        # Init failed results
+        self.failed_results = FailedResults()
+
+    def __get_results_dir(self) -> pathlib.Path:
+        return pathlib.Path(".") / "tests" / "results"
+
+    def __collect_expected_results(self) -> None:
+        """
+        Collects the possible results for the programs in self.check
+        """
+
+        for file in self.check:
+            expected_result = ExpectedResult()
+
+            if file in self.expected_parse_error:
+                expected_result.error_code = ExecutionErrorCode.PARSE_ERROR
+            elif file in self.expected_runtime_error:
+                expected_result.error_code = ExecutionErrorCode.RUNTIME_ERROR
+            else:
+                # Collect possible results
+                res_list = []
+                file_path: pathlib.Path = pathlib.Path(file)
+                result_dir_name = self.__get_results_dir() / file_path.parent
+                expected_files = result_dir_name.glob(file_path.stem + ".*.result")
+
+                for expected_file in expected_files:
+                    with open(expected_file, "r", encoding="utf-8") as truth_file:
+                        content = truth_file.read()
+                        res_list.append(content)
+
+                expected_result.error_code = ExecutionErrorCode.NO_ERROR
+                expected_result.results = res_list
+
+            self.expected_results[file] = expected_result
+
+    def __execute_file(self, pdl_file_name: str) -> None:
+        """
+        Executes a single file and records its result output and error code
+        in self.execution_results
+        """
+
+        exec_result = ExecutionResult()
+
+        pdl_file_path = pathlib.Path(pdl_file_name)
+        scope: ScopeType = PdlDict({})
+
+        # Patch stdin and scope for files that require inputs
+        if pdl_file_name in self.with_inputs:
+            inputs = self.with_inputs[pdl_file_name]
+            if inputs.stdin is not None:
+                self.monkeypatch.setattr(
+                    "sys.stdin",
+                    io.StringIO(inputs.stdin),
+                )
+            if inputs.scope is not None:
+                scope = inputs.scope
+
+        try:
+            # Execute the file
+            output = pdl.exec_file(
+                pdl_file_path,
+                scope=scope,
+                output="all",
+                config=pdl.InterpreterConfig(batch=0),
+            )
+
+            exec_result.result = str(output["result"])
+            exec_result.error_code = ExecutionErrorCode.NO_ERROR
+
+        except PDLParseError:
+            exec_result.error_code = ExecutionErrorCode.PARSE_ERROR
+        except Exception:
+            exec_result.error_code = ExecutionErrorCode.RUNTIME_ERROR
+
+        self.execution_results[pdl_file_name] = exec_result
+
+    def populate_exec_result_for_checks(self) -> None:
+        """
+        Populates the execution result for all files in self.check
+        """
+
+        for file in self.check:
+            if file not in self.skip:
+                self.__execute_file(file)
+
+    def validate_expected_and_actual(self) -> None:
+        """
+        Compares the expected result to the actual result for each checked file.
+        Must be run after populate_exec_result_for_checks.
+        """
+
+        wrong_result: Dict[str, str] = {}
+        unexpected_parse_error: List[str] = []
+        unexpected_runtime_error: List[str] = []
+
+        for file in self.check:
+            if file not in self.skip:
+                expected_result = self.expected_results[file]
+                actual_result = self.execution_results[file]
+
+                match = expected_result.compare_to_execution(actual_result)
+
+                if not match:
+                    # Check whether the actual execution raised any error
+                    if actual_result.error_code == ExecutionErrorCode.PARSE_ERROR:
+                        unexpected_parse_error.append(file)
+                    elif actual_result.error_code == ExecutionErrorCode.RUNTIME_ERROR:
+                        unexpected_runtime_error.append(file)
+                    # If there was no error, then the results are wrong
+                    else:
+                        if actual_result.result is not None:
+                            wrong_result[file] = actual_result.result
+
+                        # print(f"{actual_result.result=}")
+                        # print(f"{expected_result.results=}")
+
+        self.failed_results.wrong_results = wrong_result
+        self.failed_results.unexpected_parse_error = unexpected_parse_error
+        self.failed_results.unexpected_runtime_error = unexpected_runtime_error
+
+    def write_results(self) -> None:
+        """
+        Writes new result files for the files that produced wrong results
+        """
+
+        results_dir = self.__get_results_dir()
+        for file, actual_result in self.failed_results.wrong_results.items():
+            # Make sure the results directory for this file exists
+            write_dir = results_dir / pathlib.Path(file).parent
+            write_dir.mkdir(parents=True, exist_ok=True)
+
+            # Write the actual result to a new versioned result file
+            write_file_path = write_dir / f"{pathlib.Path(file).stem}.{str(self.results_version)}.result"
+
+            with open(write_file_path, 'w', encoding="utf-8") as f:
+                f.write(actual_result)
+
+
+def test_example_runs(capsys: CaptureFixture[str], monkeypatch: MonkeyPatch) -> None:
+    """
+    Runs the test
+    """
+
+    random.seed(11)
+    background = ExamplesRun(monkeypatch)
+
+    background.populate_exec_result_for_checks()
+    background.validate_expected_and_actual()
+
+    if background.update_results:
+        background.write_results()
+
+    # Print the wrong results
+    for file, actual in background.failed_results.wrong_results.items():
+        print(f"File that produced wrong result: {file}")
+        print(f"Actual:\n{actual}\n")
+
+    assert len(background.failed_results.unexpected_parse_error) == 0, f"Unexpected parse error: {background.failed_results.unexpected_parse_error}"
+    assert len(background.failed_results.unexpected_runtime_error) == 0, f"Unexpected runtime error: {background.failed_results.unexpected_runtime_error}"
+    assert len(background.failed_results.wrong_results) == 0, f"Wrong results: {background.failed_results.wrong_results}"
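+
+# Local usage sketch: point the test at a custom configuration file through the
+# EXAMPLES_RUN_FILE environment variable before invoking pytest, for example
+# (hypothetical config path):
+#   EXAMPLES_RUN_FILE=tests/my_examples_run.yaml py.test -v -s tests/test_examples_run_new.py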