Commit b3de342

anvacaru and ehildenb authored
Update to conformance test suite (#2742)
* Create new pytest harness for execution-spec-tests
* refactor duplicated code
* refactor duplicated code II
* add pyupgrade suggestions
* formatting
* remove execution-spec-tests/slow.llvm
* apply review suggestions
* Update Makefile

Co-authored-by: Everett Hildenbrandt <[email protected]>

---------

Co-authored-by: Everett Hildenbrandt <[email protected]>
1 parent d9842f7 · commit b3de342

11 files changed: 2951 additions, 84 deletions

.github/workflows/test-pr.yml

Lines changed: 2 additions & 0 deletions

@@ -72,6 +72,8 @@ jobs:
         run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-integration'
       - name: 'Test conformance'
         run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-conformance'
+      - name: 'Test execution-spec-tests'
+        run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-fixtures'
       - name: 'Test llvm krun'
         run: docker exec -u github-user kevm-ci-concrete-${{ github.sha }} /bin/bash -c 'make test-interactive'
       - name: 'Tear down Docker'

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@
 /tests/specs/opcodes/evm-optimizations-spec.md
 /tests/specs/**/*.prove.out
 /tests/specs/**/*.sol.json
+/tests/execution-spec-tests/fixtures
 /tests/vm/*.out
 .DS_Store
 .idea/

Makefile

Lines changed: 19 additions & 1 deletion

@@ -15,7 +15,7 @@ POETRY := poetry -C $(KEVM_PYK_DIR)
 POETRY_RUN := $(POETRY) run --


-.PHONY: poetry-env
+.PHONY: poetry-env download-json-fixtures
 poetry-env:
 	$(POETRY) env use --no-cache $(PYTHON_BIN)

@@ -51,6 +51,24 @@ conformance-failing-list: poetry
 	sed -i '1{/^[[:space:]]*$$/d;}' tests/failing.llvm ;\
 	fi

+download-json-fixtures:
+	rm -rf tests/execution-spec-tests/fixtures
+	cd tests/execution-spec-tests && bash get_execution_spec_tests.sh
+
+test-fixtures: poetry download-json-fixtures
+	$(MAKE) -C kevm-pyk/ test-integration PYTEST_ARGS+="-k test_execution_spec_tests.py"
+
+fixtures-failing-list: poetry download-json-fixtures
+	cat /dev/null > tests/execution-spec-tests/failing.llvm
+	-$(MAKE) -C kevm-pyk/ test-integration PYTEST_ARGS+="-k test_execution_spec_tests.py --save-failing --maxfail=10000"
+	LC_ALL=en_US.UTF-8 sort -f -d -o tests/execution-spec-tests/failing.llvm tests/execution-spec-tests/failing.llvm
+	if [ "$(shell uname)" = "Darwin" ]; then \
+		sed -i '' '1{/^[[:space:]]*$$/d;}' tests/execution-spec-tests/failing.llvm ;\
+		echo >> tests/execution-spec-tests/failing.llvm ;\
+	else \
+		sed -i '1{/^[[:space:]]*$$/d;}' tests/execution-spec-tests/failing.llvm ;\
+	fi
+
 test-vm: poetry
 	$(MAKE) -C kevm-pyk/ test-integration PYTEST_ARGS+="-k test_vm"
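The new download-json-fixtures target defers the actual fetch to tests/execution-spec-tests/get_execution_spec_tests.sh, which this page does not show. As a rough illustration only, a Python sketch of such a step follows; the release URL, tag, and archive name are assumptions, not values taken from that script.

# Hypothetical sketch of the fixture-download step; the real logic lives in
# tests/execution-spec-tests/get_execution_spec_tests.sh. The URL, tag, and
# archive name below are assumptions.
import tarfile
import urllib.request
from pathlib import Path

FIXTURES_URL = (  # assumed location; check the shell script for the real one
    'https://github.com/ethereum/execution-spec-tests/releases/download/v1.0.0/fixtures.tar.gz'
)


def download_fixtures(work_dir: Path) -> None:
    """Fetch the fixtures tarball and unpack it into work_dir/fixtures."""
    archive = work_dir / 'fixtures.tar.gz'
    urllib.request.urlretrieve(FIXTURES_URL, archive)  # download the release archive
    with tarfile.open(archive) as tar:
        tar.extractall(work_dir)  # expected to produce work_dir/fixtures
    archive.unlink()  # clean up the archive after extraction


if __name__ == '__main__':
    download_fixtures(Path('tests/execution-spec-tests'))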

kevm-pyk/src/kevm_pyk/gst_to_kore.py

Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@
         'chainname',
         'lastblockhash',
         'hasBigInt',
+        'config',
     ]
 )
 _GST_LOAD_KEYS: Final = frozenset(
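Adding 'config' to _GST_DISCARD_KEYS means the fixture-level config object is stripped before the GST JSON is converted to Kore; interpret (next file) reads it first. A minimal sketch of that filtering, under the assumption that filter_gst_keys drops discard keys from each test entry — the per-test nesting is an assumption about its implementation:

# Hedged sketch of discard-key filtering; the real implementation is
# kevm_pyk.gst_to_kore.filter_gst_keys, whose exact shape is assumed here.
from typing import Any, Final

_GST_DISCARD_KEYS: Final = frozenset(['chainname', 'lastblockhash', 'hasBigInt', 'config'])


def filter_gst_keys_sketch(gst_data: dict[str, Any]) -> dict[str, Any]:
    # Drop unsupported keys from every test in a GST file before Kore conversion.
    return {
        name: {k: v for k, v in test.items() if k not in _GST_DISCARD_KEYS}
        for name, test in gst_data.items()
    }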

kevm-pyk/src/kevm_pyk/interpreter.py

Lines changed: 3 additions & 0 deletions

@@ -15,5 +15,8 @@

 def interpret(gst_data: Any, schedule: str, mode: str, chainid: int, usegas: bool, *, check: bool = True) -> Pattern:
     """Interpret the given GST data using the LLVM backend."""
+    if 'config' in gst_data.keys():
+        schedule = gst_data['config']['network'].upper()
+        chainid = int(gst_data['config']['chainid'], 16)
     init_kore = gst_to_kore(filter_gst_keys(gst_data), schedule, mode, chainid, usegas)
     return llvm_interpret(kdist.get('evm-semantics.llvm'), init_kore, check=check)
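The effect of the new branch: execution-spec-tests fixtures carry a config object whose network field names the fork schedule and whose chainid is a hex string, and these override the defaults passed in by the harness. A small worked example — the fixture fragment is illustrative, with representative values rather than ones copied from a real fixture:

# Illustrative fixture fragment; values are representative, not from a real file.
gst_data = {'config': {'network': 'Cancun', 'chainid': '0x01'}}

schedule, chainid = 'DEFAULT', 0  # harness defaults before the override
if 'config' in gst_data.keys():
    schedule = gst_data['config']['network'].upper()  # -> 'CANCUN'
    chainid = int(gst_data['config']['chainid'], 16)  # -> 1

assert (schedule, chainid) == ('CANCUN', 1)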
kevm-pyk/src/tests/integration/test_conformance.py

Lines changed: 49 additions & 83 deletions

@@ -1,27 +1,17 @@
 from __future__ import annotations

-import csv
-import json
 import logging
 import sys
-from pathlib import Path
 from typing import TYPE_CHECKING

 import pytest
-from pyk.kdist import kdist
-from pyk.kore.prelude import int_dv
-from pyk.kore.syntax import App
-from pyk.kore.tools import PrintOutput, kore_print

-from kevm_pyk.interpreter import interpret
-
-from ..utils import REPO_ROOT
+from ..utils import REPO_ROOT, _skipped_tests, _test

 if TYPE_CHECKING:
+    from pathlib import Path
     from typing import Final

-    from pyk.kore.syntax import Pattern
-

 _LOGGER: Final = logging.getLogger(__name__)

@@ -34,76 +24,12 @@
 SLOW_TESTS_FILE: Final = REPO_ROOT / 'tests/slow.llvm'


-def _test(gst_file: Path, *, schedule: str, mode: str, usegas: bool, save_failing: bool) -> None:
-    skipped_gst_tests = SKIPPED_TESTS.get(gst_file, [])
-    if '*' in skipped_gst_tests:
-        pytest.skip()
-
-    failing_tests: list[str] = []
-    gst_file_relative_path: Final[str] = str(gst_file.relative_to(TEST_DIR))
-    chainid = 0 if gst_file_relative_path in TEST_FILES_WITH_CID_0 else 1
-
-    with gst_file.open() as f:
-        gst_data = json.load(f)
-
-    for test_name, test in gst_data.items():
-        _LOGGER.info(f'Running test: {gst_file} - {test_name}')
-        if test_name in skipped_gst_tests:
-            continue
-        res = interpret({test_name: test}, schedule, mode, chainid, usegas, check=False)
-
-        try:
-            _assert_exit_code_zero(res)
-        except AssertionError:
-            if not save_failing:
-                raise
-            failing_tests.append(test_name)
-
-    if not failing_tests:
-        return
-    if save_failing:
-        with FAILING_TESTS_FILE.open('a', newline='') as ff:
-            writer = csv.writer(ff)
-            if len(failing_tests) == len(gst_data):
-                writer.writerow([gst_file_relative_path, '*'])
-            else:
-                for test_name in sorted(failing_tests):
-                    writer.writerow([gst_file_relative_path, test_name])
-    raise AssertionError(f'Found failing tests in GST file {gst_file_relative_path}: {failing_tests}')
-
-
-def _assert_exit_code_zero(pattern: Pattern) -> None:
-    assert type(pattern) is App
-    kevm_cell = pattern.args[0]
-    assert type(kevm_cell) is App
-    exit_code_cell = kevm_cell.args[1]
-    assert type(exit_code_cell) is App
-
-    exit_code = exit_code_cell.args[0]
-    if exit_code == int_dv(0):
-        return
-
-    pretty = kore_print(pattern, definition_dir=kdist.get('evm-semantics.llvm'), output=PrintOutput.PRETTY)
-    assert pretty == GOLDEN
-
-
-def _skipped_tests() -> dict[Path, list[str]]:
-    slow_tests = read_csv_file(SLOW_TESTS_FILE)
-    failing_tests = read_csv_file(FAILING_TESTS_FILE)
-    skipped: dict[Path, list[str]] = {}
-    for test_file, test in slow_tests + failing_tests:
-        test_file = TEST_DIR / test_file
-        skipped.setdefault(test_file, []).append(test)
-    return skipped
-
+SKIPPED_TESTS: Final = _skipped_tests(TEST_DIR, SLOW_TESTS_FILE, FAILING_TESTS_FILE)

-def read_csv_file(csv_file: Path) -> tuple[tuple[Path, str], ...]:
-    with csv_file.open(newline='') as file:
-        reader = csv.reader(file)
-        return tuple((Path(row[0]), row[1]) for row in reader)

+def compute_chain_id(gst_file: str) -> int:
+    return 0 if gst_file in TEST_FILES_WITH_CID_0 else 1

-SKIPPED_TESTS: Final = _skipped_tests()

 VM_TEST_DIR: Final = TEST_DIR / 'BlockchainTests/GeneralStateTests/VMTests'
 VM_TESTS: Final = tuple(VM_TEST_DIR.glob('*/*.json'))

@@ -116,7 +42,17 @@ def read_csv_file(csv_file: Path) -> tuple[tuple[Path, str], ...]:
     ids=[str(test_file.relative_to(VM_TEST_DIR)) for test_file in VM_TESTS],
 )
 def test_vm(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='DEFAULT', mode='VMTESTS', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='DEFAULT',
+        mode='VMTESTS',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )


 @pytest.mark.skip(reason='failing / slow VM tests')

@@ -126,7 +62,17 @@ def test_vm(test_file: Path, save_failing: bool) -> None:
     ids=[str(test_file.relative_to(VM_TEST_DIR)) for test_file in SKIPPED_VM_TESTS],
 )
 def test_rest_vm(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='DEFAULT', mode='VMTESTS', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='DEFAULT',
+        mode='VMTESTS',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )


 ALL_TEST_DIR: Final = TEST_DIR / 'BlockchainTests/GeneralStateTests'

@@ -141,7 +87,17 @@ def test_rest_vm(test_file: Path, save_failing: bool) -> None:
     ids=[str(test_file.relative_to(ALL_TEST_DIR)) for test_file in BCHAIN_TESTS],
 )
 def test_bchain(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='CANCUN', mode='NORMAL', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )


 @pytest.mark.skip(reason='failing / slow blockchain tests')

@@ -151,4 +107,14 @@ def test_bchain(test_file: Path, save_failing: bool) -> None:
     ids=[str(test_file.relative_to(ALL_TEST_DIR)) for test_file in SKIPPED_BCHAIN_TESTS],
 )
 def test_rest_bchain(test_file: Path, save_failing: bool) -> None:
-    _test(test_file, schedule='CANCUN', mode='NORMAL', usegas=True, save_failing=save_failing)
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=compute_chain_id,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
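The deleted helpers move into ..utils with the former module globals injected as parameters. Below is a hedged reconstruction of the shared _test helper, pieced together from the removed body above and the keyword arguments at the new call sites; the real kevm-pyk/src/tests/utils.py is not shown on this page, so details may differ.

# Hedged reconstruction of the shared _test helper; inferred from the deleted
# code above and the new call sites. The actual utils.py may differ.
from __future__ import annotations

import csv
import json
from pathlib import Path
from typing import Callable

import pytest
from pyk.kore.prelude import int_dv
from pyk.kore.syntax import App, Pattern

from kevm_pyk.interpreter import interpret


def _assert_exit_code_zero(pattern: Pattern) -> None:
    # Condensed from the deleted helper: unwrap the <kevm> cell, then its
    # exit-code cell, and require an exit code of 0.
    assert type(pattern) is App
    kevm_cell = pattern.args[0]
    assert type(kevm_cell) is App
    exit_code_cell = kevm_cell.args[1]
    assert type(exit_code_cell) is App
    assert exit_code_cell.args[0] == int_dv(0)


def _test(
    gst_file: Path,
    *,
    schedule: str,
    mode: str,
    usegas: bool,
    save_failing: bool,
    compute_chain_id: Callable[[str], int],
    skipped_tests: dict[Path, list[str]],
    test_dir: Path,
    failing_tests_file: Path,
) -> None:
    skipped_gst_tests = skipped_tests.get(gst_file, [])
    if '*' in skipped_gst_tests:
        pytest.skip()

    failing_tests: list[str] = []
    rel_path = str(gst_file.relative_to(test_dir))
    chainid = compute_chain_id(rel_path)  # per-suite chain-id rule, now injected

    with gst_file.open() as f:
        gst_data = json.load(f)

    for test_name, test in gst_data.items():
        if test_name in skipped_gst_tests:
            continue
        res = interpret({test_name: test}, schedule, mode, chainid, usegas, check=False)
        try:
            _assert_exit_code_zero(res)
        except AssertionError:
            if not save_failing:
                raise
            failing_tests.append(test_name)

    if not failing_tests:
        return
    if save_failing:
        with failing_tests_file.open('a', newline='') as ff:
            writer = csv.writer(ff)
            if len(failing_tests) == len(gst_data):
                writer.writerow([rel_path, '*'])
            else:
                for test_name in sorted(failing_tests):
                    writer.writerow([rel_path, test_name])
    raise AssertionError(f'Found failing tests in GST file {rel_path}: {failing_tests}')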
kevm-pyk/src/tests/integration/test_execution_spec_tests.py

Lines changed: 121 additions & 0 deletions

@@ -0,0 +1,121 @@
+from __future__ import annotations
+
+import logging
+import sys
+from typing import TYPE_CHECKING
+
+import pytest
+
+from ..utils import REPO_ROOT, _skipped_tests, _test
+
+if TYPE_CHECKING:
+    from pathlib import Path
+    from typing import Final
+
+
+_LOGGER: Final = logging.getLogger(__name__)
+
+sys.setrecursionlimit(10**8)
+
+WORK_DIR: Final = REPO_ROOT / 'tests/execution-spec-tests'
+TEST_DIR: Final = WORK_DIR / 'fixtures'
+FAILING_TESTS_FILE: Final = WORK_DIR / 'failing.llvm'
+SLOW_TESTS_FILE: Final = WORK_DIR / 'slow.llvm'
+
+SKIPPED_TESTS: Final = _skipped_tests(TEST_DIR, SLOW_TESTS_FILE, FAILING_TESTS_FILE)
+
+
+BCHAIN_TEST_DIR: Final = TEST_DIR / 'blockchain_tests'
+BCHAIN_TESTS: Final = tuple(BCHAIN_TEST_DIR.rglob('**/*.json'))
+
+
+def chain_id_always_one(_file: str) -> int:
+    return 1
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    BCHAIN_TESTS,
+    ids=[str(test_file.relative_to(BCHAIN_TEST_DIR)) for test_file in BCHAIN_TESTS],
+)
+def test_bchain(test_file: Path, save_failing: bool) -> None:
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=chain_id_always_one,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
+
+
+BCHAIN_ENGINE_TEST_DIR: Final = TEST_DIR / 'blockchain_tests_engine'
+BCHAIN_ENGINE_TESTS: Final = tuple(BCHAIN_ENGINE_TEST_DIR.rglob('**/*.json'))
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    BCHAIN_ENGINE_TESTS,
+    ids=[str(test_file.relative_to(BCHAIN_ENGINE_TEST_DIR)) for test_file in BCHAIN_ENGINE_TESTS],
+)
+def test_bchain_engine(test_file: Path, save_failing: bool) -> None:
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=chain_id_always_one,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
+
+
+STATE_TEST_DIR: Final = TEST_DIR / 'state_tests'
+STATE_TESTS: Final = tuple(STATE_TEST_DIR.rglob('**/*.json'))
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    STATE_TESTS,
+    ids=[str(test_file.relative_to(STATE_TEST_DIR)) for test_file in STATE_TESTS],
+)
+def test_state(test_file: Path, save_failing: bool) -> None:
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=chain_id_always_one,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
+
+
+TRANSACTION_TEST_DIR: Final = TEST_DIR / 'transaction_tests'
+TRANSACTION_TESTS: Final = tuple(TRANSACTION_TEST_DIR.rglob('**/*.json'))
+
+
+@pytest.mark.parametrize(
+    'test_file',
+    TRANSACTION_TESTS,
+    ids=[str(test_file.relative_to(TRANSACTION_TEST_DIR)) for test_file in TRANSACTION_TESTS],
+)
+def test_transaction(test_file: Path, save_failing: bool) -> None:
+    _test(
+        test_file,
+        schedule='CANCUN',
+        mode='NORMAL',
+        usegas=True,
+        save_failing=save_failing,
+        compute_chain_id=chain_id_always_one,
+        skipped_tests=SKIPPED_TESTS,
+        test_dir=TEST_DIR,
+        failing_tests_file=FAILING_TESTS_FILE,
+    )
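Both test modules now build their skip map through the shared _skipped_tests(TEST_DIR, SLOW_TESTS_FILE, FAILING_TESTS_FILE). A sketch of that helper, under the assumption that it is the deleted conformance version with its paths parameterized; the existence guard is an extra assumption, since the commit message notes that execution-spec-tests/slow.llvm was removed.

# Hedged sketch: the deleted _skipped_tests/read_csv_file from the conformance
# module, with the module-level paths turned into parameters. The real helper
# lives in kevm-pyk/src/tests/utils.py, which this page does not show.
from __future__ import annotations

import csv
from pathlib import Path


def read_csv_file(csv_file: Path) -> tuple[tuple[Path, str], ...]:
    with csv_file.open(newline='') as file:
        reader = csv.reader(file)
        return tuple((Path(row[0]), row[1]) for row in reader)


def _skipped_tests(test_dir: Path, slow_file: Path, failing_file: Path) -> dict[Path, list[str]]:
    # Map each fixture file to the test names (or '*' for all) to skip in it.
    skipped: dict[Path, list[str]] = {}
    for csv_file in (slow_file, failing_file):
        if not csv_file.exists():  # guard is an assumption; slow.llvm may be absent
            continue
        for test_file, test in read_csv_file(csv_file):
            skipped.setdefault(test_dir / test_file, []).append(test)
    return skipped

Locally, make test-fixtures first runs download-json-fixtures and then drives this module through kevm-pyk's test-integration target.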
