Skip to content

Commit 7f2167a

Browse files
committed
integrate testbench as E2E replay test
1 parent e9539c1 commit 7f2167a

File tree

5 files changed

+145
-3
lines changed

5 files changed

+145
-3
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# End-to-end workflow: runs the tracer-replay "testbench" E2E script.
name: end-to-end-test
2+
3+
# Triggered by pull requests and by manual dispatch.
on:
4+
pull_request:
5+
workflow_dispatch:
6+
7+
jobs:
8+
tracer-replay-testbench:
9+
runs-on: ubuntu-latest
10+
# MAX_RETRIES, RETRY_DELAY and EXPECTED_IMPROVEMENT_PCT are read by the test scripts through os.getenv.
env:
11+
CODEFLASH_AIS_SERVER: prod
12+
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
13+
CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }}
14+
COLUMNS: 110
15+
MAX_RETRIES: 3
16+
RETRY_DELAY: 5
17+
EXPECTED_IMPROVEMENT_PCT: 10
18+
CODEFLASH_END_TO_END: 1
19+
steps:
20+
# fetch-depth: 0 requests the full git history instead of a shallow clone.
- uses: actions/checkout@v4
21+
with:
22+
fetch-depth: 0
23+
token: ${{ secrets.GITHUB_TOKEN }}
24+
25+
- name: Set up Python 3.11 for CLI
26+
uses: astral-sh/setup-uv@v5
27+
with:
28+
python-version: 3.11.6
29+
30+
# Creates .venv with uv and installs the project (including the dev group) into it with poetry.
- name: Install dependencies (CLI)
31+
run: |
32+
uv tool install poetry
33+
uv venv
34+
source .venv/bin/activate
35+
poetry install --with dev
36+
37+
# Each step starts a fresh shell, so the virtualenv is activated again before running the script.
- name: Run Codeflash to optimize code
38+
id: optimize_code
39+
run: |
40+
source .venv/bin/activate
41+
poetry run python tests/scripts/end_to_end_test_tracer_replay_testbench.py
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from concurrent.futures import ThreadPoolExecutor
2+
3+
def funcA(number):  # NOTE(review): the testbench E2E script asserts funcA coverage by line number (expected_lines=[4, 5, 6, 8, 11]) - presumably against this file; avoid adding or removing lines here without updating it
4+
k = 0  # accumulator for the loop below; its final value is never read
5+
for i in range(number * 100):
6+
k += i
7+
# Simplify the for loop by using sum with a range object
8+
j = sum(range(number))  # j is not read afterwards either
9+
10+
# Use a generator expression directly in join for more efficiency
11+
return " ".join(str(i) for i in range(number))  # "0 1 2 ..." up to number - 1, space-separated
12+
13+
14+
def test_threadpool() -> None:
    """Run ``funcA`` for 10, 20 and 30 on a three-worker thread pool and print each result.

    ``Executor.map`` yields results in the order of the inputs, so the output
    order is deterministic even though the calls run concurrently.
    """
    args = list(range(10, 31, 10))
    # The context manager shuts the pool down (joining its worker threads)
    # once the results have been consumed, instead of leaving the executor
    # open until interpreter exit.
    with ThreadPoolExecutor(max_workers=3) as pool:
        for r in pool.map(funcA, args):
            print(r)
21+
22+
23+
# Script entry point: executing this file directly runs the thread-pool workload once.
if __name__ == "__main__":
24+
test_threadpool()

tests/scripts/end_to_end_test_tracer_replay.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@
77
def run_test(expected_improvement_pct: int) -> bool:
# Builds a trace-mode TestConfig for the "workload" variant and runs it against the
# simple_tracer_e2e project directory; returns whatever run_codeflash_command returns.
88
config = TestConfig(
99
trace_mode=True,
10+
# "workload" is the value run_codeflash_command routes to run_trace_test.
trace_load="workload",
1011
min_improvement_x=0.1,
1112
expected_unit_tests=1,
1213
coverage_expectations=[
13-
CoverageExpectation(function_name="funcA", expected_coverage=100.0, expected_lines=[2, 3, 4, 6, 9])
14+
CoverageExpectation(function_name="funcA", expected_coverage=100.0, expected_lines=[2, 3, 4, 6, 9]),
1415
],
1516
)
1617
# Three directory levels above this script, then code_to_optimize/code_directories/simple_tracer_e2e.
cwd = (
1718
pathlib.Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "simple_tracer_e2e"
1819
).resolve()
1920
return run_codeflash_command(cwd, config, expected_improvement_pct)
2021

21-
2222
# Script entry point: EXPECTED_IMPROVEMENT_PCT (default 10) is parsed as an int and passed,
# together with run_test, to run_with_retries; the result becomes the process exit status.
# NOTE(review): run_with_retries is annotated "-> bool"; exit(True) would mean exit status 1,
# so confirm it actually returns an exit code (0 on success).
if __name__ == "__main__":
2323
exit(run_with_retries(run_test, int(os.getenv("EXPECTED_IMPROVEMENT_PCT", 10))))
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import os
2+
import pathlib
3+
4+
from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries
5+
6+
7+
def run_test(expected_improvement_pct: int) -> bool:
    """Run the tracer-replay end-to-end check in its ``testbench`` variant.

    Args:
        expected_improvement_pct: Improvement threshold forwarded unchanged to
            ``run_codeflash_command``.

    Returns:
        The result of ``run_codeflash_command`` for the ``simple_tracer_e2e``
        project directory.
    """
    # Three directory levels above this script, then down into the sample project.
    base_dir = pathlib.Path(__file__).parent.parent.parent
    project_dir = base_dir.joinpath("code_to_optimize", "code_directories", "simple_tracer_e2e").resolve()

    func_a_coverage = CoverageExpectation(
        function_name="funcA",
        expected_coverage=100.0,
        expected_lines=[4, 5, 6, 8, 11],
    )
    testbench_config = TestConfig(
        trace_mode=True,
        trace_load="testbench",
        min_improvement_x=0.1,
        expected_unit_tests=1,
        coverage_expectations=[func_a_coverage],
    )
    return run_codeflash_command(project_dir, testbench_config, expected_improvement_pct)
21+
22+
23+
24+
# Script entry point: EXPECTED_IMPROVEMENT_PCT (default 10) is parsed as an int and passed,
# together with run_test, to run_with_retries; the result becomes the process exit status.
# NOTE(review): run_with_retries is annotated "-> bool"; exit(True) would mean exit status 1,
# so confirm it actually returns an exit code (0 on success).
if __name__ == "__main__":
25+
exit(run_with_retries(run_test, int(os.getenv("EXPECTED_IMPROVEMENT_PCT", 10))))

tests/scripts/end_to_end_test_utilities.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class TestConfig:
2525
expected_unit_tests: Optional[int] = None
2626
min_improvement_x: float = 0.1
2727
trace_mode: bool = False
28+
trace_load: str = "workload"
2829
coverage_expectations: list[CoverageExpectation] = field(default_factory=list)
2930

3031

@@ -80,7 +81,10 @@ def run_codeflash_command(
8081
) -> bool:
8182
logging.basicConfig(level=logging.INFO)
8283
if config.trace_mode:
83-
return run_trace_test(cwd, config, expected_improvement_pct)
84+
if config.trace_load == "workload":
85+
return run_trace_test(cwd, config, expected_improvement_pct)
86+
if config.trace_load == "testbench":
87+
return run_trace_test2(cwd, config, expected_improvement_pct)
8488

8589
path_to_file = cwd / config.file_path
8690
file_contents = path_to_file.read_text("utf-8")
@@ -228,6 +232,54 @@ def run_trace_test(cwd: pathlib.Path, config: TestConfig, expected_improvement_p
228232
return validate_output(stdout, return_code, expected_improvement_pct, config)
229233

230234

235+
def _run_and_stream(command: list[str], cwd: pathlib.Path) -> tuple[int, str]:
    """Run *command* in *cwd*, logging each output line as it arrives.

    stderr is merged into stdout, so the returned text is the combined output.

    Returns:
        ``(exit_code, combined_output)``.
    """
    captured: list[str] = []
    # The context manager closes the stdout pipe and waits for the child even
    # if something raises while the output is being streamed.
    with subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, cwd=str(cwd), env=os.environ.copy()
    ) as process:
        for line in process.stdout:
            logging.info(line.strip())
            captured.append(line)
        return_code = process.wait()
    return return_code, "".join(captured)


def run_trace_test2(cwd: pathlib.Path, config: TestConfig, expected_improvement_pct: int) -> bool:
    """Trace ``testbench.py`` with the codeflash tracer, then optimize from the generated replay test.

    Steps:
      1. Clear ``cwd / "tests" / config.test_framework`` and run
         ``python -m codeflash.tracer -o codeflash.trace testbench.py`` in ``cwd``.
      2. Require the tracer's summary line to report exactly five traced calls
         and the replay test file it names to exist.
      3. Run ``codeflash/main.py --replay-test <path> --no-pr`` in ``cwd`` and
         pass its output to ``validate_output``.

    Args:
        cwd: Project directory both commands are executed in.
        config: Test configuration; ``test_framework`` selects the tests
            sub-directory to clear, and the whole object is forwarded to
            ``validate_output``.
        expected_improvement_pct: Forwarded to ``validate_output``.

    Returns:
        ``False`` when the tracer exits non-zero, its summary line is missing
        or reports an unexpected count, or the replay test file is absent;
        otherwise the result of ``validate_output``.
    """
    expected_traced_calls = 5

    # First command: run the tracer.
    test_root = cwd / "tests" / (config.test_framework or "")
    clear_directory(test_root)
    return_code, stdout = _run_and_stream(
        ["python", "-m", "codeflash.tracer", "-o", "codeflash.trace", "testbench.py"], cwd
    )
    if return_code != 0:
        logging.error(f"Tracer command returned exit code {return_code}")
        return False

    # re.MULTILINE lets "$" match at the end of the summary line itself, so the
    # check no longer depends on that line being the very last line of output.
    functions_traced = re.search(
        r"Traced (\d+) function calls successfully and replay test created at - (.*)$", stdout, re.MULTILINE
    )
    if not functions_traced:
        logging.error("Tracer output did not contain the traced-function-calls summary line")
        return False

    traced_calls = int(functions_traced.group(1))
    if traced_calls != expected_traced_calls:
        logging.error(f"Expected {expected_traced_calls} traced functions, got {traced_calls}")
        return False

    replay_test_path = pathlib.Path(functions_traced.group(2).strip())
    if not replay_test_path.exists():
        logging.error(f"Replay test file missing at {replay_test_path}")
        return False

    # Second command: run the optimization from the replay test.
    return_code, stdout = _run_and_stream(
        ["python", "../../../codeflash/main.py", "--replay-test", str(replay_test_path), "--no-pr"], cwd
    )
    return validate_output(stdout, return_code, expected_improvement_pct, config)
281+
282+
231283
def run_with_retries(test_func, *args, **kwargs) -> bool:
232284
max_retries = int(os.getenv("MAX_RETRIES", 3))
233285
retry_delay = int(os.getenv("RETRY_DELAY", 5))

0 commit comments

Comments
 (0)