Skip to content

Commit 96dd780

Browse files
committed
New end-to-end test for benchmarking bubble sort
1 parent 217e239 commit 96dd780

File tree

4 files changed

+76
-6
lines changed

4 files changed

+76
-6
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
# End-to-end workflow: runs tests/scripts/end_to_end_test_benchmark_sort.py
# on every pull request and on manual dispatch.
name: end-to-end-test

on:
  pull_request:
  workflow_dispatch:

jobs:
  benchmark-bubble-sort-optimization:
    runs-on: ubuntu-latest
    env:
      CODEFLASH_AIS_SERVER: prod
      POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
      CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }}
      COLUMNS: 110
      # NOTE(review): MAX_RETRIES / RETRY_DELAY are presumably consumed by the
      # retry helper used by the test script - confirm against its source.
      MAX_RETRIES: 3
      RETRY_DELAY: 5
      # Read by the test script via os.getenv("EXPECTED_IMPROVEMENT_PCT").
      EXPECTED_IMPROVEMENT_PCT: 5
      CODEFLASH_END_TO_END: 1
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python 3.11 for CLI
        uses: astral-sh/setup-uv@v5
        with:
          python-version: 3.11.6

      - name: Install dependencies (CLI)
        run: |
          uv tool install poetry
          uv venv
          source .venv/bin/activate
          poetry install --with dev

      - name: Run Codeflash to optimize code
        # Step ids may only contain alphanumerics, "-" and "_" (no spaces).
        id: optimize_code_with_benchmarks
        run: |
          source .venv/bin/activate
          poetry run python tests/scripts/end_to_end_test_benchmark_sort.py

codeflash/optimization/function_optimizer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ def determine_best_candidate(
449449
speedup_ratios[candidate.optimization_id] = perf_gain
450450

451451
tree = Tree(f"Candidate #{candidate_index} - Runtime Information")
452+
benchmark_tree = None
452453
if speedup_critic(
453454
candidate_result, original_code_baseline.runtime, best_runtime_until_now
454455
) and quantity_of_tests_critic(candidate_result):
@@ -499,9 +500,9 @@ def determine_best_candidate(
499500
console.print(benchmark_tree)
500501
console.rule()
501502

502-
self.write_code_and_helpers(
503-
self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
504-
)
503+
self.write_code_and_helpers(
504+
self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
505+
)
505506

506507
except KeyboardInterrupt as e:
507508
self.write_code_and_helpers(
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import os
2+
import pathlib
3+
4+
from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries
5+
6+
7+
def run_test(expected_improvement_pct: int) -> bool:
    """Run Codeflash with benchmarking enabled against ``sorter`` in ``bubble_sort.py``.

    The ``code_to_optimize`` directory is located relative to this script and
    used as the working directory; benchmarks are read from its
    ``tests/pytest/benchmarks`` sub-directory.

    Args:
        expected_improvement_pct: Improvement threshold forwarded unchanged to
            ``run_codeflash_command``.

    Returns:
        Whatever ``run_codeflash_command`` returns for this configuration.
    """
    project_dir = pathlib.Path(__file__).parent.parent.parent.joinpath("code_to_optimize").resolve()

    # Lines 2 through 10 of the target function are expected to be fully covered.
    sorter_coverage = CoverageExpectation(
        function_name="sorter",
        expected_coverage=100.0,
        expected_lines=list(range(2, 11)),
    )

    benchmark_config = TestConfig(
        file_path=pathlib.Path("bubble_sort.py"),
        function_name="sorter",
        benchmarks_root=project_dir.joinpath("tests", "pytest", "benchmarks"),
        test_framework="pytest",
        min_improvement_x=1.0,
        coverage_expectations=[sorter_coverage],
    )

    return run_codeflash_command(project_dir, benchmark_config, expected_improvement_pct)
23+
24+
25+
if __name__ == "__main__":
    # The bare ``exit`` helper is injected by the ``site`` module for
    # interactive sessions and is not guaranteed to exist (e.g. ``python -S``);
    # ``sys.exit`` is the supported way to set a script's exit status.
    import sys

    # EXPECTED_IMPROVEMENT_PCT comes from the environment; the default is given
    # as a string so ``os.getenv`` always hands ``int`` the same type.
    sys.exit(run_with_retries(run_test, int(os.getenv("EXPECTED_IMPROVEMENT_PCT", "5"))))

tests/scripts/end_to_end_test_utilities.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class TestConfig:
2626
min_improvement_x: float = 0.1
2727
trace_mode: bool = False
2828
coverage_expectations: list[CoverageExpectation] = field(default_factory=list)
29+
benchmarks_root: Optional[pathlib.Path] = None
2930

3031

3132
def clear_directory(directory_path: str | pathlib.Path) -> None:
@@ -85,8 +86,8 @@ def run_codeflash_command(
8586
path_to_file = cwd / config.file_path
8687
file_contents = path_to_file.read_text("utf-8")
8788
test_root = cwd / "tests" / (config.test_framework or "")
88-
command = build_command(cwd, config, test_root)
8989

90+
command = build_command(cwd, config, test_root, config.benchmarks_root if config.benchmarks_root else None)
9091
process = subprocess.Popen(
9192
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, cwd=str(cwd), env=os.environ.copy()
9293
)
@@ -116,7 +117,7 @@ def run_codeflash_command(
116117
return validated
117118

118119

119-
def build_command(cwd: pathlib.Path, config: TestConfig, test_root: pathlib.Path) -> list[str]:
120+
def build_command(cwd: pathlib.Path, config: TestConfig, test_root: pathlib.Path, benchmarks_root:pathlib.Path|None = None) -> list[str]:
120121
python_path = "../../../codeflash/main.py" if "code_directories" in str(cwd) else "../codeflash/main.py"
121122

122123
base_command = ["python", python_path, "--file", config.file_path, "--no-pr"]
@@ -127,7 +128,8 @@ def build_command(cwd: pathlib.Path, config: TestConfig, test_root: pathlib.Path
127128
base_command.extend(
128129
["--test-framework", config.test_framework, "--tests-root", str(test_root), "--module-root", str(cwd)]
129130
)
130-
131+
if benchmarks_root:
132+
base_command.extend(["--benchmark", "--benchmarks-root", str(benchmarks_root)])
131133
return base_command
132134

133135

0 commit comments

Comments
 (0)