diff --git a/.github/workflows/e2e-async.yaml b/.github/workflows/e2e-async.yaml
new file mode 100644
index 000000000..e7d08091c
--- /dev/null
+++ b/.github/workflows/e2e-async.yaml
@@ -0,0 +1,69 @@
+name: E2E - Async
+
+on:
+  pull_request:
+    paths:
+      - '**' # Trigger for all paths
+
+  workflow_dispatch:
+
+jobs:
+  async-optimization:
+    # Require the protected environment for manual dispatches, and for PRs that touch workflow files from an author outside the allowlist
+    environment: ${{ (github.event_name == 'workflow_dispatch' || (contains(toJSON(github.event.pull_request.files.*.filename), '.github/workflows/') && github.event.pull_request.user.login != 'misrasaurabh1' && github.event.pull_request.user.login != 'KRRT7')) && 'external-trusted-contributors' || '' }}
+
+    runs-on: ubuntu-latest
+    env:
+      CODEFLASH_AIS_SERVER: prod
+      POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+      CODEFLASH_API_KEY: ${{ secrets.CODEFLASH_API_KEY }}
+      COLUMNS: 110
+      MAX_RETRIES: 3
+      RETRY_DELAY: 5
+      EXPECTED_IMPROVEMENT_PCT: 10
+      CODEFLASH_END_TO_END: 1
+    steps:
+      - name: 🛎️ Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.ref }}
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          fetch-depth: 0
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Validate PR
+        run: |
+          # Check for any workflow changes
+          if git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}" | grep -q "^.github/workflows/"; then
+            echo "⚠️ Workflow changes detected."
+
+            # Get the PR author
+            AUTHOR="${{ github.event.pull_request.user.login }}"
+            echo "PR Author: $AUTHOR"
+
+            # Allowlist check
+            if [[ "$AUTHOR" == "misrasaurabh1" || "$AUTHOR" == "KRRT7" ]]; then
+              echo "✅ Authorized user ($AUTHOR). Proceeding."
+            elif [[ "${{ github.event.pull_request.state }}" == "open" ]]; then
+              echo "✅ PR triggered by 'pull_request_target' and is open. Assuming protection rules are in place. Proceeding."
+            else
+              echo "⛔ Unauthorized user ($AUTHOR) attempting to modify workflows. Exiting."
+              exit 1
+            fi
+          else
+            echo "✅ No workflow file changes detected. Proceeding."
+          fi
+
+      - name: Set up Python 3.11 for CLI
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: 3.11.6
+
+      - name: Install dependencies (CLI)
+        run: |
+          uv sync
+
+      - name: Run Codeflash to optimize async code
+        id: optimize_async_code
+        run: |
+          uv run python tests/scripts/end_to_end_test_async.py
\ No newline at end of file
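Reviewer note: the `environment:` expression and the Validate PR step encode the same policy twice. One caveat worth double-checking: the `pull_request` webhook payload does not embed a per-file list, so `toJSON(github.event.pull_request.files.*.filename)` may evaluate over an empty value; the git-diff check in the Validate PR step is the reliable gate. A minimal Python sketch of the intended policy (the `requires_protected_environment` helper is hypothetical, not part of this PR):

```python
def requires_protected_environment(event_name: str, changed_files: list[str], author: str) -> bool:
    """Mirror of the workflow gate: manual dispatches always go through the
    protected environment; PRs only when they touch workflow files and the
    author is not on the allowlist."""
    allowlist = {"misrasaurabh1", "KRRT7"}
    touches_workflows = any(f.startswith(".github/workflows/") for f in changed_files)
    return event_name == "workflow_dispatch" or (touches_workflows and author not in allowlist)

# A docs-only PR from an external author does not need the environment:
print(requires_protected_environment("pull_request", ["README.md"], "someone-external"))  # False
```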
diff --git a/code_to_optimize/code_directories/async_e2e/main.py b/code_to_optimize/code_directories/async_e2e/main.py
index 4470cc969..317068a1c 100644
--- a/code_to_optimize/code_directories/async_e2e/main.py
+++ b/code_to_optimize/code_directories/async_e2e/main.py
@@ -1,4 +1,16 @@
 import time
-async def fake_api_call(delay, data):
-    time.sleep(0.0001)
-    return f"Processed: {data}"
\ No newline at end of file
+import asyncio
+
+
+async def retry_with_backoff(func, max_retries=3):
+    if max_retries < 1:
+        raise ValueError("max_retries must be at least 1")
+    last_exception = None
+    for attempt in range(max_retries):
+        try:
+            return await func()
+        except Exception as e:
+            last_exception = e
+            if attempt < max_retries - 1:
+                time.sleep(0.0001 * attempt)
+    raise last_exception
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 28c485a80..bec15fe69 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -658,6 +658,15 @@ def determine_best_candidate(
                 )
                 tree.add(f"Speedup percentage: {perf_gain * 100:.1f}%")
                 tree.add(f"Speedup ratio: {perf_gain + 1:.3f}X")
+                if (
+                    original_code_baseline.async_throughput is not None
+                    and candidate_result.async_throughput is not None
+                ):
+                    throughput_gain_value = throughput_gain(
+                        original_throughput=original_code_baseline.async_throughput,
+                        optimized_throughput=candidate_result.async_throughput,
+                    )
+                    tree.add(f"Throughput gain: {throughput_gain_value * 100:.1f}%")
                 console.print(tree)
                 if self.args.benchmark and benchmark_tree:
                     console.print(benchmark_tree)
@@ -1199,6 +1208,8 @@ def find_and_process_best_optimization(
                 function_name=function_to_optimize_qualified_name,
                 file_path=self.function_to_optimize.file_path,
                 benchmark_details=processed_benchmark_info.benchmark_details if processed_benchmark_info else None,
+                original_async_throughput=original_code_baseline.async_throughput,
+                best_async_throughput=best_optimization.async_throughput,
             )

             self.replace_function_and_helpers_with_optimized_code(
@@ -1284,7 +1295,7 @@ def process_review(
         original_throughput_str = None
         optimized_throughput_str = None
         throughput_improvement_str = None
-
+
         if (
             self.function_to_optimize.is_async
             and original_code_baseline.async_throughput is not None
@@ -1297,7 +1308,7 @@ def process_review(
                 optimized_throughput=best_optimization.async_throughput,
             )
             throughput_improvement_str = f"{throughput_improvement_value * 100:.1f}%"
-
+
         new_explanation_raw_str = self.aiservice_client.get_new_explanation(
             source_code=code_context.read_writable_code.flat,
             dependency_code=code_context.read_only_context_code,
@@ -1324,6 +1335,8 @@ def process_review(
             function_name=explanation.function_name,
             file_path=explanation.file_path,
             benchmark_details=explanation.benchmark_details,
+            original_async_throughput=explanation.original_async_throughput,
+            best_async_throughput=explanation.best_async_throughput,
         )

         self.log_successful_optimization(new_explanation, generated_tests, exp_type)
@@ -1551,7 +1564,8 @@ def establish_original_code_baseline(
                 async_throughput = calculate_function_throughput_from_test_results(
                     benchmarking_results,
                     self.function_to_optimize.function_name
                 )
-                logger.info(f"Original async function throughput: {async_throughput} calls/second")
+                logger.debug(f"Original async function throughput: {async_throughput} calls/second")
+                console.rule()

         if self.args.benchmark:
             replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks(
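Reviewer note: the new `Throughput gain` tree line and the critic below both call `throughput_gain` from `codeflash.result.critic`, whose body this diff never shows. Given that the value is rendered as `value * 100` percent next to a `perf_gain + 1` ratio, a relative-gain definition is the natural reading. A sketch under that assumption, not the repository's actual implementation:

```python
def throughput_gain(*, original_throughput: int, optimized_throughput: int) -> float:
    """Relative throughput improvement: 0.0 means unchanged, 1.0 means twice
    as many completed calls per second. Assumed shape, inferred from how the
    result is formatted elsewhere in this patch."""
    if original_throughput <= 0:
        return 0.0
    return (optimized_throughput - original_throughput) / original_throughput
```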
diff --git a/codeflash/result/critic.py b/codeflash/result/critic.py
index 938b02c27..d0ff62176 100644
--- a/codeflash/result/critic.py
+++ b/codeflash/result/critic.py
@@ -82,9 +82,6 @@ def speedup_critic(
             original_throughput=original_async_throughput, optimized_throughput=candidate_result.async_throughput
         )
         throughput_improved = throughput_gain_value > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD
-        logger.info(
-            f"Async throughput gain: {throughput_gain_value * 100:.1f}% (original: {original_async_throughput}, optimized: {candidate_result.async_throughput})"
-        )
         throughput_is_best = (
             best_throughput_until_now is None or candidate_result.async_throughput > best_throughput_until_now
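Reviewer note: with the per-candidate log line removed, the observable effect of this critic block is the pair of booleans it computes. For illustration, a standalone sketch of that decision; the threshold value here is assumed, the real `MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD` lives in Codeflash's own constants:

```python
MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD = 0.10  # assumed value, for illustration only

def throughput_verdict(original: int, candidate: int, best_so_far: int | None) -> tuple[bool, bool]:
    """Returns (improved, is_best): improved if the relative gain clears the
    threshold, is_best if the candidate beats every earlier candidate."""
    gain = (candidate - original) / original
    improved = gain > MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD
    is_best = best_so_far is None or candidate > best_so_far
    return improved, is_best

# e.g. 1200 ops/s against a 1000 ops/s baseline, first candidate seen:
print(throughput_verdict(1000, 1200, None))  # (True, True)
```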
diff --git a/codeflash/result/explanation.py b/codeflash/result/explanation.py
index eb12beeb6..9fa5d02a5 100644
--- a/codeflash/result/explanation.py
+++ b/codeflash/result/explanation.py
@@ -11,6 +11,7 @@

 from codeflash.code_utils.time_utils import humanize_runtime
 from codeflash.models.models import BenchmarkDetail, TestResults
+from codeflash.result.critic import performance_gain, throughput_gain


 @dataclass(frozen=True, config={"arbitrary_types_allowed": True})
@@ -23,9 +24,29 @@ class Explanation:
     function_name: str
     file_path: Path
     benchmark_details: Optional[list[BenchmarkDetail]] = None
+    original_async_throughput: Optional[int] = None
+    best_async_throughput: Optional[int] = None

     @property
     def perf_improvement_line(self) -> str:
+        runtime_improvement = self.speedup
+
+        if (
+            self.original_async_throughput is not None
+            and self.best_async_throughput is not None
+            and self.original_async_throughput > 0
+        ):
+            throughput_improvement = throughput_gain(
+                original_throughput=self.original_async_throughput,
+                optimized_throughput=self.best_async_throughput,
+            )
+
+            # Use throughput metrics if throughput improvement is better or runtime got worse
+            if throughput_improvement > runtime_improvement or runtime_improvement <= 0:
+                throughput_pct = f"{throughput_improvement * 100:,.0f}%"
+                throughput_x = f"{throughput_improvement + 1:,.2f}x"
+                return f"{throughput_pct} improvement ({throughput_x} faster)."
         return f"{self.speedup_pct} improvement ({self.speedup_x} faster)."

     @property
@@ -45,6 +66,24 @@ def to_console_string(self) -> str:
         # TODO: Sometimes the explanation says something similar to "This is the code that was optimized", remove such parts
         original_runtime_human = humanize_runtime(self.original_runtime_ns)
         best_runtime_human = humanize_runtime(self.best_runtime_ns)
+
+        # Determine whether we're showing throughput or runtime improvements
+        runtime_improvement = self.speedup
+        is_using_throughput_metric = False
+
+        if (
+            self.original_async_throughput is not None
+            and self.best_async_throughput is not None
+            and self.original_async_throughput > 0
+        ):
+            throughput_improvement = throughput_gain(
+                original_throughput=self.original_async_throughput,
+                optimized_throughput=self.best_async_throughput,
+            )
+
+            if throughput_improvement > runtime_improvement or runtime_improvement <= 0:
+                is_using_throughput_metric = True
+
         benchmark_info = ""

         if self.benchmark_details:
@@ -85,10 +124,18 @@ def to_console_string(self) -> str:
             console.print(table)
             benchmark_info = cast("StringIO", console.file).getvalue() + "\n"  # Cast for mypy

+        if is_using_throughput_metric:
+            performance_description = (
+                f"Throughput improved from {self.original_async_throughput} to {self.best_async_throughput} operations/second "
+                f"(runtime: {original_runtime_human} → {best_runtime_human})\n\n"
+            )
+        else:
+            performance_description = f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n"
+
         return (
             f"Optimized {self.function_name} in {self.file_path}\n"
             f"{self.perf_improvement_line}\n"
-            f"Runtime went down from {original_runtime_human} to {best_runtime_human} \n\n"
+            + performance_description
             + (benchmark_info if benchmark_info else "")
             + self.raw_explanation_message
             + " \n\n"
diff --git a/tests/scripts/end_to_end_test_async.py b/tests/scripts/end_to_end_test_async.py
index f9ef1d806..5aed8f8ca 100644
--- a/tests/scripts/end_to_end_test_async.py
+++ b/tests/scripts/end_to_end_test_async.py
@@ -6,14 +6,14 @@

 def run_test(expected_improvement_pct: int) -> bool:
     config = TestConfig(
-        file_path="workload.py",
-        expected_unit_tests=1,
+        file_path="main.py",
+        expected_unit_tests=0,
         min_improvement_x=0.1,
         coverage_expectations=[
             CoverageExpectation(
-                function_name="process_data_list",
+                function_name="retry_with_backoff",
                 expected_coverage=100.0,
-                expected_lines=[5, 7, 8, 9, 10, 12],
+                expected_lines=[10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
             )
         ],
     )
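Reviewer note: the e2e target deliberately blocks the event loop with `time.sleep()` inside an `async def`, which is what caps the measured calls-per-second and gives the optimizer a throughput win to find (presumably by switching to `await asyncio.sleep(...)`, which the unused `import asyncio` hints at). A sketch of the kind of concurrent workload that exposes this, assuming `main.py` is importable from `code_to_optimize/code_directories/async_e2e/` and using a hypothetical `flaky` callable:

```python
import asyncio
import time

from main import retry_with_backoff  # the e2e target added above

async def flaky() -> str:
    # Hypothetical workload: fails exactly once across all calls, so both
    # the retry loop and the backoff sleep get exercised.
    if not hasattr(flaky, "called"):
        flaky.called = True
        raise RuntimeError("transient failure")
    return "ok"

async def main() -> None:
    # Running many calls concurrently is where the blocking time.sleep()
    # inside retry_with_backoff caps throughput: while one coroutine sleeps,
    # no other coroutine can make progress on the event loop.
    start = time.perf_counter()
    results = await asyncio.gather(*(retry_with_backoff(flaky) for _ in range(100)))
    print(len(results), "calls in", time.perf_counter() - start, "s")

asyncio.run(main())
```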