diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index 2eedb9fae..7480252bd 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import concurrent.futures
 import json
 import os
 import platform
@@ -12,7 +13,6 @@
 from codeflash.cli_cmds.console import console, logger
 from codeflash.code_utils.code_replacer import is_zero_diff
 from codeflash.code_utils.code_utils import unified_diff_strings
-from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE, N_CANDIDATES_LP_EFFECTIVE
 from codeflash.code_utils.env_utils import get_codeflash_api_key
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.code_utils.time_utils import humanize_runtime
@@ -92,7 +92,7 @@ def make_ai_service_request(
         return response
 
     def _get_valid_candidates(
-        self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource
+        self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource, model: str | None = None
     ) -> list[OptimizedCandidate]:
         candidates: list[OptimizedCandidate] = []
         for opt in optimizations_json:
@@ -106,6 +106,7 @@
                         optimization_id=opt["optimization_id"],
                         source=source,
                         parent_id=opt.get("parent_id", None),
+                        model=model,
                     )
                 )
         return candidates
@@ -115,10 +116,11 @@ def optimize_python_code(  # noqa: D417
         source_code: str,
         dependency_code: str,
         trace_id: str,
-        num_candidates: int = 10,
         experiment_metadata: ExperimentMetadata | None = None,
         *,
        is_async: bool = False,
+        model: str | None = None,
+        call_sequence: int | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance by making a request to the Django endpoint.
 
@@ -127,8 +129,9 @@ def optimize_python_code(  # noqa: D417
         - source_code (str): The python code to optimize.
         - dependency_code (str): The dependency code used as read-only context for the optimization
         - trace_id (str): Trace id of optimization run
-        - num_candidates (int): Number of optimization variants to generate. Default is 10.
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+        - model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
+        - call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.
 
         Returns
         -------
@@ -141,7 +144,6 @@
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
-            "num_variants": num_candidates,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
@@ -149,12 +151,12 @@
             "current_username": get_last_commit_author_if_pr_exists(None),
             "repo_owner": git_repo_owner,
             "repo_name": git_repo_name,
-            "n_candidates": N_CANDIDATES_EFFECTIVE,
             "is_async": is_async,
+            "model": model,
+            "call_sequence": call_sequence,
         }
+        logger.debug(f"Sending optimize request: model={model}, trace_id={trace_id}, call_sequence={call_sequence}")
 
-        logger.info("!lsp|Generating optimized candidates…")
-        console.rule()
         try:
             response = self.make_ai_service_request("/optimize", payload=payload, timeout=60)
         except requests.exceptions.RequestException as e:
@@ -164,17 +166,16 @@
         if response.status_code == 200:
             optimizations_json = response.json()["optimizations"]
-            console.rule()
             end_time = time.perf_counter()
             logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.")
-            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE)
+            logger.debug(f"Backend returned {len(optimizations_json)} optimization(s)")
+            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE, model=model)
         try:
             error = response.json()["error"]
         except Exception:
             error = response.text
         logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
         ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
-        console.rule()
         return []
 
     def optimize_python_code_line_profiler(  # noqa: D417
@@ -183,8 +184,9 @@
         dependency_code: str,
         trace_id: str,
         line_profiler_results: str,
-        num_candidates: int = 10,
         experiment_metadata: ExperimentMetadata | None = None,
+        model: str | None = None,
+        call_sequence: int | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance by making a request to the Django endpoint.
 
@@ -193,8 +195,9 @@ def optimize_python_code_line_profiler(  # noqa: D417
         - source_code (str): The python code to optimize.
         - dependency_code (str): The dependency code used as read-only context for the optimization
         - trace_id (str): Trace id of optimization run
-        - num_candidates (int): Number of optimization variants to generate. Default is 10.
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+        - model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
+        - call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.
 
         Returns
         -------
@@ -204,20 +207,18 @@
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
-            "num_variants": num_candidates,
             "line_profiler_results": line_profiler_results,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
             "codeflash_version": codeflash_version,
             "lsp_mode": is_LSP_enabled(),
-            "n_candidates_lp": N_CANDIDATES_LP_EFFECTIVE,
+            "model": model,
+            "call_sequence": call_sequence,
         }
-        console.rule()
         if line_profiler_results == "":
             logger.info("No LineProfiler results were provided, Skipping optimization.")
-            console.rule()
             return []
         try:
             response = self.make_ai_service_request("/optimize-line-profiler", payload=payload, timeout=60)
@@ -228,20 +229,115 @@
         if response.status_code == 200:
             optimizations_json = response.json()["optimizations"]
-            logger.info(
-                f"!lsp|Generated {len(optimizations_json)} candidate optimizations using line profiler information."
-            )
-            console.rule()
-            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP)
+            logger.debug(f"Backend returned {len(optimizations_json)} LP optimization(s)")
+            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP, model=model)
         try:
             error = response.json()["error"]
         except Exception:
             error = response.text
         logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
         ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
-        console.rule()
         return []
 
+    def optimize_python_code_multi_model(
+        self,
+        source_code: str,
+        dependency_code: str,
+        base_trace_id: str,
+        model_distribution: list[tuple[str, int]],
+        executor: concurrent.futures.ThreadPoolExecutor,
+        experiment_metadata: ExperimentMetadata | None = None,
+        *,
+        is_async: bool = False,
+        sequence_offset: int = 0,
+    ) -> tuple[list[OptimizedCandidate], int]:
+        """Generate optimizations using multiple models in parallel."""
+        logger.info("Generating optimized candidates…")
+        console.rule()
+
+        futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = []
+
+        call_index = 0
+        for model_name, num_calls in model_distribution:
+            for _ in range(num_calls):
+                call_trace_id = f"{base_trace_id[:-3]}0{call_index:02x}"
+                call_sequence = sequence_offset + call_index + 1
+                call_index += 1
+                future = executor.submit(
+                    self.optimize_python_code,
+                    source_code,
+                    dependency_code,
+                    call_trace_id,
+                    experiment_metadata,
+                    is_async=is_async,
+                    model=model_name,
+                    call_sequence=call_sequence,
+                )
+                futures.append((future, model_name))
+
+        concurrent.futures.wait([f for f, _ in futures])
+
+        all_candidates: list[OptimizedCandidate] = []
+        for future, model_name in futures:
+            try:
+                candidates = future.result()
+                all_candidates.extend(candidates)
+            except Exception as e:
+                logger.warning(f"Model {model_name} call failed: {e}")
+                continue
+
+        console.rule()
+        return all_candidates, call_index
+
+    def optimize_python_code_line_profiler_multi_model(
+        self,
+        source_code: str,
+        dependency_code: str,
+        base_trace_id: str,
+        line_profiler_results: str,
+        model_distribution: list[tuple[str, int]],
+        executor: concurrent.futures.ThreadPoolExecutor,
+        experiment_metadata: ExperimentMetadata | None = None,
+        sequence_offset: int = 0,
+    ) -> tuple[list[OptimizedCandidate], int]:
+        """Generate line profiler optimizations using multiple models in parallel."""
+ logger.info("Generating optimized candidates with line profiler…") + console.rule() + + futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = [] + + call_index = 0 + for model_name, num_calls in model_distribution: + for _ in range(num_calls): + call_trace_id = f"{base_trace_id[:-3]}1{call_index:02x}" + call_sequence = sequence_offset + call_index + 1 + call_index += 1 + future = executor.submit( + self.optimize_python_code_line_profiler, + source_code, + dependency_code, + call_trace_id, + line_profiler_results, + experiment_metadata, + model_name, + call_sequence, + ) + futures.append((future, model_name)) + + concurrent.futures.wait([f for f, _ in futures]) + + all_candidates: list[OptimizedCandidate] = [] + for future, model_name in futures: + try: + candidates = future.result() + all_candidates.extend(candidates) + except Exception as e: + logger.warning(f"Line profiler model {model_name} call failed: {e}") + continue + + console.rule() + return all_candidates, call_index + def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]: """Optimize the given python code for performance by making a request to the Django endpoint. @@ -268,6 +364,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] "trace_id": opt.trace_id, "function_references": opt.function_references, "python_version": platform.python_version(), + "call_sequence": opt.call_sequence, } for opt in request ] @@ -357,6 +454,7 @@ def get_new_explanation( # noqa: D417 throughput_improvement: str | None = None, function_references: str | None = None, codeflash_version: str = codeflash_version, + call_sequence: int | None = None, ) -> str: """Optimize the given python code for performance by making a request to the Django endpoint. @@ -402,6 +500,7 @@ def get_new_explanation( # noqa: D417 "throughput_improvement": throughput_improvement, "function_references": function_references, "codeflash_version": codeflash_version, + "call_sequence": call_sequence, } logger.info("loading|Generating explanation") console.rule() @@ -529,6 +628,7 @@ def generate_regression_tests( # noqa: D417 test_timeout: int, trace_id: str, test_index: int, + call_sequence: int | None = None, ) -> tuple[str, str, str] | None: """Generate regression tests for the given function by making a request to the Django endpoint. @@ -564,6 +664,7 @@ def generate_regression_tests( # noqa: D417 "python_version": platform.python_version(), "codeflash_version": codeflash_version, "is_async": function_to_optimize.is_async, + "call_sequence": call_sequence, } try: response = self.make_ai_service_request("/testgen", payload=payload, timeout=90) @@ -604,6 +705,7 @@ def get_optimization_review( replay_tests: str, concolic_tests: str, # noqa: ARG002 calling_fn_details: str, + call_sequence: int | None = None, ) -> str: """Compute the optimization review of current Pull Request. 
@@ -619,6 +721,7 @@ def get_optimization_review(
             root_dir: Path -> path of git directory
             concolic_tests: str -> concolic_tests (not used)
             calling_fn_details: str -> filenames and definitions of functions which call the function_to_optimize
+            call_sequence: int | None -> sequence number for multi-model calls
 
         Returns:
         -------
@@ -650,6 +753,7 @@
             "codeflash_version": codeflash_version,
             "calling_fn_details": calling_fn_details,
             "python_version": platform.python_version(),
+            "call_sequence": call_sequence,
         }
         console.rule()
         try:
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 88758455e..ba09989f8 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -32,6 +32,20 @@
 MAX_N_CANDIDATES = 5
 MAX_N_CANDIDATES_LP = 6
 
+# Multi-model diversity configuration
+# Each tuple is (model_name, num_calls) where each call returns 1 candidate
+# Standard mode: 3 GPT-4.1 + 2 Claude Sonnet = 5 candidates
+MODEL_DISTRIBUTION: list[tuple[str, int]] = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]
+
+# LSP mode: fewer candidates for faster response
+MODEL_DISTRIBUTION_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]
+
+# Line profiler mode: 6 candidates total
+MODEL_DISTRIBUTION_LP: list[tuple[str, int]] = [("gpt-4.1", 4), ("claude-sonnet-4-5", 2)]
+
+# Line profiler LSP mode
+MODEL_DISTRIBUTION_LP_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]
+
 try:
     from codeflash.lsp.helpers import is_LSP_enabled
 
@@ -43,5 +57,7 @@
 N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP)
 N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE
 TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME
+MODEL_DISTRIBUTION_EFFECTIVE = MODEL_DISTRIBUTION_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION
+MODEL_DISTRIBUTION_LP_EFFECTIVE = MODEL_DISTRIBUTION_LP_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION_LP
 
 MAX_CONTEXT_LEN_REVIEW = 1000
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
index 1db09bc12..822ecffab 100644
--- a/codeflash/models/models.py
+++ b/codeflash/models/models.py
@@ -46,6 +46,7 @@ class AIServiceRefinerRequest:
     original_line_profiler_results: str
     optimized_line_profiler_results: str
     function_references: str | None = None
+    call_sequence: int | None = None
 
 
 class TestDiffScope(str, Enum):
@@ -464,6 +465,7 @@ class OptimizedCandidate:
     optimization_id: str
     source: OptimizedCandidateSource
     parent_id: str | None = None
+    model: str | None = None  # Which LLM model generated this candidate
 
 
 @dataclass(frozen=True)
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 416bdc8df..6228ee01a 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -46,8 +46,8 @@
     COVERAGE_THRESHOLD,
     INDIVIDUAL_TESTCASE_TIMEOUT,
     MAX_REPAIRS_PER_TRACE,
-    N_CANDIDATES_EFFECTIVE,
-    N_CANDIDATES_LP_EFFECTIVE,
+    MODEL_DISTRIBUTION_EFFECTIVE,
+    MODEL_DISTRIBUTION_LP_EFFECTIVE,
     N_TESTS_TO_GENERATE_EFFECTIVE,
     REFINE_ALL_THRESHOLD,
     REFINED_CANDIDATE_RANKING_WEIGHTS,
@@ -139,6 +139,7 @@ def __init__(
         ai_service_client: AiServiceClient,
         executor: concurrent.futures.ThreadPoolExecutor,
         future_all_code_repair: list[concurrent.futures.Future],
+        sequence_offset: int = 0,
     ) -> None:
         self.candidate_queue = queue.Queue()
         self.line_profiler_done = False
@@ -146,6 +147,9 @@
         self.candidate_len = len(initial_candidates)
         self.ai_service_client = ai_service_client
         self.executor = executor
+        self.sequence_offset = sequence_offset
+        self.lp_calls_count = 0
+        self.refinement_calls_count = 0
 
         # Initialize queue with initial candidates
         for candidate in initial_candidates:
@@ -155,6 +159,9 @@
         self.all_refinements_data = all_refinements_data
         self.future_all_code_repair = future_all_code_repair
 
+    def get_total_llm_calls(self) -> int:
+        return self.sequence_offset + self.lp_calls_count + self.refinement_calls_count
+
     def get_next_candidate(self) -> OptimizedCandidate | None:
         """Get the next candidate from the queue, handling async results as needed."""
         try:
@@ -176,7 +183,11 @@
     def _process_line_profiler_results(self) -> OptimizedCandidate | None:
         """Process line profiler results and add to queue."""
         logger.debug("all candidates processed, await candidates from line profiler")
         concurrent.futures.wait([self.future_line_profile_results])
-        line_profile_results = self.future_line_profile_results.result()
+        result = self.future_line_profile_results.result()
+
+        # LP multi-model now returns (candidates, lp_call_count)
+        line_profile_results, lp_call_count = result
+        self.lp_calls_count = lp_call_count
         for candidate in line_profile_results:
             self.candidate_queue.put(candidate)
@@ -192,11 +203,18 @@ def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concur
     def _process_refinement_results(self) -> OptimizedCandidate | None:
         """Process refinement results and add to queue. We generate a weighted ranking based on the runtime and diff lines and select the best (round of 45%) of valid optimizations to be refined."""
+        import dataclasses
+
         future_refinements: list[concurrent.futures.Future] = []
+        # Calculate base sequence: offset + lp_calls (refinements come after LP)
+        base_sequence = self.sequence_offset + self.lp_calls_count
+        refinement_call_index = 0
         if len(self.all_refinements_data) <= REFINE_ALL_THRESHOLD:
             for data in self.all_refinements_data:
-                future_refinements.append(self.refine_optimizations([data]))  # noqa: PERF401
+                refinement_call_index += 1
+                data_with_seq = dataclasses.replace(data, call_sequence=base_sequence + refinement_call_index)
+                future_refinements.append(self.refine_optimizations([data_with_seq]))
         else:
             diff_lens_list = []
             runtimes_list = []
@@ -215,8 +233,13 @@
             top_indecies = sorted(score_dict, key=score_dict.get)[:top_n_candidates]
 
             for idx in top_indecies:
+                refinement_call_index += 1
                 data = self.all_refinements_data[idx]
-                future_refinements.append(self.refine_optimizations([data]))
+                data_with_seq = dataclasses.replace(data, call_sequence=base_sequence + refinement_call_index)
+                future_refinements.append(self.refine_optimizations([data_with_seq]))
+
+        # Track total refinement calls made
+        self.refinement_calls_count = refinement_call_index
 
         if future_refinements:
             logger.info("loading|Refining generated code for improved quality and performance...")
@@ -319,10 +342,14 @@
         self.optimization_review = ""
         self.future_all_code_repair: list[concurrent.futures.Future] = []
         self.repair_counter = 0  # track how many repairs we did for each function
+        self.test_gen_calls_count = 0
+        self.optimize_calls_count = 0
+        self.lp_calls_count = 0
+        self.total_llm_calls = 0
 
     def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
         should_run_experiment = self.experiment_id is not None
-        logger.debug(f"Function Trace ID: {self.function_trace_id}")
+        logger.info(f"Function Trace ID: {self.function_trace_id}")
         ph("cli-optimize-function-start", {"function_trace_id": self.function_trace_id})
         self.cleanup_leftover_test_return_values()
         file_name_from_test_module_name.cache_clear()
@@ -922,17 +949,19 @@
         assert ai_service_client is not None, "AI service client must be set for optimization"
 
         future_line_profile_results = self.executor.submit(
-            ai_service_client.optimize_python_code_line_profiler,
+            ai_service_client.optimize_python_code_line_profiler_multi_model,
             source_code=code_context.read_writable_code.markdown,
             dependency_code=code_context.read_only_context_code,
-            trace_id=self.get_trace_id(exp_type),
+            base_trace_id=self.get_trace_id(exp_type),
             line_profiler_results=original_code_baseline.line_profile_results["str_out"],
-            num_candidates=N_CANDIDATES_LP_EFFECTIVE,
+            model_distribution=MODEL_DISTRIBUTION_LP_EFFECTIVE,
+            executor=self.executor,
             experiment_metadata=ExperimentMetadata(
                 id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment"
             )
            if self.experiment_id
            else None,
+            sequence_offset=self.optimize_calls_count,
         )
 
         processor = CandidateProcessor(
@@ -942,6 +971,7 @@
             self.aiservice_client,
             self.executor,
             self.future_all_code_repair,
+            sequence_offset=self.optimize_calls_count,
         )
 
         candidate_index = 0
@@ -975,6 +1005,9 @@
             self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
         )
 
+        # Track total LLM calls from the processor for sequence numbering
+        self.total_llm_calls = processor.get_total_llm_calls()
+
         # Select and return the best optimization
         best_optimization = self.select_best_optimization(
             eval_ctx=eval_ctx,
@@ -1353,17 +1386,17 @@
         read_only_context_code: str,
         run_experiment: bool = False,  # noqa: FBT001, FBT002
     ) -> Result[tuple[OptimizationSet, str], str]:
-        """Generate optimization candidates for the function."""
-        n_candidates = N_CANDIDATES_EFFECTIVE
-
+        """Generate optimization candidates for the function using multiple models in parallel."""
         future_optimization_candidates = self.executor.submit(
-            self.aiservice_client.optimize_python_code,
+            self.aiservice_client.optimize_python_code_multi_model,
             read_writable_code.markdown,
             read_only_context_code,
             self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id,
-            n_candidates,
+            MODEL_DISTRIBUTION_EFFECTIVE,
+            self.executor,
             ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
             is_async=self.function_to_optimize.is_async,
+            sequence_offset=N_TESTS_TO_GENERATE_EFFECTIVE,
         )
 
         future_references = self.executor.submit(
@@ -1380,27 +1413,34 @@
         if run_experiment:
             future_candidates_exp = self.executor.submit(
-                self.local_aiservice_client.optimize_python_code,
+                self.local_aiservice_client.optimize_python_code_multi_model,
                 read_writable_code.markdown,
                 read_only_context_code,
                 self.function_trace_id[:-4] + "EXP1",
-                n_candidates,
+                MODEL_DISTRIBUTION_EFFECTIVE,
+                self.executor,
                 ExperimentMetadata(id=self.experiment_id, group="experiment"),
                 is_async=self.function_to_optimize.is_async,
+                sequence_offset=N_TESTS_TO_GENERATE_EFFECTIVE,
             )
             futures.append(future_candidates_exp)
 
         # Wait for optimization futures to complete
         concurrent.futures.wait(futures)
 
-        # Retrieve results
-        candidates: list[OptimizedCandidate] = future_optimization_candidates.result()
-        logger.info(f"!lsp|Generated '{len(candidates)}' candidate optimizations.")
+        # Retrieve results - optimize_python_code_multi_model returns (candidates, call_count)
+        candidates, optimize_call_count = future_optimization_candidates.result()
+        # Total sequence count = test gen calls + optimization calls (LP will continue from here)
+        self.optimize_calls_count = N_TESTS_TO_GENERATE_EFFECTIVE + optimize_call_count
+        logger.info(f"!lsp|Completed {optimize_call_count} optimization calls, got {len(candidates)} candidates.")
         if not candidates:
             return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}")
 
-        candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None
+        # Handle experiment results - also returns (candidates, call_count) tuple
+        candidates_experiment = None
+        if future_candidates_exp:
+            candidates_experiment, _ = future_candidates_exp.result()
         function_references = future_references.result()
 
         return Success((OptimizationSet(control=candidates, experiment=candidates_experiment), function_references))
@@ -1647,6 +1687,10 @@
         )
         throughput_improvement_str = f"{throughput_improvement_value * 100:.1f}%"
 
+        # Explanation call continues the sequence numbering
+        explanation_call_sequence = self.total_llm_calls + 1
+        self.total_llm_calls = explanation_call_sequence
+
         new_explanation_raw_str = self.aiservice_client.get_new_explanation(
             source_code=code_context.read_writable_code.flat,
             dependency_code=code_context.read_only_context_code,
@@ -1664,6 +1708,7 @@
             optimized_throughput=optimized_throughput_str,
             throughput_improvement=throughput_improvement_str,
             function_references=function_references,
+            call_sequence=explanation_call_sequence,
         )
         new_explanation = Explanation(
             raw_explanation_message=new_explanation_raw_str or explanation.raw_explanation_message,
@@ -1699,9 +1744,13 @@
         staging_review = self.args.staging_review
         opt_review_response = ""
         # this will now run regardless of pr, staging review flags
+        # Optimization review call continues the sequence numbering
+        review_call_sequence = self.total_llm_calls + 1
+        self.total_llm_calls = review_call_sequence
+
         try:
             opt_review_response = self.aiservice_client.get_optimization_review(
-                **data, calling_fn_details=function_references
+                **data, calling_fn_details=function_references, call_sequence=review_call_sequence
             )
         except Exception as e:
             logger.debug(f"optimization review response failed, investigate {e}")
@@ -2192,6 +2241,9 @@
         generated_test_paths: list[Path],
         generated_perf_test_paths: list[Path],
     ) -> list[concurrent.futures.Future]:
+        # Track how many test generation calls we're making for sequence numbering
+        self.test_gen_calls_count = len(generated_test_paths)
+
         return [
             executor.submit(
                 generate_tests,
@@ -2206,6 +2258,7 @@
                 test_index,
                 test_path,
                 test_perf_path,
+                call_sequence=test_index + 1,
             )
             for test_index, (test_path, test_perf_path) in enumerate(
                 zip(generated_test_paths, generated_perf_test_paths)
diff --git a/codeflash/verification/verifier.py b/codeflash/verification/verifier.py
index 8d187f2b1..d94455df3 100644
--- a/codeflash/verification/verifier.py
+++ b/codeflash/verification/verifier.py
@@ -27,6 +27,7 @@ def generate_tests(
     test_index: int,
     test_path: Path,
     test_perf_path: Path,
+    call_sequence: int | None = None,
 ) -> tuple[str, str, Path] | None:
     # TODO: Sometimes this recreates the original Class definition. This overrides and messes up the original
     # class import. Remove the recreation of the class definition
@@ -42,6 +43,7 @@ def generate_tests(
         test_timeout=test_timeout,
         trace_id=function_trace_id,
         test_index=test_index,
+        call_sequence=call_sequence,
     )
     if response and isinstance(response, tuple) and len(response) == 3:
         generated_test_source, instrumented_behavior_test_source, instrumented_perf_test_source = response
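
Reviewer sketch (not part of the patch): the two *_multi_model helpers in aiservice.py share one fan-out shape — expand the (model_name, num_calls) distribution into individual calls, give each call its own trace id and call_sequence, submit them all to a shared ThreadPoolExecutor, then merge whatever comes back while tolerating per-call failures. The standalone sketch below mirrors that shape under stated assumptions; fake_optimize and fan_out are hypothetical stand-ins, not codeflash APIs.

from __future__ import annotations

import concurrent.futures


def fake_optimize(trace_id: str, model: str, call_sequence: int) -> list[str]:
    # Placeholder for the real per-call backend request; returns one "candidate" per call.
    return [f"candidate from {model} (seq={call_sequence}, trace={trace_id})"]


def fan_out(
    base_trace_id: str, distribution: list[tuple[str, int]], sequence_offset: int = 0
) -> tuple[list[str], int]:
    candidates: list[str] = []
    futures: list[tuple[concurrent.futures.Future[list[str]], str]] = []
    call_index = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
        for model_name, num_calls in distribution:
            for _ in range(num_calls):
                # Same id scheme as the patch: replace the last 3 chars of the base id
                # with a phase digit ("0" = optimize) and a two-hex-digit call index.
                call_trace_id = f"{base_trace_id[:-3]}0{call_index:02x}"
                call_sequence = sequence_offset + call_index + 1
                call_index += 1
                futures.append(
                    (executor.submit(fake_optimize, call_trace_id, model_name, call_sequence), model_name)
                )
        concurrent.futures.wait([f for f, _ in futures])
    for future, model_name in futures:
        try:
            candidates.extend(future.result())
        except Exception as exc:  # one failed model call should not sink the whole batch
            print(f"{model_name} call failed: {exc}")
    return candidates, call_index


if __name__ == "__main__":
    results, calls = fan_out("abcdefgh000", [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)])
    print(calls, *results, sep="\n")

Because each submit is isolated and failures are caught while collecting results, a slow or failing model only shrinks the batch; it never raises out of the helper, which is why the callers can unconditionally unpack (candidates, call_count).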
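
Reviewer note on the call_sequence bookkeeping the patch threads through function_optimizer.py: test-generation calls take 1..N_TESTS_TO_GENERATE_EFFECTIVE, the optimize fan-out continues from that offset, line-profiler calls continue from optimize_calls_count, refinements follow the LP calls, and the explanation and optimization-review calls take the next two numbers. A worked example with assumed sizes follows (N_TESTS_TO_GENERATE_EFFECTIVE is defined in config_consts.py and not shown in this diff; 2 is an illustrative guess, and the refinement count depends on how many candidates survive).

# Illustrative accounting only - mirrors the arithmetic in the patch, not the real code paths.
n_tests = 2                      # assumed N_TESTS_TO_GENERATE_EFFECTIVE
optimize_calls = 3 + 2           # MODEL_DISTRIBUTION: 3 gpt-4.1 + 2 claude-sonnet-4-5
lp_calls = 4 + 2                 # MODEL_DISTRIBUTION_LP: 4 gpt-4.1 + 2 claude-sonnet-4-5
refinement_calls = 3             # however many candidates get refined

test_gen_seqs = list(range(1, n_tests + 1))                               # [1, 2]
optimize_seqs = [n_tests + i + 1 for i in range(optimize_calls)]          # [3..7]
optimize_calls_count = n_tests + optimize_calls                           # 7, stored on the optimizer
lp_seqs = [optimize_calls_count + i + 1 for i in range(lp_calls)]         # [8..13]
refine_base = optimize_calls_count + lp_calls
refinement_seqs = [refine_base + i + 1 for i in range(refinement_calls)]  # [14..16]
explanation_seq = refine_base + refinement_calls + 1                      # 17 (total_llm_calls + 1)
review_seq = explanation_seq + 1                                          # 18
print(test_gen_seqs, optimize_seqs, lp_seqs, refinement_seqs, explanation_seq, review_seq)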
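
Reviewer note: each MODEL_DISTRIBUTION* entry is a (model_name, num_calls) pair, so the expected candidate count is simply the sum of the second elements, and both *_multi_model helpers effectively walk a flattened call plan. A tiny illustrative sketch of that flattening, under the assumption that each call yields one candidate:

from itertools import chain

MODEL_DISTRIBUTION = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]

call_plan = list(chain.from_iterable([model] * calls for model, calls in MODEL_DISTRIBUTION))
assert len(call_plan) == sum(calls for _, calls in MODEL_DISTRIBUTION)  # 5 candidates expected
print(call_plan)  # ['gpt-4.1', 'gpt-4.1', 'gpt-4.1', 'claude-sonnet-4-5', 'claude-sonnet-4-5']

Since per-call trace ids replace the last three characters of the base id with a phase digit ("0" for optimize, "1" for line profiler) plus a two-hex-digit index, the scheme keeps the two phases' ids disjoint and implicitly assumes at most 256 calls per phase.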