From b2055164e6930b3a107eed9066773a1bd27605b3 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 4 Sep 2025 19:05:56 -0700 Subject: [PATCH 1/5] ranker wip --- codeflash/api/aiservice.py | 53 ++++++++++++++++++++ codeflash/optimization/function_optimizer.py | 8 +++ 2 files changed, 61 insertions(+) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index f337a985..fc541393 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -353,6 +353,59 @@ def get_new_explanation( # noqa: D417 console.rule() return "" + def generate_ranking( # noqa: D417 + self, trace_id: str, diffs: list[str], optimization_ids: list[str], speedups: list[int] + ) -> list[int] | None: + """Optimize the given python code for performance by making a request to the Django endpoint. + + Concretely: submit already-generated optimization candidates to the AI service for ranking. + + Parameters + ---------- + - trace_id (str): forwarded as "trace_id" in the request payload. + - diffs (list[str]): diff strings, forwarded as "diffs". + - optimization_ids (list[str]): forwarded as "optimization_ids". + - speedups: forwarded as "speedups". + - NOTE(review): presumably the three lists are parallel, one entry per candidate - confirm. + + The payload also carries "python_version" from platform.python_version(). + + Returns + ------- + - list[int] | None: the "ranking" list from a 200 response; None when the request + raises requests.exceptions.RequestException or the status code is not 200. + - NOTE(review): whether ranking entries are 0- or 1-based is not visible here - confirm. 
+ + """ + payload = { + "trace_id": trace_id, + "diffs": diffs, + "speedups": speedups, + "optimization_ids": optimization_ids, + "python_version": platform.python_version(), + } + logger.info("Generating ranking") + console.rule() + try: + response = self.make_ai_service_request("/ranker", payload=payload, timeout=60) + except requests.exceptions.RequestException as e: + logger.exception(f"Error generating ranking: {e}") + ph("cli-optimize-error-caught", {"error": str(e)}) + return None + + if response.status_code == 200: + ranking: list[int] = response.json()["ranking"] + console.rule() + return ranking + try: + error = response.json()["error"] + except Exception: + error = response.text + logger.error(f"Error generating ranking: {response.status_code} - {error}") + ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error}) + console.rule() + return None + def log_results( # noqa: D417 self, function_trace_id: str, diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index c523dcbc..d5798835 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -656,6 +656,14 @@ def determine_best_candidate( if not valid_optimizations: return None # need to figure out the best candidate here before we return best_optimization + ranking = self.executor.submit( + ai_service_client.generate_ranking, + diffs=[], + optimization_ids=[], + speedups=[], + trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, + ) + print(ranking) # reassign the shorter code here valid_candidates_with_shorter_code = [] diff_lens_list = [] # character level diff From 8c361801d07eddc02a54fd30fd25c418397483af Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 4 Sep 2025 19:18:16 -0700 Subject: [PATCH 2/5] todo logging message, db logging --- codeflash/code_utils/code_utils.py | 17 ++++++++ 
codeflash/optimization/function_optimizer.py | 43 ++++++++++++++------ 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index dfd79a76..4ff01004 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -20,6 +20,23 @@ ImportErrorPattern = re.compile(r"ModuleNotFoundError.*$", re.MULTILINE) +def unified_diff_strings(code1: str, code2: str, fromfile: str = "original", tofile: str = "modified") -> str: + """Return the unified diff between two code strings as a single string. + + :param code1: First code string (original). + :param code2: Second code string (modified). + :param fromfile: Label for the first code string. + :param tofile: Label for the second code string. + :return: Unified diff with one diff line per text line (no trailing newline); "" when the inputs are equal. + """ + code1_lines = code1.splitlines() + code2_lines = code2.splitlines() + # Split without terminators and re-join with "\n" so every diff line, including the ---/+++/@@ headers, stays on its own line. + diff = difflib.unified_diff(code1_lines, code2_lines, fromfile=fromfile, tofile=tofile, lineterm="") + + return "\n".join(diff) + + def diff_length(a: str, b: str) -> int: """Compute the length (in characters) of the unified diff between two strings. 
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index d5798835..a0d073ef 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -39,6 +39,7 @@ has_any_async_functions, module_name_from_file_path, restore_conftest, + unified_diff_strings, ) from codeflash.code_utils.config_consts import ( INDIVIDUAL_TESTCASE_TIMEOUT, @@ -656,17 +657,12 @@ def determine_best_candidate( if not valid_optimizations: return None # need to figure out the best candidate here before we return best_optimization - ranking = self.executor.submit( - ai_service_client.generate_ranking, - diffs=[], - optimization_ids=[], - speedups=[], - trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, - ) - print(ranking) # reassign the shorter code here valid_candidates_with_shorter_code = [] diff_lens_list = [] # character level diff + speedups_list = [] + optimization_ids = [] + diff_strs = [] runtimes_list = [] for valid_opt in valid_optimizations: valid_opt_normalized_code = ast.unparse(ast.parse(valid_opt.candidate.source_code.flat.strip())) @@ -690,12 +686,33 @@ def determine_best_candidate( diff_lens_list.append( diff_length(new_best_opt.candidate.source_code.flat, code_context.read_writable_code.flat) ) # char level diff + diff_strs.append( + unified_diff_strings(code_context.read_writable_code.flat, new_best_opt.candidate.source_code.flat) + ) + speedups_list.append( + 1 + + performance_gain( + original_runtime_ns=original_code_baseline.runtime, optimized_runtime_ns=new_best_opt.runtime + ) + ) + optimization_ids.append(new_best_opt.candidate.optimization_id) runtimes_list.append(new_best_opt.runtime) - diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list) - runtimes_ranking = create_rank_dictionary_compact(runtimes_list) - # TODO: better way to resolve conflicts with same min ranking - overall_ranking = {key: 
diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()} # noqa: SIM118 - min_key = min(overall_ranking, key=overall_ranking.get) + ranking = self.executor.submit( + ai_service_client.generate_ranking, + diffs=diff_strs, + optimization_ids=optimization_ids, + speedups=speedups_list, + trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, + ) + ranking = [x - 1 for x in ranking] + if ranking: + min_key = ranking[0] + else: + diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list) + runtimes_ranking = create_rank_dictionary_compact(runtimes_list) + # TODO: better way to resolve conflicts with same min ranking + overall_ranking = {key: diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()} # noqa: SIM118 + min_key = min(overall_ranking, key=overall_ranking.get) best_optimization = valid_candidates_with_shorter_code[min_key] # reassign code string which is the shortest ai_service_client.log_results( From 9238fe5aa6625e8672195a73f7475e92c8ba8a8d Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 4 Sep 2025 19:29:02 -0700 Subject: [PATCH 3/5] int to float --- codeflash/api/aiservice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index fc541393..d2bc2021 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -354,7 +354,7 @@ def get_new_explanation( # noqa: D417 return "" def generate_ranking( # noqa: D417 - self, trace_id: str, diffs: list[str], optimization_ids: list[str], speedups: list[int] + self, trace_id: str, diffs: list[str], optimization_ids: list[str], speedups: list[float] ) -> list[int] | None: """Optimize the given python code for performance by making a request to the Django endpoint. 
From bf066a9b4958a1a5ada2e5c7bc6265149061c570 Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Thu, 4 Sep 2025 19:36:08 -0700 Subject: [PATCH 4/5] minor bugfix --- codeflash/optimization/function_optimizer.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index a0d073ef..4e624bbc 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -171,9 +171,10 @@ def _process_refinement_results(self) -> OptimizedCandidate | None: self.candidate_queue.put(candidate) self.candidate_len += len(refinement_response) - logger.info( - f"Added {len(refinement_response)} candidates from refinement, total candidates now: {self.candidate_len}" - ) + if len(refinement_response) > 0: + logger.info( + f"Added {len(refinement_response)} candidates from refinement, total candidates now: {self.candidate_len}" + ) self.refinement_done = True return self.get_next_candidate() @@ -697,15 +698,17 @@ def determine_best_candidate( ) optimization_ids.append(new_best_opt.candidate.optimization_id) runtimes_list.append(new_best_opt.runtime) - ranking = self.executor.submit( + future_ranking = self.executor.submit( ai_service_client.generate_ranking, diffs=diff_strs, optimization_ids=optimization_ids, speedups=speedups_list, trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id, ) - ranking = [x - 1 for x in ranking] + concurrent.futures.wait([future_ranking]) + ranking = future_ranking.result() if ranking: + ranking = [x - 1 for x in ranking] min_key = ranking[0] else: diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list) From dec3c1a8aecf92129b1812ddb3e4fd89c47a889e Mon Sep 17 00:00:00 2001 From: aseembits93 Date: Fri, 5 Sep 2025 17:14:24 -0700 Subject: [PATCH 5/5] bugfix --- codeflash/api/aiservice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index d2bc2021..00585f9c 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -387,7 +387,7 @@ def generate_ranking( # noqa: D417 logger.info("Generating ranking") console.rule() try: - response = self.make_ai_service_request("/ranker", payload=payload, timeout=60) + response = self.make_ai_service_request("/rank", payload=payload, timeout=60) except requests.exceptions.RequestException as e: logger.exception(f"Error generating ranking: {e}") ph("cli-optimize-error-caught", {"error": str(e)})