def choose_weights(**importance: float) -> list[float]:
    """Choose normalized weights from relative importance values.

    Example:
        choose_weights(runtime=3, diff=1)
        -> [0.75, 0.25]

    Args:
        **importance: keyword args of metric=importance (relative, non-negative numbers).

    Returns:
        A list of weights in the same order as the arguments, summing to 1.

    Raises:
        ValueError: If any importance value is negative, or if the values sum to
            zero (which includes the case where no values are given).

    """
    # A negative importance would yield a negative weight, silently turning
    # "smaller is better" into "larger is better" for that metric.
    negative = [name for name, value in importance.items() if value < 0]
    if negative:
        raise ValueError(f"Importance values must be non-negative, got negative value(s) for: {', '.join(negative)}")

    total = sum(importance.values())
    if total == 0:
        raise ValueError("At least one importance value must be > 0")

    return [v / total for v in importance.values()]


def normalize(values: list[float]) -> list[float]:
    """Min-max scale values into the range [0, 1].

    Args:
        values: The numbers to rescale.

    Returns:
        A new list of the same length where the smallest input maps to 0.0 and
        the largest to 1.0. An empty input gives an empty list; if all inputs
        are equal, every output is 0.0.

    """
    # min()/max() raise on an empty sequence; there is nothing to scale anyway.
    if not values:
        return []

    mn, mx = min(values), max(values)
    if mx == mn:
        # Zero spread: avoid dividing by zero and treat all entries as equal.
        return [0.0] * len(values)

    span = mx - mn
    return [(v - mn) / span for v in values]


def create_score_dictionary_from_metrics(weights: list[float], *metrics: list[float]) -> dict[int, float]:
    """Combine multiple metrics into a single weighted score dictionary.

    Each metric is a list of values (smaller = better).
    The total score for each index is the weighted sum of its values
    across all metrics:

        score[index] = Σ (value * weight)

    Args:
        weights: A list of weights, one per metric. Larger weight = more influence.
        *metrics: Lists of values (one list per metric, aligned by index).

    Returns:
        A dictionary mapping each index to its combined weighted score.
        Empty when no metrics are given.

    Raises:
        ValueError: If the number of weights differs from the number of metrics,
            or if the metrics do not all have the same length.

    """
    if len(weights) != len(metrics):
        raise ValueError("Number of weights must match number of metrics")

    if not metrics:
        return {}

    # Indices missing from a shorter metric would receive a partial (lower, i.e.
    # "better") score and could win the selection, so reject misaligned inputs.
    expected_len = len(metrics[0])
    if any(len(metric) != expected_len for metric in metrics):
        raise ValueError("All metrics must have the same number of values")

    # zip(*metrics) yields, for each index, that index's value in every metric.
    return {
        idx: sum(value * weight for weight, value in zip(weights, values))
        for idx, values in enumerate(zip(*metrics))
    }
valid_candidates_with_shorter_code[min_key] # reassign code string which is the shortest ai_service_client.log_results(