codeflash/code_utils/code_utils.py (57 additions, 0 deletions)
@@ -64,6 +64,63 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]:
    return {original_index: rank for rank, original_index in enumerate(sorted_indices)}


def choose_weights(**importance: float) -> list[float]:
    """Choose normalized weights from relative importance values.

    Example:
        choose_weights(runtime=3, diff=1)
        -> [0.75, 0.25]

    Args:
        **importance: Keyword arguments mapping each metric name to its
            relative importance (non-negative numbers).

    Returns:
        A list of weights, in the same order the arguments were passed.

    """
    total = sum(importance.values())
    if total <= 0:
        raise ValueError("Importance values must sum to a number > 0")

    return [v / total for v in importance.values()]


def normalize(values: list[float]) -> list[float]:
    """Min-max scale values into [0, 1]; a constant list maps to all zeros."""
    mn, mx = min(values), max(values)
    if mx == mn:
        # No spread to scale: treat every entry as equally good.
        return [0.0] * len(values)
    return [(v - mn) / (mx - mn) for v in values]


def create_score_dictionary_from_metrics(weights: list[float], *metrics: list[float]) -> dict[int, float]:
    """Combine multiple metrics into a single weighted score dictionary.

    Each metric is a list of values (smaller = better).
    The total score for each index is the weighted sum of its values
    across all metrics:

        score[index] = Σ (value * weight)

    Args:
        weights: A list of weights, one per metric. Larger weight = more influence.
        *metrics: Lists of values (one list per metric, aligned by index).

    Returns:
        A dictionary mapping each index to its combined weighted score.

    """
    if len(weights) != len(metrics):
        raise ValueError("Number of weights must match number of metrics")

    combined: dict[int, float] = {}

    for weight, metric in zip(weights, metrics):
        for idx, value in enumerate(metric):
            # Accumulate this metric's weighted contribution for each index.
            combined[idx] = combined.get(idx, 0.0) + value * weight

    return combined


@contextmanager
def custom_addopts() -> None:
    pyproject_file = find_pyproject_toml()
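As a quick sanity check of how the three new helpers compose, here is an illustrative snippet (the values are invented for the example; it is not part of the PR):

weights = choose_weights(runtime=3, diff=1)      # [0.75, 0.25]

runtimes = normalize([100.0, 150.0, 300.0])      # [0.0, 0.25, 1.0]
diffs = normalize([80.0, 20.0, 20.0])            # [1.0, 0.0, 0.0]

scores = create_score_dictionary_from_metrics(weights, runtimes, diffs)
# {0: 0.25, 1: 0.1875, 2: 0.75} -> index 1 has the lowest (best) score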
codeflash/optimization/function_optimizer.py (12 additions, 6 deletions)
@@ -31,13 +31,15 @@
 )
 from codeflash.code_utils.code_utils import (
     ImportErrorPattern,
+    choose_weights,
     cleanup_paths,
-    create_rank_dictionary_compact,
+    create_score_dictionary_from_metrics,
     diff_length,
     file_name_from_test_module_name,
     get_run_tmp_file,
     has_any_async_functions,
     module_name_from_file_path,
+    normalize,
     restore_conftest,
 )
 from codeflash.code_utils.config_consts import (
@@ -683,11 +685,15 @@ def determine_best_candidate(
             diff_length(new_best_opt.candidate.source_code.flat, code_context.read_writable_code.flat)
         )  # char level diff
         runtimes_list.append(new_best_opt.runtime)
-        diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list)
-        runtimes_ranking = create_rank_dictionary_compact(runtimes_list)
-        # TODO: better way to resolve conflicts with same min ranking
-        overall_ranking = {key: diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()}  # noqa: SIM118
-        min_key = min(overall_ranking, key=overall_ranking.get)
+
+        # runtime is more important than diff by a factor of 3
+        weights = choose_weights(runtime=3, diff=1)
+
+        runtime_norm = normalize(runtimes_list)
+        diffs_norm = normalize(diff_lens_list)
+        score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm)
+
+        min_key = min(score_dict, key=score_dict.get)
         best_optimization = valid_candidates_with_shorter_code[min_key]
         # reassign code string which is the shortest
         ai_service_client.log_results(
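To see what the weighted score buys over the old rank-sum, here is a short sketch with invented numbers (the candidate values below are illustrative, not from a real run):

runtimes_list = [120, 100, 300]   # smaller is better
diff_lens_list = [40, 500, 10]    # smaller is better

# Old approach: the rank sums come out as 2 for every candidate here,
# exactly the tie the removed TODO complained about.
# New approach: weighted normalized scores break the tie.
weights = choose_weights(runtime=3, diff=1)
score_dict = create_score_dictionary_from_metrics(
    weights, normalize(runtimes_list), normalize(diff_lens_list)
)
# {0: ~0.09, 1: 0.25, 2: 0.75} -> candidate 0 wins: near-best runtime with a
# small diff beats the fastest candidate that needs a huge diff.
min_key = min(score_dict, key=score_dict.get)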