codeflash/code_utils/code_utils.py (57 additions, 0 deletions)
@@ -64,6 +64,63 @@ def create_rank_dictionary_compact(int_array: list[int]) -> dict[int, int]:
    return {original_index: rank for rank, original_index in enumerate(sorted_indices)}


def choose_weights(**importance: float) -> list[float]:
    """Choose normalized weights from relative importance values.

    Example:
        choose_weights(runtime=3, diff=1)
        -> [0.75, 0.25]

    Args:
        **importance: Keyword arguments mapping each metric name to its
            relative importance (non-negative numbers).

    Returns:
        A list of weights, in the same order the arguments were passed.

    """
    total = sum(importance.values())
    if total <= 0:
        raise ValueError("Importance values must sum to a number > 0")

    return [v / total for v in importance.values()]


def normalize(values: list[float]) -> list[float]:
    """Min-max scale values into [0, 1]; a constant list maps to all zeros."""
    mn, mx = min(values), max(values)
    if mx == mn:
        # No spread to scale: treat every entry as equally good.
        return [0.0] * len(values)
    return [(v - mn) / (mx - mn) for v in values]


def create_score_dictionary_from_metrics(weights: list[float], *metrics: list[float]) -> dict[int, float]:
    """Combine multiple metrics into a single weighted score dictionary.

    Each metric is a list of values (smaller = better).
    The total score for each index is the weighted sum of its values
    across all metrics:

        score[index] = Σ (value * weight)

    Args:
        weights: A list of weights, one per metric. Larger weight = more influence.
        *metrics: Lists of values (one list per metric, aligned by index).

    Returns:
        A dictionary mapping each index to its combined weighted score.

    """
    if len(weights) != len(metrics):
        raise ValueError("Number of weights must match number of metrics")

    combined: dict[int, float] = {}

    for weight, metric in zip(weights, metrics):
        for idx, value in enumerate(metric):
            # Accumulate this metric's weighted contribution for each index.
            combined[idx] = combined.get(idx, 0.0) + value * weight

    return combined


@contextmanager
def custom_addopts() -> None:
    pyproject_file = find_pyproject_toml()
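As a quick sanity check of how the three new helpers compose, here is an illustrative snippet (the values are invented for the example; it is not part of the PR):

weights = choose_weights(runtime=3, diff=1)      # [0.75, 0.25]

runtimes = normalize([100.0, 150.0, 300.0])      # [0.0, 0.25, 1.0]
diffs = normalize([80.0, 20.0, 20.0])            # [1.0, 0.0, 0.0]

scores = create_score_dictionary_from_metrics(weights, runtimes, diffs)
# {0: 0.25, 1: 0.1875, 2: 0.75} -> index 1 has the lowest (best) score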
codeflash/optimization/function_optimizer.py (12 additions, 6 deletions)
@@ -31,13 +31,15 @@
 )
 from codeflash.code_utils.code_utils import (
     ImportErrorPattern,
+    choose_weights,
     cleanup_paths,
-    create_rank_dictionary_compact,
+    create_score_dictionary_from_metrics,
     diff_length,
     file_name_from_test_module_name,
     get_run_tmp_file,
     has_any_async_functions,
     module_name_from_file_path,
+    normalize,
     restore_conftest,
 )
 from codeflash.code_utils.config_consts import (
@@ -683,11 +685,15 @@ def determine_best_candidate(
             diff_length(new_best_opt.candidate.source_code.flat, code_context.read_writable_code.flat)
         )  # char level diff
         runtimes_list.append(new_best_opt.runtime)
-        diff_lens_ranking = create_rank_dictionary_compact(diff_lens_list)
-        runtimes_ranking = create_rank_dictionary_compact(runtimes_list)
-        # TODO: better way to resolve conflicts with same min ranking
-        overall_ranking = {key: diff_lens_ranking[key] + runtimes_ranking[key] for key in diff_lens_ranking.keys()}  # noqa: SIM118
-        min_key = min(overall_ranking, key=overall_ranking.get)
+
+        # runtime is more important than diff by a factor of 3
+        weights = choose_weights(runtime=3, diff=1)
+
+        runtime_norm = normalize(runtimes_list)
+        diffs_norm = normalize(diff_lens_list)
+        score_dict = create_score_dictionary_from_metrics(weights, runtime_norm, diffs_norm)
+
+        min_key = min(score_dict, key=score_dict.get)
         best_optimization = valid_candidates_with_shorter_code[min_key]
         # reassign code string which is the shortest
         ai_service_client.log_results(
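To see what the weighted score buys over the old rank-sum, here is a short sketch with invented numbers (the candidate values below are illustrative, not from a real run):

runtimes_list = [120, 100, 300]   # smaller is better
diff_lens_list = [40, 500, 10]    # smaller is better

# Old approach: the rank sums come out as 2 for every candidate here,
# exactly the tie the removed TODO complained about.
# New approach: weighted normalized scores break the tie.
weights = choose_weights(runtime=3, diff=1)
score_dict = create_score_dictionary_from_metrics(
    weights, normalize(runtimes_list), normalize(diff_lens_list)
)
# {0: ~0.09, 1: 0.25, 2: 0.75} -> candidate 0 wins: near-best runtime with a
# small diff beats the fastest candidate that needs a huge diff.
min_key = min(score_dict, key=score_dict.get)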