150 changes: 127 additions & 23 deletions codeflash/api/aiservice.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import concurrent.futures
import json
import os
import platform
@@ -12,7 +13,6 @@
from codeflash.cli_cmds.console import console, logger
from codeflash.code_utils.code_replacer import is_zero_diff
from codeflash.code_utils.code_utils import unified_diff_strings
from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE, N_CANDIDATES_LP_EFFECTIVE
from codeflash.code_utils.env_utils import get_codeflash_api_key
from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
from codeflash.code_utils.time_utils import humanize_runtime
@@ -92,7 +92,7 @@ def make_ai_service_request(
return response

def _get_valid_candidates(
self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource
self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource, model: str | None = None
) -> list[OptimizedCandidate]:
candidates: list[OptimizedCandidate] = []
for opt in optimizations_json:
@@ -106,6 +106,7 @@ def _get_valid_candidates(
optimization_id=opt["optimization_id"],
source=source,
parent_id=opt.get("parent_id", None),
model=model,
)
)
return candidates
@@ -115,10 +116,11 @@ def optimize_python_code( # noqa: D417
source_code: str,
dependency_code: str,
trace_id: str,
num_candidates: int = 10,
experiment_metadata: ExperimentMetadata | None = None,
*,
is_async: bool = False,
model: str | None = None,
call_sequence: int | None = None,
) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -127,8 +129,9 @@
- source_code (str): The python code to optimize.
- dependency_code (str): The dependency code used as read-only context for the optimization
- trace_id (str): Trace id of optimization run
- num_candidates (int): Number of optimization variants to generate. Default is 10.
- experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
- model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
- call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.

Returns
-------
@@ -141,20 +144,19 @@
payload = {
"source_code": source_code,
"dependency_code": dependency_code,
"num_variants": num_candidates,
"trace_id": trace_id,
"python_version": platform.python_version(),
"experiment_metadata": experiment_metadata,
"codeflash_version": codeflash_version,
"current_username": get_last_commit_author_if_pr_exists(None),
"repo_owner": git_repo_owner,
"repo_name": git_repo_name,
"n_candidates": N_CANDIDATES_EFFECTIVE,
"is_async": is_async,
"model": model,

[Contributor review comment on the "model" field] i would recommend we do model selection from our backend. that way we can keep switching models etc more easily.

"call_sequence": call_sequence,
}
logger.debug(f"Sending optimize request: model={model}, trace_id={trace_id}, call_sequence={call_sequence}")

logger.info("!lsp|Generating optimized candidates…")
console.rule()
try:
response = self.make_ai_service_request("/optimize", payload=payload, timeout=60)
except requests.exceptions.RequestException as e:
@@ -164,17 +166,16 @@

if response.status_code == 200:
optimizations_json = response.json()["optimizations"]
console.rule()
end_time = time.perf_counter()
logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.")
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE)
logger.debug(f"Backend returned {len(optimizations_json)} optimization(s)")
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE, model=model)
try:
error = response.json()["error"]
except Exception:
error = response.text
logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
console.rule()
return []

def optimize_python_code_line_profiler( # noqa: D417
@@ -183,8 +184,9 @@ def optimize_python_code_line_profiler( # noqa: D417
dependency_code: str,
trace_id: str,
line_profiler_results: str,
num_candidates: int = 10,
experiment_metadata: ExperimentMetadata | None = None,
model: str | None = None,
call_sequence: int | None = None,
) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -193,8 +195,9 @@
- source_code (str): The python code to optimize.
- dependency_code (str): The dependency code used as read-only context for the optimization
- trace_id (str): Trace id of optimization run
- num_candidates (int): Number of optimization variants to generate. Default is 10.
- experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
- model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
- call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.

Returns
-------
@@ -204,20 +207,18 @@
payload = {
"source_code": source_code,
"dependency_code": dependency_code,
"num_variants": num_candidates,
"line_profiler_results": line_profiler_results,
"trace_id": trace_id,
"python_version": platform.python_version(),
"experiment_metadata": experiment_metadata,
"codeflash_version": codeflash_version,
"lsp_mode": is_LSP_enabled(),
"n_candidates_lp": N_CANDIDATES_LP_EFFECTIVE,
"model": model,
"call_sequence": call_sequence,
}

console.rule()
if line_profiler_results == "":
logger.info("No LineProfiler results were provided, Skipping optimization.")
console.rule()
return []
try:
response = self.make_ai_service_request("/optimize-line-profiler", payload=payload, timeout=60)
@@ -228,20 +229,115 @@

if response.status_code == 200:
optimizations_json = response.json()["optimizations"]
logger.info(
f"!lsp|Generated {len(optimizations_json)} candidate optimizations using line profiler information."
)
console.rule()
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP)
logger.debug(f"Backend returned {len(optimizations_json)} LP optimization(s)")
return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP, model=model)
try:
error = response.json()["error"]
except Exception:
error = response.text
logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
console.rule()
return []

def optimize_python_code_multi_model(
self,
source_code: str,
dependency_code: str,
base_trace_id: str,
model_distribution: list[tuple[str, int]],
executor: concurrent.futures.ThreadPoolExecutor,
experiment_metadata: ExperimentMetadata | None = None,
*,
is_async: bool = False,
sequence_offset: int = 0,
) -> tuple[list[OptimizedCandidate], int]:
"""Generate optimizations using multiple models in parallel."""
logger.info("Generating optimized candidates…")
console.rule()

futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = []

call_index = 0
for model_name, num_calls in model_distribution:
for _ in range(num_calls):
call_trace_id = f"{base_trace_id[:-3]}0{call_index:02x}"
call_sequence = sequence_offset + call_index + 1
call_index += 1
future = executor.submit(
self.optimize_python_code,
source_code,
dependency_code,
call_trace_id,
experiment_metadata,
is_async=is_async,
model=model_name,
call_sequence=call_sequence,
)
futures.append((future, model_name))

concurrent.futures.wait([f for f, _ in futures])

all_candidates: list[OptimizedCandidate] = []
for future, model_name in futures:
try:
candidates = future.result()
all_candidates.extend(candidates)
except Exception as e:
logger.warning(f"Model {model_name} call failed: {e}")
continue

console.rule()
return all_candidates, call_index

def optimize_python_code_line_profiler_multi_model(
self,
source_code: str,
dependency_code: str,
base_trace_id: str,
line_profiler_results: str,
model_distribution: list[tuple[str, int]],
executor: concurrent.futures.ThreadPoolExecutor,
experiment_metadata: ExperimentMetadata | None = None,
sequence_offset: int = 0,
) -> tuple[list[OptimizedCandidate], int]:
"""Generate line profiler optimizations using multiple models in parallel."""
logger.info("Generating optimized candidates with line profiler…")
console.rule()

futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = []

call_index = 0
for model_name, num_calls in model_distribution:
for _ in range(num_calls):
call_trace_id = f"{base_trace_id[:-3]}1{call_index:02x}"
call_sequence = sequence_offset + call_index + 1
call_index += 1
future = executor.submit(
self.optimize_python_code_line_profiler,
source_code,
dependency_code,
call_trace_id,
line_profiler_results,
experiment_metadata,
model_name,
call_sequence,
)
futures.append((future, model_name))

concurrent.futures.wait([f for f, _ in futures])

all_candidates: list[OptimizedCandidate] = []
for future, model_name in futures:
try:
candidates = future.result()
all_candidates.extend(candidates)
except Exception as e:
logger.warning(f"Line profiler model {model_name} call failed: {e}")
continue

console.rule()
return all_candidates, call_index

def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -268,6 +364,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
"trace_id": opt.trace_id,
"function_references": opt.function_references,
"python_version": platform.python_version(),
"call_sequence": opt.call_sequence,
}
for opt in request
]
@@ -357,6 +454,7 @@ def get_new_explanation( # noqa: D417
throughput_improvement: str | None = None,
function_references: str | None = None,
codeflash_version: str = codeflash_version,
call_sequence: int | None = None,
) -> str:
"""Optimize the given python code for performance by making a request to the Django endpoint.

@@ -402,6 +500,7 @@ def get_new_explanation( # noqa: D417
"throughput_improvement": throughput_improvement,
"function_references": function_references,
"codeflash_version": codeflash_version,
"call_sequence": call_sequence,
}
logger.info("loading|Generating explanation")
console.rule()
@@ -529,6 +628,7 @@ def generate_regression_tests( # noqa: D417
test_timeout: int,
trace_id: str,
test_index: int,
call_sequence: int | None = None,
) -> tuple[str, str, str] | None:
"""Generate regression tests for the given function by making a request to the Django endpoint.

@@ -564,6 +664,7 @@ def generate_regression_tests( # noqa: D417
"python_version": platform.python_version(),
"codeflash_version": codeflash_version,
"is_async": function_to_optimize.is_async,
"call_sequence": call_sequence,
}
try:
response = self.make_ai_service_request("/testgen", payload=payload, timeout=90)
@@ -604,6 +705,7 @@ def get_optimization_review(
replay_tests: str,
concolic_tests: str, # noqa: ARG002
calling_fn_details: str,
call_sequence: int | None = None,
) -> str:
"""Compute the optimization review of current Pull Request.

@@ -619,6 +721,7 @@ def get_optimization_review(
root_dir: Path -> path of git directory
concolic_tests: str -> concolic_tests (not used)
calling_fn_details: str -> filenames and definitions of functions which call the function_to_optimize
call_sequence: int | None -> sequence number for multi-model calls

Returns:
-------
@@ -650,6 +753,7 @@
"codeflash_version": codeflash_version,
"calling_fn_details": calling_fn_details,
"python_version": platform.python_version(),
"call_sequence": call_sequence,
}
console.rule()
try:
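As a minimal sketch of the fan-out logic introduced in optimize_python_code_multi_model (assuming a hypothetical base_trace_id and a fake_optimize stand-in for AiServiceClient.optimize_python_code), each entry in the model distribution expands into individual calls, each with a hex-suffixed trace id and a 1-based call_sequence:

from concurrent.futures import ThreadPoolExecutor

MODEL_DISTRIBUTION = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]
base_trace_id = "abc123def456xyz"  # hypothetical trace id; only the last 3 chars are replaced
sequence_offset = 0

def fake_optimize(trace_id: str, model: str, call_sequence: int) -> list[str]:
    # Stand-in for AiServiceClient.optimize_python_code(...); returns no candidates here.
    return []

futures = []
with ThreadPoolExecutor(max_workers=5) as executor:
    call_index = 0
    for model_name, num_calls in MODEL_DISTRIBUTION:
        for _ in range(num_calls):
            # Mirrors f"{base_trace_id[:-3]}0{call_index:02x}" from the diff:
            # a "0" marker plus a two-digit hex call index replaces the last 3 chars.
            call_trace_id = f"{base_trace_id[:-3]}0{call_index:02x}"
            call_sequence = sequence_offset + call_index + 1
            call_index += 1
            futures.append(
                (executor.submit(fake_optimize, call_trace_id, model_name, call_sequence), model_name)
            )

all_candidates = []
for future, model_name in futures:
    try:
        all_candidates.extend(future.result())
    except Exception as exc:
        # One failed model call should not discard candidates from the other models.
        print(f"Model {model_name} call failed: {exc}")

The line profiler variant follows the same shape but uses a "1" marker in the trace-id suffix.
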
16 changes: 16 additions & 0 deletions codeflash/code_utils/config_consts.py
@@ -32,6 +32,20 @@
MAX_N_CANDIDATES = 5
MAX_N_CANDIDATES_LP = 6

# Multi-model diversity configuration
# Each tuple is (model_name, num_calls) where each call returns 1 candidate
# Standard mode: 3 GPT-4.1 + 2 Claude Sonnet = 5 candidates
MODEL_DISTRIBUTION: list[tuple[str, int]] = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]

# LSP mode: fewer candidates for faster response
MODEL_DISTRIBUTION_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]

# Line profiler mode: 6 candidates total
MODEL_DISTRIBUTION_LP: list[tuple[str, int]] = [("gpt-4.1", 4), ("claude-sonnet-4-5", 2)]

# Line profiler LSP mode
MODEL_DISTRIBUTION_LP_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]

try:
from codeflash.lsp.helpers import is_LSP_enabled

@@ -43,5 +57,7 @@
N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP)
N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE
TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME
MODEL_DISTRIBUTION_EFFECTIVE = MODEL_DISTRIBUTION_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION
MODEL_DISTRIBUTION_LP_EFFECTIVE = MODEL_DISTRIBUTION_LP_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION_LP

MAX_CONTEXT_LEN_REVIEW = 1000
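
A quick sanity check of the distributions above (illustrative only; total_calls is a hypothetical helper, not part of this module): each call yields one candidate, so the per-model counts sum to the totals stated in the comments, and the LSP variants are selected via the same *_EFFECTIVE pattern already used for the candidate counts.

MODEL_DISTRIBUTION = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]         # standard: 5
MODEL_DISTRIBUTION_LSP = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]     # LSP: 3
MODEL_DISTRIBUTION_LP = [("gpt-4.1", 4), ("claude-sonnet-4-5", 2)]      # line profiler: 6
MODEL_DISTRIBUTION_LP_LSP = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]  # LP + LSP: 3

def total_calls(distribution: list[tuple[str, int]]) -> int:
    # One submitted call per unit of num_calls; each call is expected to return one candidate.
    return sum(num_calls for _, num_calls in distribution)

assert total_calls(MODEL_DISTRIBUTION) == 5
assert total_calls(MODEL_DISTRIBUTION_LP) == 6
assert total_calls(MODEL_DISTRIBUTION_LSP) == total_calls(MODEL_DISTRIBUTION_LP_LSP) == 3
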
2 changes: 2 additions & 0 deletions codeflash/models/models.py
@@ -46,6 +46,7 @@ class AIServiceRefinerRequest:
original_line_profiler_results: str
optimized_line_profiler_results: str
function_references: str | None = None
call_sequence: int | None = None


class TestDiffScope(str, Enum):
@@ -464,6 +465,7 @@ class OptimizedCandidate:
optimization_id: str
source: OptimizedCandidateSource
parent_id: str | None = None
model: str | None = None # Which LLM model generated this candidate


@dataclass(frozen=True)