diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index 2eedb9fae..7480252bd 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import concurrent.futures
 import json
 import os
 import platform
@@ -12,7 +13,6 @@
 from codeflash.cli_cmds.console import console, logger
 from codeflash.code_utils.code_replacer import is_zero_diff
 from codeflash.code_utils.code_utils import unified_diff_strings
-from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE, N_CANDIDATES_LP_EFFECTIVE
 from codeflash.code_utils.env_utils import get_codeflash_api_key
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.code_utils.time_utils import humanize_runtime
@@ -92,7 +92,7 @@ def make_ai_service_request(
         return response
 
     def _get_valid_candidates(
-        self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource
+        self, optimizations_json: list[dict[str, Any]], source: OptimizedCandidateSource, model: str | None = None
     ) -> list[OptimizedCandidate]:
         candidates: list[OptimizedCandidate] = []
         for opt in optimizations_json:
@@ -106,6 +106,7 @@
                         optimization_id=opt["optimization_id"],
                         source=source,
                         parent_id=opt.get("parent_id", None),
+                        model=model,
                     )
                 )
         return candidates
@@ -115,10 +116,11 @@ def optimize_python_code(  # noqa: D417
         source_code: str,
         dependency_code: str,
         trace_id: str,
-        num_candidates: int = 10,
         experiment_metadata: ExperimentMetadata | None = None,
         *,
        is_async: bool = False,
+        model: str | None = None,
+        call_sequence: int | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance by making a request to the Django endpoint.
 
@@ -127,8 +129,9 @@ def optimize_python_code(  # noqa: D417
         - source_code (str): The python code to optimize.
         - dependency_code (str): The dependency code used as read-only context for the optimization
         - trace_id (str): Trace id of optimization run
-        - num_candidates (int): Number of optimization variants to generate. Default is 10.
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+        - model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
+        - call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.
 
         Returns
         -------
@@ -141,7 +144,6 @@
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
-            "num_variants": num_candidates,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
@@ -149,12 +151,12 @@
             "current_username": get_last_commit_author_if_pr_exists(None),
             "repo_owner": git_repo_owner,
             "repo_name": git_repo_name,
-            "n_candidates": N_CANDIDATES_EFFECTIVE,
             "is_async": is_async,
+            "model": model,
+            "call_sequence": call_sequence,
         }
+        logger.debug(f"Sending optimize request: model={model}, trace_id={trace_id}, call_sequence={call_sequence}")
 
-        logger.info("!lsp|Generating optimized candidates…")
-        console.rule()
         try:
             response = self.make_ai_service_request("/optimize", payload=payload, timeout=60)
         except requests.exceptions.RequestException as e:
@@ -164,17 +166,16 @@
         if response.status_code == 200:
             optimizations_json = response.json()["optimizations"]
-            console.rule()
             end_time = time.perf_counter()
             logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.")
-            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE)
+            logger.debug(f"Backend returned {len(optimizations_json)} optimization(s)")
+            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE, model=model)
         try:
             error = response.json()["error"]
         except Exception:
             error = response.text
         logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
         ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
-        console.rule()
         return []
 
     def optimize_python_code_line_profiler(  # noqa: D417
@@ -183,8 +184,9 @@
         dependency_code: str,
         trace_id: str,
         line_profiler_results: str,
-        num_candidates: int = 10,
         experiment_metadata: ExperimentMetadata | None = None,
+        model: str | None = None,
+        call_sequence: int | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given python code for performance by making a request to the Django endpoint.
 
@@ -193,8 +195,9 @@ def optimize_python_code_line_profiler(  # noqa: D417
         - source_code (str): The python code to optimize.
         - dependency_code (str): The dependency code used as read-only context for the optimization
         - trace_id (str): Trace id of optimization run
-        - num_candidates (int): Number of optimization variants to generate. Default is 10.
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+        - model (str | None): Model name to use ("gpt-4.1" or "claude-sonnet-4-5"). Default is None (server default).
+        - call_sequence (int | None): Sequence number for multi-model calls (1, 2, 3...). Default is None.
 
         Returns
         -------
@@ -204,20 +207,18 @@
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
-            "num_variants": num_candidates,
             "line_profiler_results": line_profiler_results,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
             "codeflash_version": codeflash_version,
             "lsp_mode": is_LSP_enabled(),
-            "n_candidates_lp": N_CANDIDATES_LP_EFFECTIVE,
+            "model": model,
+            "call_sequence": call_sequence,
         }
-        console.rule()
         if line_profiler_results == "":
             logger.info("No LineProfiler results were provided, Skipping optimization.")
-            console.rule()
             return []
         try:
             response = self.make_ai_service_request("/optimize-line-profiler", payload=payload, timeout=60)
@@ -228,20 +229,115 @@
         if response.status_code == 200:
             optimizations_json = response.json()["optimizations"]
-            logger.info(
-                f"!lsp|Generated {len(optimizations_json)} candidate optimizations using line profiler information."
-            )
-            console.rule()
-            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP)
+            logger.debug(f"Backend returned {len(optimizations_json)} LP optimization(s)")
+            return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP, model=model)
         try:
             error = response.json()["error"]
         except Exception:
             error = response.text
         logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
         ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
-        console.rule()
         return []
 
+    def optimize_python_code_multi_model(
+        self,
+        source_code: str,
+        dependency_code: str,
+        base_trace_id: str,
+        model_distribution: list[tuple[str, int]],
+        executor: concurrent.futures.ThreadPoolExecutor,
+        experiment_metadata: ExperimentMetadata | None = None,
+        *,
+        is_async: bool = False,
+        sequence_offset: int = 0,
+    ) -> tuple[list[OptimizedCandidate], int]:
+        """Generate optimizations using multiple models in parallel."""
+        logger.info("Generating optimized candidates…")
+        console.rule()
+
+        futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = []
+
+        call_index = 0
+        for model_name, num_calls in model_distribution:
+            for _ in range(num_calls):
+                call_trace_id = f"{base_trace_id[:-3]}0{call_index:02x}"
+                call_sequence = sequence_offset + call_index + 1
+                call_index += 1
+                future = executor.submit(
+                    self.optimize_python_code,
+                    source_code,
+                    dependency_code,
+                    call_trace_id,
+                    experiment_metadata,
+                    is_async=is_async,
+                    model=model_name,
+                    call_sequence=call_sequence,
+                )
+                futures.append((future, model_name))
+
+        concurrent.futures.wait([f for f, _ in futures])
+
+        all_candidates: list[OptimizedCandidate] = []
+        for future, model_name in futures:
+            try:
+                candidates = future.result()
+                all_candidates.extend(candidates)
+            except Exception as e:
+                logger.warning(f"Model {model_name} call failed: {e}")
+                continue
+
+        console.rule()
+        return all_candidates, call_index
+
+    def optimize_python_code_line_profiler_multi_model(
+        self,
+        source_code: str,
+        dependency_code: str,
+        base_trace_id: str,
+        line_profiler_results: str,
+        model_distribution: list[tuple[str, int]],
+        executor: concurrent.futures.ThreadPoolExecutor,
+        experiment_metadata: ExperimentMetadata | None = None,
+        sequence_offset: int = 0,
+    ) -> tuple[list[OptimizedCandidate], int]:
+        """Generate line profiler optimizations using multiple models in parallel."""
+ logger.info("Generating optimized candidates with line profiler…") + console.rule() + + futures: list[tuple[concurrent.futures.Future[list[OptimizedCandidate]], str]] = [] + + call_index = 0 + for model_name, num_calls in model_distribution: + for _ in range(num_calls): + call_trace_id = f"{base_trace_id[:-3]}1{call_index:02x}" + call_sequence = sequence_offset + call_index + 1 + call_index += 1 + future = executor.submit( + self.optimize_python_code_line_profiler, + source_code, + dependency_code, + call_trace_id, + line_profiler_results, + experiment_metadata, + model_name, + call_sequence, + ) + futures.append((future, model_name)) + + concurrent.futures.wait([f for f, _ in futures]) + + all_candidates: list[OptimizedCandidate] = [] + for future, model_name in futures: + try: + candidates = future.result() + all_candidates.extend(candidates) + except Exception as e: + logger.warning(f"Line profiler model {model_name} call failed: {e}") + continue + + console.rule() + return all_candidates, call_index + def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]: """Optimize the given python code for performance by making a request to the Django endpoint. @@ -268,6 +364,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest] "trace_id": opt.trace_id, "function_references": opt.function_references, "python_version": platform.python_version(), + "call_sequence": opt.call_sequence, } for opt in request ] @@ -357,6 +454,7 @@ def get_new_explanation( # noqa: D417 throughput_improvement: str | None = None, function_references: str | None = None, codeflash_version: str = codeflash_version, + call_sequence: int | None = None, ) -> str: """Optimize the given python code for performance by making a request to the Django endpoint. @@ -402,6 +500,7 @@ def get_new_explanation( # noqa: D417 "throughput_improvement": throughput_improvement, "function_references": function_references, "codeflash_version": codeflash_version, + "call_sequence": call_sequence, } logger.info("loading|Generating explanation") console.rule() @@ -529,6 +628,7 @@ def generate_regression_tests( # noqa: D417 test_timeout: int, trace_id: str, test_index: int, + call_sequence: int | None = None, ) -> tuple[str, str, str] | None: """Generate regression tests for the given function by making a request to the Django endpoint. @@ -564,6 +664,7 @@ def generate_regression_tests( # noqa: D417 "python_version": platform.python_version(), "codeflash_version": codeflash_version, "is_async": function_to_optimize.is_async, + "call_sequence": call_sequence, } try: response = self.make_ai_service_request("/testgen", payload=payload, timeout=90) @@ -604,6 +705,7 @@ def get_optimization_review( replay_tests: str, concolic_tests: str, # noqa: ARG002 calling_fn_details: str, + call_sequence: int | None = None, ) -> str: """Compute the optimization review of current Pull Request. 
@@ -619,6 +721,7 @@ def get_optimization_review(
             root_dir: Path -> path of git directory
             concolic_tests: str -> concolic_tests (not used)
             calling_fn_details: str -> filenames and definitions of functions which call the function_to_optimize
+            call_sequence: int | None -> sequence number for multi-model calls
 
         Returns:
         -------
@@ -650,6 +753,7 @@
             "codeflash_version": codeflash_version,
             "calling_fn_details": calling_fn_details,
             "python_version": platform.python_version(),
+            "call_sequence": call_sequence,
         }
         console.rule()
         try:
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
index 88758455e..ba09989f8 100644
--- a/codeflash/code_utils/config_consts.py
+++ b/codeflash/code_utils/config_consts.py
@@ -32,6 +32,20 @@
 MAX_N_CANDIDATES = 5
 MAX_N_CANDIDATES_LP = 6
 
+# Multi-model diversity configuration
+# Each tuple is (model_name, num_calls) where each call returns 1 candidate
+# Standard mode: 3 GPT-4.1 + 2 Claude Sonnet = 5 candidates
+MODEL_DISTRIBUTION: list[tuple[str, int]] = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]
+
+# LSP mode: fewer candidates for faster response
+MODEL_DISTRIBUTION_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]
+
+# Line profiler mode: 6 candidates total
+MODEL_DISTRIBUTION_LP: list[tuple[str, int]] = [("gpt-4.1", 4), ("claude-sonnet-4-5", 2)]
+
+# Line profiler LSP mode
+MODEL_DISTRIBUTION_LP_LSP: list[tuple[str, int]] = [("gpt-4.1", 2), ("claude-sonnet-4-5", 1)]
+
 try:
     from codeflash.lsp.helpers import is_LSP_enabled
 
@@ -43,5 +57,7 @@
 N_CANDIDATES_LP_EFFECTIVE = min(N_CANDIDATES_LP_LSP if _IS_LSP_ENABLED else N_CANDIDATES_LP, MAX_N_CANDIDATES_LP)
 N_TESTS_TO_GENERATE_EFFECTIVE = N_TESTS_TO_GENERATE_LSP if _IS_LSP_ENABLED else N_TESTS_TO_GENERATE
 TOTAL_LOOPING_TIME_EFFECTIVE = TOTAL_LOOPING_TIME_LSP if _IS_LSP_ENABLED else TOTAL_LOOPING_TIME
+MODEL_DISTRIBUTION_EFFECTIVE = MODEL_DISTRIBUTION_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION
+MODEL_DISTRIBUTION_LP_EFFECTIVE = MODEL_DISTRIBUTION_LP_LSP if _IS_LSP_ENABLED else MODEL_DISTRIBUTION_LP
 
 MAX_CONTEXT_LEN_REVIEW = 1000
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
index 1db09bc12..822ecffab 100644
--- a/codeflash/models/models.py
+++ b/codeflash/models/models.py
@@ -46,6 +46,7 @@ class AIServiceRefinerRequest:
     original_line_profiler_results: str
     optimized_line_profiler_results: str
     function_references: str | None = None
+    call_sequence: int | None = None
 
 
 class TestDiffScope(str, Enum):
@@ -464,6 +465,7 @@ class OptimizedCandidate:
     optimization_id: str
     source: OptimizedCandidateSource
     parent_id: str | None = None
+    model: str | None = None  # Which LLM model generated this candidate
 
 
 @dataclass(frozen=True)
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 416bdc8df..6228ee01a 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -46,8 +46,8 @@
     COVERAGE_THRESHOLD,
     INDIVIDUAL_TESTCASE_TIMEOUT,
     MAX_REPAIRS_PER_TRACE,
-    N_CANDIDATES_EFFECTIVE,
-    N_CANDIDATES_LP_EFFECTIVE,
+    MODEL_DISTRIBUTION_EFFECTIVE,
+    MODEL_DISTRIBUTION_LP_EFFECTIVE,
     N_TESTS_TO_GENERATE_EFFECTIVE,
     REFINE_ALL_THRESHOLD,
     REFINED_CANDIDATE_RANKING_WEIGHTS,
@@ -139,6 +139,7 @@ def __init__(
         ai_service_client: AiServiceClient,
         executor: concurrent.futures.ThreadPoolExecutor,
         future_all_code_repair: list[concurrent.futures.Future],
+        sequence_offset: int = 0,
     ) -> None:
         self.candidate_queue = queue.Queue()
         self.line_profiler_done = False
@@ -146,6 +147,9 @@
         self.candidate_len = len(initial_candidates)
         self.ai_service_client = ai_service_client
         self.executor = executor
+        self.sequence_offset = sequence_offset
+        self.lp_calls_count = 0
+        self.refinement_calls_count = 0
 
         # Initialize queue with initial candidates
         for candidate in initial_candidates:
@@ -155,6 +159,9 @@
         self.all_refinements_data = all_refinements_data
         self.future_all_code_repair = future_all_code_repair
 
+    def get_total_llm_calls(self) -> int:
+        return self.sequence_offset + self.lp_calls_count + self.refinement_calls_count
+
     def get_next_candidate(self) -> OptimizedCandidate | None:
         """Get the next candidate from the queue, handling async results as needed."""
         try:
@@ -176,7 +183,11 @@
     def _process_line_profiler_results(self) -> OptimizedCandidate | None:
         """Process line profiler results and add to queue."""
         logger.debug("all candidates processed, await candidates from line profiler")
         concurrent.futures.wait([self.future_line_profile_results])
-        line_profile_results = self.future_line_profile_results.result()
+        result = self.future_line_profile_results.result()
+
+        # LP multi-model now returns (candidates, lp_call_count)
+        line_profile_results, lp_call_count = result
+        self.lp_calls_count = lp_call_count
         for candidate in line_profile_results:
             self.candidate_queue.put(candidate)
@@ -192,11 +203,18 @@ def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concur
     def _process_refinement_results(self) -> OptimizedCandidate | None:
         """Process refinement results and add to queue. We generate a weighted ranking based on the runtime and diff lines and select the best (round of 45%) of valid optimizations to be refined."""
+        import dataclasses
+
         future_refinements: list[concurrent.futures.Future] = []
+        # Calculate base sequence: offset + lp_calls (refinements come after LP)
+        base_sequence = self.sequence_offset + self.lp_calls_count
+        refinement_call_index = 0
         if len(self.all_refinements_data) <= REFINE_ALL_THRESHOLD:
             for data in self.all_refinements_data:
-                future_refinements.append(self.refine_optimizations([data]))  # noqa: PERF401
+                refinement_call_index += 1
+                data_with_seq = dataclasses.replace(data, call_sequence=base_sequence + refinement_call_index)
+                future_refinements.append(self.refine_optimizations([data_with_seq]))
         else:
             diff_lens_list = []
             runtimes_list = []
@@ -215,8 +233,13 @@
             top_indecies = sorted(score_dict, key=score_dict.get)[:top_n_candidates]
 
             for idx in top_indecies:
+                refinement_call_index += 1
                 data = self.all_refinements_data[idx]
-                future_refinements.append(self.refine_optimizations([data]))
+                data_with_seq = dataclasses.replace(data, call_sequence=base_sequence + refinement_call_index)
+                future_refinements.append(self.refine_optimizations([data_with_seq]))
+
+        # Track total refinement calls made
+        self.refinement_calls_count = refinement_call_index
 
         if future_refinements:
             logger.info("loading|Refining generated code for improved quality and performance...")
@@ -319,10 +342,14 @@
         self.optimization_review = ""
         self.future_all_code_repair: list[concurrent.futures.Future] = []
         self.repair_counter = 0  # track how many repairs we did for each function
+        self.test_gen_calls_count = 0
+        self.optimize_calls_count = 0
+        self.lp_calls_count = 0
+        self.total_llm_calls = 0
 
     def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
         should_run_experiment = self.experiment_id is not None
-        logger.debug(f"Function Trace ID: {self.function_trace_id}")
+        logger.info(f"Function Trace ID: {self.function_trace_id}")
         ph("cli-optimize-function-start", {"function_trace_id": self.function_trace_id})
         self.cleanup_leftover_test_return_values()
         file_name_from_test_module_name.cache_clear()
@@ -922,17 +949,19 @@
         assert ai_service_client is not None, "AI service client must be set for optimization"
 
         future_line_profile_results = self.executor.submit(
-            ai_service_client.optimize_python_code_line_profiler,
+            ai_service_client.optimize_python_code_line_profiler_multi_model,
             source_code=code_context.read_writable_code.markdown,
             dependency_code=code_context.read_only_context_code,
-            trace_id=self.get_trace_id(exp_type),
+            base_trace_id=self.get_trace_id(exp_type),
             line_profiler_results=original_code_baseline.line_profile_results["str_out"],
-            num_candidates=N_CANDIDATES_LP_EFFECTIVE,
+            model_distribution=MODEL_DISTRIBUTION_LP_EFFECTIVE,
+            executor=self.executor,
             experiment_metadata=ExperimentMetadata(
                 id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment"
             )
            if self.experiment_id
            else None,
+            sequence_offset=self.optimize_calls_count,
         )
 
         processor = CandidateProcessor(
@@ -942,6 +971,7 @@
             self.aiservice_client,
             self.executor,
             self.future_all_code_repair,
+            sequence_offset=self.optimize_calls_count,
         )
 
         candidate_index = 0
@@ -975,6 +1005,9 @@
             self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
         )
 
+        # Track total LLM calls from the processor for sequence numbering
+        self.total_llm_calls = processor.get_total_llm_calls()
+
         # Select and return the best optimization
         best_optimization = self.select_best_optimization(
             eval_ctx=eval_ctx,
@@ -1353,17 +1386,17 @@
         read_only_context_code: str,
         run_experiment: bool = False,  # noqa: FBT001, FBT002
     ) -> Result[tuple[OptimizationSet, str], str]:
-        """Generate optimization candidates for the function."""
-        n_candidates = N_CANDIDATES_EFFECTIVE
-
+        """Generate optimization candidates for the function using multiple models in parallel."""
         future_optimization_candidates = self.executor.submit(
-            self.aiservice_client.optimize_python_code,
+            self.aiservice_client.optimize_python_code_multi_model,
             read_writable_code.markdown,
             read_only_context_code,
             self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id,
-            n_candidates,
+            MODEL_DISTRIBUTION_EFFECTIVE,
+            self.executor,
             ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
             is_async=self.function_to_optimize.is_async,
+            sequence_offset=N_TESTS_TO_GENERATE_EFFECTIVE,
         )
 
         future_references = self.executor.submit(
@@ -1380,27 +1413,34 @@
         if run_experiment:
             future_candidates_exp = self.executor.submit(
-                self.local_aiservice_client.optimize_python_code,
+                self.local_aiservice_client.optimize_python_code_multi_model,
                 read_writable_code.markdown,
                 read_only_context_code,
                 self.function_trace_id[:-4] + "EXP1",
-                n_candidates,
+                MODEL_DISTRIBUTION_EFFECTIVE,
+                self.executor,
                 ExperimentMetadata(id=self.experiment_id, group="experiment"),
                 is_async=self.function_to_optimize.is_async,
+                sequence_offset=N_TESTS_TO_GENERATE_EFFECTIVE,
             )
             futures.append(future_candidates_exp)
 
         # Wait for optimization futures to complete
         concurrent.futures.wait(futures)
 
-        # Retrieve results
-        candidates: list[OptimizedCandidate] = future_optimization_candidates.result()
-        logger.info(f"!lsp|Generated '{len(candidates)}' candidate optimizations.")
+        # Retrieve results - optimize_python_code_multi_model returns (candidates, call_count)
+        candidates, optimize_call_count = future_optimization_candidates.result()
+        # Total sequence count = test gen calls + optimization calls (LP will continue from here)
+        self.optimize_calls_count = N_TESTS_TO_GENERATE_EFFECTIVE + optimize_call_count
+        logger.info(f"!lsp|Completed {optimize_call_count} optimization calls, got {len(candidates)} candidates.")
         if not candidates:
             return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}")
 
-        candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None
+        # Handle experiment results - also returns (candidates, call_count) tuple
+        candidates_experiment = None
+        if future_candidates_exp:
+            candidates_experiment, _ = future_candidates_exp.result()
         function_references = future_references.result()
 
         return Success((OptimizationSet(control=candidates, experiment=candidates_experiment), function_references))
@@ -1647,6 +1687,10 @@
         )
         throughput_improvement_str = f"{throughput_improvement_value * 100:.1f}%"
 
+        # Explanation call continues the sequence numbering
+        explanation_call_sequence = self.total_llm_calls + 1
+        self.total_llm_calls = explanation_call_sequence
+
         new_explanation_raw_str = self.aiservice_client.get_new_explanation(
             source_code=code_context.read_writable_code.flat,
             dependency_code=code_context.read_only_context_code,
@@ -1664,6 +1708,7 @@
             optimized_throughput=optimized_throughput_str,
             throughput_improvement=throughput_improvement_str,
             function_references=function_references,
+            call_sequence=explanation_call_sequence,
         )
         new_explanation = Explanation(
             raw_explanation_message=new_explanation_raw_str or explanation.raw_explanation_message,
@@ -1699,9 +1744,13 @@
         staging_review = self.args.staging_review
         opt_review_response = ""
         # this will now run regardless of pr, staging review flags
+        # Optimization review call continues the sequence numbering
+        review_call_sequence = self.total_llm_calls + 1
+        self.total_llm_calls = review_call_sequence
+
         try:
             opt_review_response = self.aiservice_client.get_optimization_review(
-                **data, calling_fn_details=function_references
+                **data, calling_fn_details=function_references, call_sequence=review_call_sequence
             )
         except Exception as e:
             logger.debug(f"optimization review response failed, investigate {e}")
@@ -2192,6 +2241,9 @@
         generated_test_paths: list[Path],
         generated_perf_test_paths: list[Path],
     ) -> list[concurrent.futures.Future]:
+        # Track how many test generation calls we're making for sequence numbering
+        self.test_gen_calls_count = len(generated_test_paths)
+
         return [
             executor.submit(
                 generate_tests,
@@ -2206,6 +2258,7 @@
                 test_index,
                 test_path,
                 test_perf_path,
+                call_sequence=test_index + 1,
             )
             for test_index, (test_path, test_perf_path) in enumerate(
                 zip(generated_test_paths, generated_perf_test_paths)
diff --git a/codeflash/verification/verifier.py b/codeflash/verification/verifier.py
index 8d187f2b1..d94455df3 100644
--- a/codeflash/verification/verifier.py
+++ b/codeflash/verification/verifier.py
@@ -27,6 +27,7 @@ def generate_tests(
     test_index: int,
     test_path: Path,
     test_perf_path: Path,
+    call_sequence: int | None = None,
 ) -> tuple[str, str, Path] | None:
     # TODO: Sometimes this recreates the original Class definition. This overrides and messes up the original
     # class import. Remove the recreation of the class definition
@@ -42,6 +43,7 @@ def generate_tests(
         test_timeout=test_timeout,
         trace_id=function_trace_id,
         test_index=test_index,
+        call_sequence=call_sequence,
     )
     if response and isinstance(response, tuple) and len(response) == 3:
         generated_test_source, instrumented_behavior_test_source, instrumented_perf_test_source = response
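
Reviewer sketch (not part of the patch): the two *_multi_model helpers in aiservice.py share one fan-out shape — expand the (model_name, num_calls) distribution into individual calls, give each call its own trace id and call_sequence, submit them all to a shared ThreadPoolExecutor, then merge whatever comes back while tolerating per-call failures. The standalone sketch below mirrors that shape under stated assumptions; fake_optimize and fan_out are hypothetical stand-ins, not codeflash APIs.

from __future__ import annotations

import concurrent.futures


def fake_optimize(trace_id: str, model: str, call_sequence: int) -> list[str]:
    # Placeholder for the real per-call backend request; returns one "candidate" per call.
    return [f"candidate from {model} (seq={call_sequence}, trace={trace_id})"]


def fan_out(
    base_trace_id: str, distribution: list[tuple[str, int]], sequence_offset: int = 0
) -> tuple[list[str], int]:
    candidates: list[str] = []
    futures: list[tuple[concurrent.futures.Future[list[str]], str]] = []
    call_index = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
        for model_name, num_calls in distribution:
            for _ in range(num_calls):
                # Same id scheme as the patch: replace the last 3 chars of the base id
                # with a phase digit ("0" = optimize) and a two-hex-digit call index.
                call_trace_id = f"{base_trace_id[:-3]}0{call_index:02x}"
                call_sequence = sequence_offset + call_index + 1
                call_index += 1
                futures.append(
                    (executor.submit(fake_optimize, call_trace_id, model_name, call_sequence), model_name)
                )
        concurrent.futures.wait([f for f, _ in futures])
    for future, model_name in futures:
        try:
            candidates.extend(future.result())
        except Exception as exc:  # one failed model call should not sink the whole batch
            print(f"{model_name} call failed: {exc}")
    return candidates, call_index


if __name__ == "__main__":
    results, calls = fan_out("abcdefgh000", [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)])
    print(calls, *results, sep="\n")

Because each submit is isolated and failures are caught while collecting results, a slow or failing model only shrinks the batch; it never raises out of the helper, which is why the callers can unconditionally unpack (candidates, call_count).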
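
Reviewer note on the call_sequence bookkeeping the patch threads through function_optimizer.py: test-generation calls take 1..N_TESTS_TO_GENERATE_EFFECTIVE, the optimize fan-out continues from that offset, line-profiler calls continue from optimize_calls_count, refinements follow the LP calls, and the explanation and optimization-review calls take the next two numbers. A worked example with assumed sizes follows (N_TESTS_TO_GENERATE_EFFECTIVE is defined in config_consts.py and not shown in this diff; 2 is an illustrative guess, and the refinement count depends on how many candidates survive).

# Illustrative accounting only - mirrors the arithmetic in the patch, not the real code paths.
n_tests = 2                      # assumed N_TESTS_TO_GENERATE_EFFECTIVE
optimize_calls = 3 + 2           # MODEL_DISTRIBUTION: 3 gpt-4.1 + 2 claude-sonnet-4-5
lp_calls = 4 + 2                 # MODEL_DISTRIBUTION_LP: 4 gpt-4.1 + 2 claude-sonnet-4-5
refinement_calls = 3             # however many candidates get refined

test_gen_seqs = list(range(1, n_tests + 1))                               # [1, 2]
optimize_seqs = [n_tests + i + 1 for i in range(optimize_calls)]          # [3..7]
optimize_calls_count = n_tests + optimize_calls                           # 7, stored on the optimizer
lp_seqs = [optimize_calls_count + i + 1 for i in range(lp_calls)]         # [8..13]
refine_base = optimize_calls_count + lp_calls
refinement_seqs = [refine_base + i + 1 for i in range(refinement_calls)]  # [14..16]
explanation_seq = refine_base + refinement_calls + 1                      # 17 (total_llm_calls + 1)
review_seq = explanation_seq + 1                                          # 18
print(test_gen_seqs, optimize_seqs, lp_seqs, refinement_seqs, explanation_seq, review_seq)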
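
Reviewer note: each MODEL_DISTRIBUTION* entry is a (model_name, num_calls) pair, so the expected candidate count is simply the sum of the second elements, and both *_multi_model helpers effectively walk a flattened call plan. A tiny illustrative sketch of that flattening, under the assumption that each call yields one candidate:

from itertools import chain

MODEL_DISTRIBUTION = [("gpt-4.1", 3), ("claude-sonnet-4-5", 2)]

call_plan = list(chain.from_iterable([model] * calls for model, calls in MODEL_DISTRIBUTION))
assert len(call_plan) == sum(calls for _, calls in MODEL_DISTRIBUTION)  # 5 candidates expected
print(call_plan)  # ['gpt-4.1', 'gpt-4.1', 'gpt-4.1', 'claude-sonnet-4-5', 'claude-sonnet-4-5']

Since per-call trace ids replace the last three characters of the base id with a phase digit ("0" for optimize, "1" for line profiler) plus a two-hex-digit index, the scheme keeps the two phases' ids disjoint and implicitly assumes at most 256 calls per phase.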