Hardcode A/B testing of single-prompt test generation for now

aseembits93 · aseembits93 · commit 4736fd781f3a · 2025-05-07T16:14:05.000-07:00
diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
@@ -248,6 +248,7 @@ def generate_regression_tests(
         test_timeout: int,
         trace_id: str,
         test_index: int,
+        single_prompt: bool=False,
     ) -> tuple[str, str, str] | None:
         """Generate regression tests for the given function by making a request to the Django endpoint.
 
@@ -284,7 +285,10 @@ def generate_regression_tests(
             "codeflash_version": codeflash_version,
         }
         try:
-            response = self.make_ai_service_request("/testgen", payload=payload, timeout=600)
+            if single_prompt:
+                response = self.make_ai_service_request("/testgen-single-prompt", payload=payload, timeout=600)
+            else:
+                response = self.make_ai_service_request("/testgen", payload=payload, timeout=600)
         except requests.exceptions.RequestException as e:
             logger.exception(f"Error generating tests: {e}")
             ph("cli-testgen-error-caught", {"error": str(e)})
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -162,7 +162,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
             f"Generating new tests and optimizations for function {self.function_to_optimize.function_name}",
             transient=True,
         ):
-            #TODO: do a/b testing with same codegen but different testgen
+            # TODO: do a/b testing with same codegen but different testgen
             generated_results = self.generate_tests_and_optimizations(
                 testgen_context_code=code_context.testgen_context_code,
                 read_writable_code=code_context.read_writable_code,
@@ -760,7 +760,8 @@ def generate_tests_and_optimizations(
         run_experiment: bool = False,
     ) -> Result[tuple[GeneratedTestsList, dict[str, list[FunctionCalledInTest]], OptimizationSet], str]:
         assert len(generated_test_paths) == N_TESTS_TO_GENERATE
-        max_workers = N_TESTS_TO_GENERATE + 2 if not run_experiment else N_TESTS_TO_GENERATE + 3
+        max_workers = 2*N_TESTS_TO_GENERATE + 2 if not run_experiment else 2*N_TESTS_TO_GENERATE + 3
+        self.local_aiservice_client = LocalAiServiceClient()
         console.rule()
         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
             # Submit the test generation task as future
@@ -770,6 +771,7 @@ def generate_tests_and_optimizations(
                 [definition.fully_qualified_name for definition in helper_functions],
                 generated_test_paths,
                 generated_perf_test_paths,
+                run_experiment=True,
             )
             future_optimization_candidates = executor.submit(
                 self.aiservice_client.optimize_python_code,
@@ -1223,8 +1225,9 @@ def generate_and_instrument_tests(
         helper_function_names: list[str],
         generated_test_paths: list[Path],
         generated_perf_test_paths: list[Path],
+        run_experiment: bool
     ) -> list[concurrent.futures.Future]:
-        return [
+        original = [
             executor.submit(
                 generate_tests,
                 self.aiservice_client,
@@ -1234,12 +1237,34 @@ def generate_and_instrument_tests(
                 Path(self.original_module_path),
                 self.test_cfg,
                 INDIVIDUAL_TESTCASE_TIMEOUT,
-                self.function_trace_id,
+                self.function_trace_id,#[:-4]+"TST0" if run_experiment else self.function_trace_id,
                 test_index,
                 test_path,
                 test_perf_path,
+                single_prompt=False,
             )
             for test_index, (test_path, test_perf_path) in enumerate(
                 zip(generated_test_paths, generated_perf_test_paths)
             )
         ]
+        if run_experiment:
+            original+=[
+                executor.submit(
+                    generate_tests,
+                    self.local_aiservice_client,
+                    source_code_being_tested,
+                    self.function_to_optimize,
+                    helper_function_names,
+                    Path(self.original_module_path),
+                    self.test_cfg,
+                    INDIVIDUAL_TESTCASE_TIMEOUT,
+                    self.function_trace_id,#[:-4]+"TST1",
+                    test_index,
+                    test_path,
+                    test_perf_path,
+                    single_prompt=True,
+                )
+                for test_index, (test_path, test_perf_path) in enumerate(
+                    zip(generated_test_paths, generated_perf_test_paths)
+                )]
+        return original
diff --git a/codeflash/verification/verifier.py b/codeflash/verification/verifier.py
@@ -26,6 +26,7 @@ def generate_tests(
     test_index: int,
     test_path: Path,
     test_perf_path: Path,
+    single_prompt: bool=False,
 ) -> tuple[str, str, Path] | None:
     # TODO: Sometimes this recreates the original Class definition. This overrides and messes up the original
     #  class import. Remove the recreation of the class definition
@@ -40,6 +41,7 @@ def generate_tests(
         test_timeout=test_timeout,
         trace_id=function_trace_id,
         test_index=test_index,
+        single_prompt=single_prompt,
     )
     if response and isinstance(response, tuple) and len(response) == 3:
         generated_test_source, instrumented_behavior_test_source, instrumented_perf_test_source = response