Skip to content

Commit 32264a3

Browse files
authored
Merge branch 'main' into feat-staging
2 parents 7170b5c + d5ec766 commit 32264a3

28 files changed

+709
-151
lines changed

.github/workflows/unit-tests.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
strategy:
1212
fail-fast: false
1313
matrix:
14-
python-version: ["3.9", "3.10", "3.11", "3.12"]
14+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
1515
continue-on-error: true
1616
runs-on: ubuntu-latest
1717
steps:
@@ -30,4 +30,4 @@ jobs:
3030
run: uv sync
3131

3232
- name: Unit tests
33-
run: uv run pytest tests/ --benchmark-skip -m "not ci_skip"
33+
run: uv run pytest tests/
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"""CodeFlash Benchmark - Pytest benchmarking plugin for codeflash.ai."""
2+
3+
__version__ = "0.1.0"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from __future__ import annotations
2+
3+
import importlib.util
4+
5+
import pytest
6+
7+
from codeflash.benchmarking.plugin.plugin import codeflash_benchmark_plugin
8+
9+
PYTEST_BENCHMARK_INSTALLED = importlib.util.find_spec("pytest_benchmark") is not None
10+
11+
12+
def pytest_configure(config: pytest.Config) -> None:
    """Register the ``benchmark`` marker and neutralize pytest-benchmark when tracing.

    When ``--codeflash-trace`` is active and pytest-benchmark is installed, the
    upstream plugin is disabled and blocked so its ``benchmark`` fixture cannot
    clash with the codeflash one.
    """
    config.addinivalue_line("markers", "benchmark: mark test as a benchmark that should be run with codeflash tracing")

    tracing_enabled = config.getoption("--codeflash-trace")
    if tracing_enabled and PYTEST_BENCHMARK_INSTALLED:
        config.option.benchmark_disable = True
        # Block both spellings pytest may use to identify the plugin.
        for blocked_name in ("pytest_benchmark", "pytest-benchmark"):
            config.pluginmanager.set_blocked(blocked_name)
20+
21+
22+
def pytest_addoption(parser: pytest.Parser) -> None:
    """Add the ``--codeflash-trace`` CLI flag (off by default) to pytest."""
    parser.addoption(
        "--codeflash-trace",
        action="store_true",
        default=False,
        help="Enable CodeFlash tracing for benchmarks",
    )
26+
27+
28+
@pytest.fixture
def benchmark(request: pytest.FixtureRequest) -> object:
    """Provide a ``benchmark`` callable whether or not pytest-benchmark is installed.

    Resolution order:
    1. ``--codeflash-trace`` enabled -> codeflash's tracing Benchmark object.
    2. pytest-benchmark installed with an active session -> its real fixture
       (honouring ``--benchmark-skip``).
    3. Otherwise -> a pass-through callable that simply invokes the function.
    """
    cfg = request.config

    if cfg.getoption("--codeflash-trace"):
        # Tracing mode: always use the codeflash implementation.
        return codeflash_benchmark_plugin.Benchmark(request)

    def _passthrough(func, *args, **kwargs):  # noqa: ANN001, ANN002, ANN003, ANN202
        return func(*args, **kwargs)

    if not PYTEST_BENCHMARK_INSTALLED:
        return _passthrough

    from pytest_benchmark.fixture import BenchmarkFixture as BSF  # noqa: N814

    session = getattr(cfg, "_benchmarksession", None)
    if session and session.skip:
        pytest.skip("Benchmarks are skipped (--benchmark-skip was used).")

    if not session:
        # pytest-benchmark is importable but has no session; behave as a no-op wrapper.
        return _passthrough

    marker = request.node.get_closest_marker("benchmark")
    marker_options = dict(marker.kwargs) if marker else {}
    return BSF(
        request.node,
        add_stats=session.benchmarks.append,
        logger=session.logger,
        warner=request.node.warn,
        disabled=session.disabled,
        **dict(session.options, **marker_options),
    )

codeflash-benchmark/pyproject.toml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
[project]
2+
name = "codeflash-benchmark"
3+
version = "0.1.0"
4+
description = "Pytest benchmarking plugin for codeflash.ai - automatic code performance optimization"
5+
authors = [{ name = "CodeFlash Inc.", email = "[email protected]" }]
6+
requires-python = ">=3.9"
7+
readme = "README.md"
8+
license = {text = "BSL-1.1"}
9+
keywords = [
10+
"codeflash",
11+
"benchmark",
12+
"pytest",
13+
"performance",
14+
"testing",
15+
]
16+
dependencies = [
17+
"pytest>=7.0.0,!=8.3.4",
18+
]
19+
20+
[project.urls]
21+
Homepage = "https://codeflash.ai"
22+
Repository = "https://github.com/codeflash-ai/codeflash-benchmark"
23+
24+
[project.entry-points.pytest11]
25+
codeflash-benchmark = "codeflash_benchmark.plugin"
26+
27+
[build-system]
28+
requires = ["setuptools>=45", "wheel", "setuptools_scm"]
29+
build-backend = "setuptools.build_meta"
30+
31+
[tool.setuptools]
32+
packages = ["codeflash_benchmark"]

codeflash/api/aiservice.py

Lines changed: 81 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from codeflash.cli_cmds.console import console, logger
1313
from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled
1414
from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
15-
from codeflash.models.models import OptimizedCandidate
15+
from codeflash.models.ExperimentMetadata import ExperimentMetadata
16+
from codeflash.models.models import AIServiceRefinerRequest, OptimizedCandidate
1617
from codeflash.telemetry.posthog_cf import ph
1718
from codeflash.version import __version__ as codeflash_version
1819

@@ -21,6 +22,7 @@
2122

2223
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
2324
from codeflash.models.ExperimentMetadata import ExperimentMetadata
25+
from codeflash.models.models import AIServiceRefinerRequest
2426

2527

2628
class AiServiceClient:
@@ -36,7 +38,11 @@ def get_aiservice_base_url(self) -> str:
3638
return "https://app.codeflash.ai"
3739

3840
def make_ai_service_request(
39-
self, endpoint: str, method: str = "POST", payload: dict[str, Any] | None = None, timeout: float | None = None
41+
self,
42+
endpoint: str,
43+
method: str = "POST",
44+
payload: dict[str, Any] | list[dict[str, Any]] | None = None,
45+
timeout: float | None = None,
4046
) -> requests.Response:
4147
"""Make an API request to the given endpoint on the AI service.
4248
@@ -98,11 +104,7 @@ def optimize_python_code( # noqa: D417
98104
99105
"""
100106
start_time = time.perf_counter()
101-
try:
102-
git_repo_owner, git_repo_name = get_repo_owner_and_name()
103-
except Exception as e:
104-
logger.warning(f"Could not determine repo owner and name: {e}")
105-
git_repo_owner, git_repo_name = None, None
107+
git_repo_owner, git_repo_name = safe_get_repo_owner_and_name()
106108

107109
payload = {
108110
"source_code": source_code,
@@ -219,13 +221,72 @@ def optimize_python_code_line_profiler( # noqa: D417
219221
console.rule()
220222
return []
221223

224+
def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]:
    """Optimize the given python code for performance by making a request to the Django endpoint.

    Args:
        request: A list of optimization candidate details for refinement

    Returns:
    -------
    - List[OptimizationCandidate]: A list of Optimization Candidates.

    """
    # The payload keys mirror the AIServiceRefinerRequest attribute names one-to-one.
    field_names = (
        "optimization_id",
        "original_source_code",
        "read_only_dependency_code",
        "original_line_profiler_results",
        "original_code_runtime",
        "optimized_source_code",
        "optimized_explanation",
        "optimized_line_profiler_results",
        "optimized_code_runtime",
        "speedup",
        "trace_id",
    )
    payload = [{name: getattr(opt, name) for name in field_names} for opt in request]

    logger.info(f"Refining {len(request)} optimizations…")
    console.rule()
    try:
        response = self.make_ai_service_request("/refinement", payload=payload, timeout=600)
    except requests.exceptions.RequestException as e:
        logger.exception(f"Error generating optimization refinements: {e}")
        ph("cli-optimize-error-caught", {"error": str(e)})
        return []

    if response.status_code != 200:
        try:
            error = response.json()["error"]
        except Exception:
            error = response.text
        logger.error(f"Error generating optimized candidates: {response.status_code} - {error}")
        ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
        console.rule()
        return []

    refined_optimizations = response.json()["refinements"]
    logger.info(f"Generated {len(refined_optimizations)} candidate refinements.")
    console.rule()
    # Refined candidates get a derived id: the original id with its last 4 chars swapped for "refi".
    return [
        OptimizedCandidate(
            source_code=opt["source_code"],
            explanation=opt["explanation"],
            optimization_id=opt["optimization_id"][:-4] + "refi",
        )
        for opt in refined_optimizations
    ]
280+
222281
def log_results( # noqa: D417
223282
self,
224283
function_trace_id: str,
225284
speedup_ratio: dict[str, float | None] | None,
226285
original_runtime: float | None,
227286
optimized_runtime: dict[str, float | None] | None,
228287
is_correct: dict[str, bool] | None,
288+
optimized_line_profiler_results: dict[str, str] | None,
289+
metadata: dict[str, Any] | None,
229290
) -> None:
230291
"""Log features to the database.
231292
@@ -236,6 +297,8 @@ def log_results( # noqa: D417
236297
- original_runtime (Optional[Dict[str, float]]): The original runtime.
237298
- optimized_runtime (Optional[Dict[str, float]]): The optimized runtime.
238299
- is_correct (Optional[Dict[str, bool]]): Whether the optimized code is correct.
300+
- optimized_line_profiler_results: line_profiler results for every candidate mapped to their optimization_id
301+
- metadata: contains the best optimization id
239302
240303
"""
241304
payload = {
@@ -245,6 +308,8 @@ def log_results( # noqa: D417
245308
"optimized_runtime": optimized_runtime,
246309
"is_correct": is_correct,
247310
"codeflash_version": codeflash_version,
311+
"optimized_line_profiler_results": optimized_line_profiler_results,
312+
"metadata": metadata,
248313
}
249314
try:
250315
self.make_ai_service_request("/log_features", payload=payload, timeout=5)
@@ -331,3 +396,12 @@ class LocalAiServiceClient(AiServiceClient):
331396
def get_aiservice_base_url(self) -> str:
332397
"""Get the base URL for the local AI service."""
333398
return "http://localhost:8000"
399+
400+
401+
def safe_get_repo_owner_and_name() -> tuple[str | None, str | None]:
    """Return ``(owner, repo)`` for the current git repository, or ``(None, None)``.

    Any exception raised by ``get_repo_owner_and_name`` (e.g. when not inside a
    git repository) is logged as a warning instead of propagating.
    """
    try:
        owner, repo = get_repo_owner_and_name()
    except Exception as e:  # best-effort: repo metadata is optional
        logger.warning(f"Could not determine repo owner and name: {e}")
        owner, repo = None, None
    return owner, repo

codeflash/api/cfapi.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,3 +316,16 @@ def mark_optimization_success(trace_id: str, *, is_optimization_found: bool) ->
316316
"""
317317
payload = {"trace_id": trace_id, "is_optimization_found": is_optimization_found}
318318
return make_cfapi_request(endpoint="/mark-as-success", method="POST", payload=payload)
319+
320+
321+
def send_completion_email() -> Response:
    """Send an email notification when codeflash --all completes.

    If the repository owner/name cannot be determined, the error is reported to
    Sentry and a synthetic 500 response is returned instead of raising.
    """
    try:
        owner, repo = get_repo_owner_and_name()
    except Exception as e:
        sentry_sdk.capture_exception(e)
        fallback = requests.Response()
        fallback.status_code = 500
        return fallback
    return make_cfapi_request(
        endpoint="/send-completion-email",
        method="POST",
        payload={"owner": owner, "repo": repo},
    )

codeflash/benchmarking/plugin/plugin.py

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
11
from __future__ import annotations
22

3+
import importlib.util
34
import os
45
import sqlite3
56
import sys
67
import time
78
from pathlib import Path
9+
from typing import TYPE_CHECKING
810

911
import pytest
1012

1113
from codeflash.benchmarking.codeflash_trace import codeflash_trace
1214
from codeflash.code_utils.code_utils import module_name_from_file_path
13-
from codeflash.models.models import BenchmarkKey
15+
16+
if TYPE_CHECKING:
17+
from codeflash.models.models import BenchmarkKey
18+
19+
PYTEST_BENCHMARK_INSTALLED = importlib.util.find_spec("pytest_benchmark") is not None
1420

1521

1622
class CodeFlashBenchmarkPlugin:
@@ -71,6 +77,8 @@ def close(self) -> None:
7177

7278
@staticmethod
7379
def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[BenchmarkKey, int]]:
80+
from codeflash.models.models import BenchmarkKey
81+
7482
"""Process the trace file and extract timing data for all functions.
7583
7684
Args:
@@ -131,6 +139,8 @@ def get_function_benchmark_timings(trace_path: Path) -> dict[str, dict[Benchmark
131139

132140
@staticmethod
133141
def get_benchmark_timings(trace_path: Path) -> dict[BenchmarkKey, int]:
142+
from codeflash.models.models import BenchmarkKey
143+
134144
"""Extract total benchmark timings from trace files.
135145
136146
Args:
@@ -199,23 +209,6 @@ def pytest_sessionfinish(self, session, exitstatus) -> None: # noqa: ANN001, AR
199209
# Close the database connection
200210
self.close()
201211

202-
@staticmethod
203-
def pytest_addoption(parser: pytest.Parser) -> None:
204-
parser.addoption("--codeflash-trace", action="store_true", default=False, help="Enable CodeFlash tracing")
205-
206-
@staticmethod
207-
def pytest_plugin_registered(plugin, manager) -> None: # noqa: ANN001
208-
# Not necessary since run with -p no:benchmark, but just in case
209-
if hasattr(plugin, "name") and plugin.name == "pytest-benchmark":
210-
manager.unregister(plugin)
211-
212-
@staticmethod
213-
def pytest_configure(config: pytest.Config) -> None:
214-
"""Register the benchmark marker."""
215-
config.addinivalue_line(
216-
"markers", "benchmark: mark test as a benchmark that should be run with codeflash tracing"
217-
)
218-
219212
@staticmethod
220213
def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None:
221214
# Skip tests that don't have the benchmark fixture
@@ -258,9 +251,14 @@ def wrapped_func(*args, **kwargs): # noqa: ANN002, ANN003, ANN202
258251

259252
def _run_benchmark(self, func, *args, **kwargs): # noqa: ANN001, ANN002, ANN003, ANN202
260253
"""Actual benchmark implementation."""
254+
node_path = getattr(self.request.node, "path", None) or getattr(self.request.node, "fspath", None)
255+
if node_path is None:
256+
raise RuntimeError("Unable to determine test file path from pytest node")
257+
261258
benchmark_module_path = module_name_from_file_path(
262-
Path(str(self.request.node.fspath)), Path(codeflash_benchmark_plugin.project_root)
259+
Path(str(node_path)), Path(codeflash_benchmark_plugin.project_root), traverse_up=True
263260
)
261+
264262
benchmark_function_name = self.request.node.name
265263
line_number = int(str(sys._getframe(2).f_lineno)) # 2 frames up in the call stack # noqa: SLF001
266264
# Set env vars
@@ -286,13 +284,5 @@ def _run_benchmark(self, func, *args, **kwargs): # noqa: ANN001, ANN002, ANN003
286284

287285
return result
288286

289-
@staticmethod
290-
@pytest.fixture
291-
def benchmark(request: pytest.FixtureRequest) -> object:
292-
if not request.config.getoption("--codeflash-trace"):
293-
return None
294-
295-
return CodeFlashBenchmarkPlugin.Benchmark(request)
296-
297287

298288
codeflash_benchmark_plugin = CodeFlashBenchmarkPlugin()

0 commit comments

Comments
 (0)