Skip to content

Commit f77423c

Browse files
authored
Merge branch 'main' into feat/verify-and-submit-api-key-in-lsp
2 parents a0ff82f + baee96e commit f77423c

File tree

95 files changed

+3746
-3120
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

95 files changed

+3746
-3120
lines changed

.github/workflows/deploy-docs-to-azure.yaml

Lines changed: 0 additions & 31 deletions
This file was deleted.

.github/workflows/unit-tests.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
strategy:
1212
fail-fast: false
1313
matrix:
14-
python-version: ["3.9", "3.10", "3.11", "3.12"]
14+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
1515
continue-on-error: true
1616
runs-on: ubuntu-latest
1717
steps:
@@ -30,4 +30,4 @@ jobs:
3030
run: uv sync
3131

3232
- name: Unit tests
33-
run: uv run pytest tests/ --benchmark-skip -m "not ci_skip"
33+
run: uv run pytest tests/

.pre-commit-config.yaml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
repos:
2-
- repo: https://github.com/astral-sh/ruff-pre-commit
3-
rev: "v0.11.0"
4-
hooks:
5-
- id: ruff
6-
args: [--fix, --exit-non-zero-on-fix, --config=pyproject.toml]
7-
- id: ruff-format
2+
- repo: https://github.com/astral-sh/ruff-pre-commit
3+
rev: v0.12.7
4+
hooks:
5+
# Run the linter.
6+
- id: ruff-check
7+
# Run the formatter.
8+
- id: ruff-format
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"""CodeFlash Benchmark - Pytest benchmarking plugin for codeflash.ai."""


# Package version; keep in sync with the `version` field in pyproject.toml.
__version__ = "0.1.0"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from __future__ import annotations
2+
3+
import importlib.util
4+
5+
import pytest
6+
7+
from codeflash.benchmarking.plugin.plugin import codeflash_benchmark_plugin
8+
9+
PYTEST_BENCHMARK_INSTALLED = importlib.util.find_spec("pytest_benchmark") is not None
10+
11+
12+
def pytest_configure(config: pytest.Config) -> None:
    """Register the benchmark marker and disable conflicting plugins."""
    config.addinivalue_line("markers", "benchmark: mark test as a benchmark that should be run with codeflash tracing")

    # Only neutralize pytest-benchmark when codeflash tracing is active AND the
    # plugin is actually installed; otherwise there is nothing to conflict with.
    if not (PYTEST_BENCHMARK_INSTALLED and config.getoption("--codeflash-trace")):
        return

    config.option.benchmark_disable = True
    # Block the plugin under both of its registration spellings.
    for blocked_name in ("pytest_benchmark", "pytest-benchmark"):
        config.pluginmanager.set_blocked(blocked_name)
20+
21+
22+
def pytest_addoption(parser: pytest.Parser) -> None:
    """Expose the --codeflash-trace command-line flag to pytest."""
    trace_option = {
        "action": "store_true",
        "default": False,
        "help": "Enable CodeFlash tracing for benchmarks",
    }
    parser.addoption("--codeflash-trace", **trace_option)
26+
27+
28+
@pytest.fixture
def benchmark(request: pytest.FixtureRequest) -> object:
    """Benchmark fixture that works with or without pytest-benchmark installed.

    Resolution order:
    1. With --codeflash-trace: return codeflash's own Benchmark wrapper.
    2. Without tracing but with pytest-benchmark installed: hand back a real
       pytest-benchmark fixture built from its session state.
    3. Otherwise: a no-op callable that simply invokes the benchmarked function.
    """
    config = request.config

    # If --codeflash-trace is enabled, use our implementation
    if config.getoption("--codeflash-trace"):
        return codeflash_benchmark_plugin.Benchmark(request)

    # If pytest-benchmark is installed and --codeflash-trace is not enabled,
    # return the normal pytest-benchmark fixture
    if PYTEST_BENCHMARK_INSTALLED:
        from pytest_benchmark.fixture import BenchmarkFixture as BSF  # noqa: N814

        # NOTE(review): reaches into pytest-benchmark's private session object;
        # may break if that plugin changes its internals — confirm on upgrades.
        bs = getattr(config, "_benchmarksession", None)
        if bs and bs.skip:
            pytest.skip("Benchmarks are skipped (--benchmark-skip was used).")

        node = request.node
        marker = node.get_closest_marker("benchmark")
        # Per-test marker kwargs override the session-wide benchmark options below.
        options = dict(marker.kwargs) if marker else {}

        if bs:
            return BSF(
                node,
                add_stats=bs.benchmarks.append,
                logger=bs.logger,
                warner=request.node.warn,
                disabled=bs.disabled,
                **dict(bs.options, **options),
            )
        # pytest-benchmark importable but no active session: degrade to pass-through.
        return lambda func, *args, **kwargs: func(*args, **kwargs)

    # No benchmarking machinery available at all: run the function directly.
    return lambda func, *args, **kwargs: func(*args, **kwargs)

codeflash-benchmark/pyproject.toml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
[project]
2+
name = "codeflash-benchmark"
3+
version = "0.1.0"
4+
description = "Pytest benchmarking plugin for codeflash.ai - automatic code performance optimization"
5+
authors = [{ name = "CodeFlash Inc.", email = "[email protected]" }]
6+
requires-python = ">=3.9"
7+
readme = "README.md"
8+
license = {text = "BSL-1.1"}
9+
keywords = [
10+
"codeflash",
11+
"benchmark",
12+
"pytest",
13+
"performance",
14+
"testing",
15+
]
16+
dependencies = [
17+
"pytest>=7.0.0,!=8.3.4",
18+
]
19+
20+
[project.urls]
21+
Homepage = "https://codeflash.ai"
22+
Repository = "https://github.com/codeflash-ai/codeflash-benchmark"
23+
24+
[project.entry-points.pytest11]
25+
codeflash-benchmark = "codeflash_benchmark.plugin"
26+
27+
[build-system]
28+
requires = ["setuptools>=45", "wheel", "setuptools_scm"]
29+
build-backend = "setuptools.build_meta"
30+
31+
[tool.setuptools]
32+
packages = ["codeflash_benchmark"]

codeflash/api/aiservice.py

Lines changed: 154 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from codeflash.cli_cmds.console import console, logger
1313
from codeflash.code_utils.env_utils import get_codeflash_api_key, is_LSP_enabled
1414
from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
15-
from codeflash.models.models import OptimizedCandidate
15+
from codeflash.models.ExperimentMetadata import ExperimentMetadata
16+
from codeflash.models.models import AIServiceRefinerRequest, OptimizedCandidate
1617
from codeflash.telemetry.posthog_cf import ph
1718
from codeflash.version import __version__ as codeflash_version
1819

@@ -21,6 +22,7 @@
2122

2223
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
2324
from codeflash.models.ExperimentMetadata import ExperimentMetadata
25+
from codeflash.models.models import AIServiceRefinerRequest
2426

2527

2628
class AiServiceClient:
@@ -36,7 +38,11 @@ def get_aiservice_base_url(self) -> str:
3638
return "https://app.codeflash.ai"
3739

3840
def make_ai_service_request(
39-
self, endpoint: str, method: str = "POST", payload: dict[str, Any] | None = None, timeout: float | None = None
41+
self,
42+
endpoint: str,
43+
method: str = "POST",
44+
payload: dict[str, Any] | list[dict[str, Any]] | None = None,
45+
timeout: float | None = None,
4046
) -> requests.Response:
4147
"""Make an API request to the given endpoint on the AI service.
4248
@@ -98,11 +104,7 @@ def optimize_python_code( # noqa: D417
98104
99105
"""
100106
start_time = time.perf_counter()
101-
try:
102-
git_repo_owner, git_repo_name = get_repo_owner_and_name()
103-
except Exception as e:
104-
logger.warning(f"Could not determine repo owner and name: {e}")
105-
git_repo_owner, git_repo_name = None, None
107+
git_repo_owner, git_repo_name = safe_get_repo_owner_and_name()
106108

107109
payload = {
108110
"source_code": source_code,
@@ -219,13 +221,145 @@ def optimize_python_code_line_profiler( # noqa: D417
219221
console.rule()
220222
return []
221223

224+
def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]) -> list[OptimizedCandidate]:
    """Ask the AI service to refine previously generated optimization candidates.

    Args:
        request: A list of optimization candidate details for refinement.

    Returns:
    -------
    - list[OptimizedCandidate]: refined candidates, or an empty list on any failure.

    """
    payload = [
        {
            "optimization_id": opt.optimization_id,
            "original_source_code": opt.original_source_code,
            "read_only_dependency_code": opt.read_only_dependency_code,
            "original_line_profiler_results": opt.original_line_profiler_results,
            "original_code_runtime": opt.original_code_runtime,
            "optimized_source_code": opt.optimized_source_code,
            "optimized_explanation": opt.optimized_explanation,
            "optimized_line_profiler_results": opt.optimized_line_profiler_results,
            "optimized_code_runtime": opt.optimized_code_runtime,
            "speedup": opt.speedup,
            "trace_id": opt.trace_id,
        }
        for opt in request
    ]
    logger.info(f"Refining {len(request)} optimizations…")
    console.rule()
    try:
        # Refinement can be slow server-side; allow a generous timeout.
        response = self.make_ai_service_request("/refinement", payload=payload, timeout=600)
    except requests.exceptions.RequestException as e:
        logger.exception(f"Error generating optimization refinements: {e}")
        ph("cli-optimize-error-caught", {"error": str(e)})
        return []

    if response.status_code == 200:
        refined_optimizations = response.json()["refinements"]
        logger.info(f"Generated {len(refined_optimizations)} candidate refinements.")
        console.rule()
        return [
            OptimizedCandidate(
                source_code=opt["source_code"],
                explanation=opt["explanation"],
                # Re-tag the last 4 chars of the id so refined candidates are
                # distinguishable from their originals downstream.
                optimization_id=opt["optimization_id"][:-4] + "refi",
            )
            for opt in refined_optimizations
        ]
    try:
        error = response.json()["error"]
    except Exception:
        error = response.text
    # Fixed: this failure path previously logged "optimized candidates"
    # (copy-paste from optimize_python_code); it reports refinement failures.
    logger.error(f"Error generating optimization refinements: {response.status_code} - {error}")
    ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
    console.rule()
    return []
280+
281+
def get_new_explanation(  # noqa: D417
    self,
    source_code: str,
    optimized_code: str,
    dependency_code: str,
    trace_id: str,
    original_line_profiler_results: str,
    optimized_line_profiler_results: str,
    original_code_runtime: str,
    optimized_code_runtime: str,
    speedup: str,
    annotated_tests: str,
    optimization_id: str,
    original_explanation: str,
) -> str:
    """Request a regenerated explanation for an optimization from the AI service.

    Parameters
    ----------
    - source_code (str): The python code to optimize.
    - optimized_code (str): The python code generated by the AI service.
    - dependency_code (str): The dependency code used as read-only context for the optimization
    - original_line_profiler_results: str - line profiler results for the baseline code
    - optimized_line_profiler_results: str - line profiler results for the optimized code
    - original_code_runtime: str - runtime for the baseline code
    - optimized_code_runtime: str - runtime for the optimized code
    - speedup: str - speedup of the optimized code
    - annotated_tests: str - test functions annotated with runtime
    - optimization_id: str - unique id of opt candidate
    - original_explanation: str - original_explanation generated for the opt candidate

    Returns
    -------
    - str: the regenerated explanation, or an empty string on any failure.
      (Fixed: the previous docstring incorrectly claimed a list of candidates.)

    """
    payload = {
        "trace_id": trace_id,
        "source_code": source_code,
        "optimized_code": optimized_code,
        "original_line_profiler_results": original_line_profiler_results,
        "optimized_line_profiler_results": optimized_line_profiler_results,
        "original_code_runtime": original_code_runtime,
        "optimized_code_runtime": optimized_code_runtime,
        "speedup": speedup,
        "annotated_tests": annotated_tests,
        "optimization_id": optimization_id,
        "original_explanation": original_explanation,
        "dependency_code": dependency_code,
    }
    logger.info("Generating explanation")
    console.rule()
    try:
        response = self.make_ai_service_request("/explain", payload=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        logger.exception(f"Error generating explanations: {e}")
        ph("cli-optimize-error-caught", {"error": str(e)})
        return ""

    if response.status_code == 200:
        explanation: str = response.json()["explanation"]
        logger.debug(f"New Explanation: {explanation}")
        console.rule()
        return explanation
    try:
        error = response.json()["error"]
    except Exception:
        error = response.text
    # Fixed: this failure path previously logged "optimized candidates"
    # (copy-paste from another method); it reports explanation failures.
    logger.error(f"Error generating explanation: {response.status_code} - {error}")
    ph("cli-optimize-error-response", {"response_status_code": response.status_code, "error": error})
    console.rule()
    return ""
353+
222354
def log_results( # noqa: D417
223355
self,
224356
function_trace_id: str,
225357
speedup_ratio: dict[str, float | None] | None,
226358
original_runtime: float | None,
227359
optimized_runtime: dict[str, float | None] | None,
228360
is_correct: dict[str, bool] | None,
361+
optimized_line_profiler_results: dict[str, str] | None,
362+
metadata: dict[str, Any] | None,
229363
) -> None:
230364
"""Log features to the database.
231365
@@ -236,6 +370,8 @@ def log_results( # noqa: D417
236370
- original_runtime (Optional[Dict[str, float]]): The original runtime.
237371
- optimized_runtime (Optional[Dict[str, float]]): The optimized runtime.
238372
- is_correct (Optional[Dict[str, bool]]): Whether the optimized code is correct.
373+
- optimized_line_profiler_results: line_profiler results for every candidate mapped to their optimization_id
374+
- metadata: contains the best optimization id
239375
240376
"""
241377
payload = {
@@ -245,6 +381,8 @@ def log_results( # noqa: D417
245381
"optimized_runtime": optimized_runtime,
246382
"is_correct": is_correct,
247383
"codeflash_version": codeflash_version,
384+
"optimized_line_profiler_results": optimized_line_profiler_results,
385+
"metadata": metadata,
248386
}
249387
try:
250388
self.make_ai_service_request("/log_features", payload=payload, timeout=5)
@@ -331,3 +469,12 @@ class LocalAiServiceClient(AiServiceClient):
331469
def get_aiservice_base_url(self) -> str:
332470
"""Get the base URL for the local AI service."""
333471
return "http://localhost:8000"
472+
473+
474+
def safe_get_repo_owner_and_name() -> tuple[str | None, str | None]:
    """Best-effort lookup of the git repository's owner and name.

    Returns (owner, name), or (None, None) when the repository cannot be
    inspected — a warning is logged instead of propagating the error.
    """
    try:
        owner, name = get_repo_owner_and_name()
    except Exception as e:  # deliberate broad catch: repo info is optional
        logger.warning(f"Could not determine repo owner and name: {e}")
        owner = name = None
    return owner, name

0 commit comments

Comments
 (0)