added performance tests

mishushakov · mishushakov · commit 87ee552a3cb6 · 2025-08-28T18:20:03.000+02:00
diff --git a/.github/workflows/performance_tests.yml b/.github/workflows/performance_tests.yml
@@ -0,0 +1,51 @@
+# Reusable workflow: runs python/tests/performance.py against a template and
+# keeps the generated plot as a build artifact.
+name: Performance tests
+
+on:
+  workflow_call:
+    secrets:
+      E2B_API_KEY:
+        required: true
+    inputs:
+      E2B_DOMAIN:
+        required: false
+        type: string
+      E2B_TESTS_TEMPLATE:
+        required: false
+        type: string
+
+permissions:
+  contents: read
+
+jobs:
+  # NOTE(review): the job id `publish` looks copied from another workflow;
+  # kept unchanged in case anything refers to it.
+  publish:
+    defaults:
+      run:
+        working-directory: ./python
+    name: Performance tests
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install and configure Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.5.1
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Run performance tests
+        run: poetry run python tests/performance.py
+        env:
+          E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
+          E2B_DOMAIN: ${{ inputs.E2B_DOMAIN }}
+          E2B_TESTS_TEMPLATE: ${{ inputs.E2B_TESTS_TEMPLATE }}
+
+      # The script saves performance_plot.png into its working directory
+      # (./python).  Without this step the plot is lost when the runner is
+      # torn down.  `uses:` steps ignore `defaults.run.working-directory`,
+      # so the path is given relative to the repository root.
+      - name: Upload performance plot
+        uses: actions/upload-artifact@v4
+        with:
+          name: performance-plot
+          path: python/performance_plot.png
+          if-no-files-found: error
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
@@ -36,6 +36,14 @@ jobs:
     with:
       E2B_DOMAIN: ${{ vars.E2B_DOMAIN }}
       E2B_TESTS_TEMPLATE: ${{ needs.build-template.outputs.template_id }}
+  # Runs the reusable performance workflow against the template produced by
+  # build-template.
+  # NOTE(review): cleanup-build-template below does not list this job in its
+  # `needs`, so it can presumably start while this job is still using the
+  # template -- confirm, and add `performance-tests` to that list if so.
+  performance-tests:
+    uses: ./.github/workflows/performance_tests.yml
+    needs: build-template
+    secrets:
+      E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
+    with:
+      E2B_DOMAIN: ${{ vars.E2B_DOMAIN }}
+      E2B_TESTS_TEMPLATE: ${{ needs.build-template.outputs.template_id }}
   cleanup-build-template:
     uses: ./.github/workflows/cleanup_build_template.yml
     needs: [build-template, js-sdk, python-sdk]
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -24,6 +24,7 @@ pytest-asyncio = "^0.23.7"
 pytest-xdist = "^3.6.1"
 black = "23.12.1"
 pydoc-markdown = "^4.8.2"
+matplotlib = "^3.8.0"
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/python/tests/benchmarking.py b/python/tests/benchmarking.py
diff --git a/python/tests/performance.py b/python/tests/performance.py
@@ -0,0 +1,205 @@
+from e2b_code_interpreter import Sandbox
+import time
+import os
+import statistics
+import matplotlib.pyplot as plt
+
+# Benchmark configuration.  The CI workflow exports E2B_TESTS_TEMPLATE even
+# when its (optional) input is omitted, in which case the variable is set but
+# empty; `or` makes the defaults apply to empty values as well as unset ones.
+iterations_count = int(os.getenv("E2B_TESTS_PERF_ITERATIONS_COUNT") or 20)
+template = os.getenv("E2B_TESTS_TEMPLATE") or "code-interpreter-v1"
+
+# Lists to store metrics for each iteration (all values in milliseconds)
+sandbox_creation_times = []
+health_check_times = []
+first_code_run_times = []
+second_code_run_times = []
+
+for i in range(iterations_count):
+    print(f"\n--- Iteration {i + 1}/{iterations_count} ---")
+
+    # time.perf_counter() is monotonic and high-resolution, so measured
+    # durations are not distorted by wall-clock adjustments.
+    start_time = time.perf_counter()
+    sbx = Sandbox(template)
+    end_time = time.perf_counter()
+
+    # Everything that uses the sandbox runs under try/finally so that a
+    # failing step cannot leave the sandbox running.
+    try:
+        sandbox_creation_time = (end_time - start_time) * 1000
+        sandbox_creation_times.append(sandbox_creation_time)
+        print(f"Sandbox creation time: {sandbox_creation_time:.2f} milliseconds")
+
+        start_time = time.perf_counter()
+        sbx.commands.run("curl http://0.0.0.0:49999/health")
+        end_time = time.perf_counter()
+        health_check_time = (end_time - start_time) * 1000
+        health_check_times.append(health_check_time)
+        print(f"Health check time: {health_check_time:.2f} milliseconds")
+
+        # The same snippet is run twice so the first (cold) execution can be
+        # compared with the second one.
+        start_time = time.perf_counter()
+        sbx.run_code("print('Hello, world!')")
+        end_time = time.perf_counter()
+        first_code_run_time = (end_time - start_time) * 1000
+        first_code_run_times.append(first_code_run_time)
+        print(f"First code run time: {first_code_run_time:.2f} milliseconds")
+
+        start_time = time.perf_counter()
+        sbx.run_code("print('Hello, world!')")
+        end_time = time.perf_counter()
+        second_code_run_time = (end_time - start_time) * 1000
+        second_code_run_times.append(second_code_run_time)
+        print(f"Second code run time: {second_code_run_time:.2f} milliseconds")
+    finally:
+        sbx.kill()
+
+
+# Calculate and print summary statistics
+def print_metric_summary(metric_name, times):
+    """Print the low, high, mean and median of one metric.
+
+    Args:
+        metric_name: Heading used for the summary.
+        times: Collected measurements in milliseconds.  Nothing is printed
+            when this is empty.
+    """
+    if times:
+        # Compute every figure before emitting anything, then write the whole
+        # report with a single print call.
+        lowest, highest = min(times), max(times)
+        average = statistics.mean(times)
+        middle = statistics.median(times)
+
+        report_lines = [
+            f"\n{metric_name} Summary:",
+            f"  Low:    {lowest:.2f} ms",
+            f"  High:   {highest:.2f} ms",
+            f"  Mean:   {average:.2f} ms",
+            f"  Median: {middle:.2f} ms",
+        ]
+        print("\n".join(report_lines))
+
+
+# Banner followed by one summary per collected metric.
+banner = "=" * 50
+print("\n" + banner)
+print("PERFORMANCE SUMMARY")
+print(banner)
+
+for summary_title, samples in (
+    ("Sandbox Creation Time", sandbox_creation_times),
+    ("Health Check Time", health_check_times),
+    ("First Code Run Time", first_code_run_times),
+    ("Second Code Run Time", second_code_run_times),
+):
+    print_metric_summary(summary_title, samples)
+
+
+def create_performance_plot(
+    template,
+    iterations_count,
+    sandbox_creation_times,
+    health_check_times,
+    first_code_run_times,
+    second_code_run_times,
+):
+    """Create and save a performance visualization plot.
+
+    Draws two stacked charts -- every metric per iteration, and a box plot of
+    each metric's distribution -- adds a text box with the per-metric means,
+    and writes the figure to ``performance_plot.png`` in the current
+    directory.
+
+    Args:
+        template: Template name shown in the chart titles.
+        iterations_count: Number of iterations measured.  Each list of
+            timings is plotted against iterations ``1..iterations_count``,
+            so every list is expected to have this many entries.
+        sandbox_creation_times: Sandbox creation timings, in milliseconds.
+        health_check_times: Health check timings, in milliseconds.
+        first_code_run_times: First code run timings, in milliseconds.
+        second_code_run_times: Second code run timings, in milliseconds.
+
+    Returns:
+        The file name of the saved plot, or ``None`` when any metric has no
+        measurements and the plot is therefore skipped.
+    """
+    # One row per metric:
+    # (name, box-plot tick label, samples, line format, marker fill,
+    #  marker edge, box fill)
+    metrics = [
+        (
+            "Sandbox Creation",
+            "Sandbox\nCreation",
+            sandbox_creation_times,
+            "b-o",
+            "blue",
+            "darkblue",
+            "lightblue",
+        ),
+        (
+            "Health Check",
+            "Health\nCheck",
+            health_check_times,
+            "g-s",
+            "green",
+            "darkgreen",
+            "lightgreen",
+        ),
+        (
+            "First Code Run",
+            "First Code\nRun",
+            first_code_run_times,
+            "r-^",
+            "red",
+            "darkred",
+            "lightcoral",
+        ),
+        (
+            "Second Code Run",
+            "Second Code\nRun",
+            second_code_run_times,
+            "m-d",
+            "magenta",
+            "darkmagenta",
+            "plum",
+        ),
+    ]
+
+    # statistics.mean() raises on an empty sequence, so skip plotting instead
+    # of crashing when nothing was measured (e.g. zero iterations).
+    if any(len(row[2]) == 0 for row in metrics):
+        print("\nNo measurements collected; skipping performance plot.")
+        return None
+
+    print("\nGenerating performance plot...")
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
+
+    # Close the figure whatever happens so it does not stay registered with
+    # pyplot after this function returns.
+    try:
+        # Plot 1: All metrics over iterations
+        iterations = list(range(1, iterations_count + 1))
+        for name, _, samples, line_format, fill, edge, _ in metrics:
+            ax1.plot(
+                iterations,
+                samples,
+                line_format,
+                label=name,
+                linewidth=1.5,
+                markersize=6,
+                markerfacecolor=fill,
+                markeredgecolor=edge,
+                markeredgewidth=1,
+            )
+
+        ax1.set_xlabel("Iteration")
+        ax1.set_ylabel("Time (ms)")
+        ax1.set_title(
+            f"Performance Metrics Over {iterations_count} Iterations - {template}"
+        )
+        ax1.legend()
+        ax1.grid(True, alpha=0.3)
+
+        # Set x-axis to show each iteration step
+        ax1.set_xticks(iterations)
+        ax1.set_xlim(0.5, iterations_count + 0.5)
+
+        # Plot 2: Box plot for distribution.  The tick labels are applied
+        # after drawing because boxplot's `labels=` keyword is deprecated
+        # since Matplotlib 3.9, while its replacement is missing from 3.8.
+        box_plot = ax2.boxplot([row[2] for row in metrics], patch_artist=True)
+        ax2.set_xticklabels([row[1] for row in metrics])
+        for patch, row in zip(box_plot["boxes"], metrics):
+            patch.set_facecolor(row[6])
+
+        ax2.set_ylabel("Time (ms)")
+        ax2.set_title(f"Performance Distribution - {template}")
+        ax2.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+
+        # Show summary statistics in the plot
+        stats_lines = ["Summary Statistics:"]
+        for name, _, samples, *_ in metrics:
+            stats_lines.append(f"{name}: {statistics.mean(samples):.1f}ms avg")
+        stats_text = "\n".join(stats_lines)
+
+        fig.text(
+            0.02,
+            0.02,
+            stats_text,
+            fontsize=8,
+            verticalalignment="bottom",
+            bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.8),
+        )
+
+        # Save the plot
+        plot_filename = "performance_plot.png"
+        fig.savefig(plot_filename, dpi=300, bbox_inches="tight")
+    finally:
+        plt.close(fig)
+
+    print(f"Performance plot saved as: {plot_filename}")
+
+    return plot_filename
+
+
+# Create performance plot
+# Arguments are passed by keyword so each list is visibly bound to the
+# parameter it belongs to.
+create_performance_plot(
+    template=template,
+    iterations_count=iterations_count,
+    sandbox_creation_times=sandbox_creation_times,
+    health_check_times=health_check_times,
+    first_code_run_times=first_code_run_times,
+    second_code_run_times=second_code_run_times,
+)