2 | 2 | "cells": [
3 | 3 | {
4 | 4 | "cell_type": "code",
5 | | - "execution_count": 1,
| 5 | + "execution_count": 5,
6 | 6 | "metadata": {},
7 | 7 | "outputs": [],
8 | 8 | "source": [

21 | 21 | },
22 | 22 | {
23 | 23 | "cell_type": "code",
24 | | - "execution_count": 2,
| 24 | + "execution_count": 6,
25 | 25 | "metadata": {},
26 | 26 | "outputs": [],
27 | 27 | "source": [

30 | 30 | "\n",
31 | 31 | "# Set up PyTensor JAX mode\n",
32 | 32 | "jax_optimizer = RewriteDatabaseQuery(include=[\"jax\"], exclude=[])\n",
33 | | - "pytensor_jax_mode = Mode(linker=JAXLinker(), optimizer=jax_optimizer)\n",
| 33 | + "pytensor_jax_mode = \"JAX\"\n",
34 | 34 | "\n",
35 | 35 | "# Try to set up MLX mode\n",
36 | 36 | "try:\n",
37 | 37 | " from pytensor.link.mlx import MLXLinker\n",
38 | 38 | " import mlx.core as mx\n",
39 | 39 | " mlx_optimizer = RewriteDatabaseQuery(include=[\"mlx\"], exclude=[])\n",
40 | | - " pytensor_mlx_mode = Mode(linker=MLXLinker(), optimizer=mlx_optimizer)\n",
| 40 | + " pytensor_mlx_mode = \"MLX\"\n",
41 | 41 | " MLX_AVAILABLE = True\n",
42 | 42 | "except ImportError:\n",
43 | 43 | " MLX_AVAILABLE = False\n",

101 | 101 | " A = np.random.randn(size, size).astype(np.float32)\n",
102 | 102 | " B = np.random.randn(size, size).astype(np.float32)\n",
103 | 103 | " C = np.random.randn(size, size).astype(np.float32)\n",
| 104 | + "\n",
| 105 | + " pt_A = pt.matrix('A', dtype='float32')\n",
| 106 | + " pt_B = pt.matrix('B', dtype='float32') \n",
| 107 | + " pt_C = pt.matrix('C', dtype='float32')\n",
| 108 | + " result = pt.dot(pt.dot(pt_A, pt_B), pt_C)\n",
| 109 | + "\n",
| 110 | + "\n",
| 111 | + " f_jax = function([pt_A, pt_B, pt_C], result, mode=pytensor_jax_mode, trust_input=True)\n",
| 112 | + " f_mlx = function([pt_A, pt_B, pt_C], result, mode=pytensor_mlx_mode, trust_input=True)\n",
104 | 113 | " \n",
105 | 114 | " # === TEST 1: Matrix Multiplication Chain ===\n",
106 | 115 | " # PyTensor + JAX backend\n",
107 | 116 | " @timer_jax\n",
108 | 117 | " def pytensor_jax_matmul():\n",
109 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
110 | | - " pt_B = pt.matrix('B', dtype='float32') \n",
111 | | - " pt_C = pt.matrix('C', dtype='float32')\n",
112 | | - " result = pt.dot(pt.dot(pt_A, pt_B), pt_C)\n",
113 | | - " f = function([pt_A, pt_B, pt_C], result, mode=pytensor_jax_mode)\n",
114 | | - " return f(A, B, C)\n",
| 118 | + " return f_jax(A, B, C)\n",
115 | 119 | " \n",
116 | 120 | " # PyTensor + MLX backend\n",
117 | 121 | " @timer_mlx\n",
118 | 122 | " def pytensor_mlx_matmul():\n",
119 | 123 | " if not MLX_AVAILABLE:\n",
120 | 124 | " return None, float('inf'), 0\n",
121 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
122 | | - " pt_B = pt.matrix('B', dtype='float32')\n",
123 | | - " pt_C = pt.matrix('C', dtype='float32')\n",
124 | | - " result = pt_A @ pt_B @ pt_C\n",
125 | | - " f = function([pt_A, pt_B, pt_C], result, mode=pytensor_mlx_mode)\n",
126 | | - " return f(A, B, C)\n",
| 125 | + " return f_mlx(A, B, C)\n",
127 | 126 | " \n",
128 | 127 | " # Run matrix multiplication test\n",
129 | 128 | " _, jax_mean, jax_std = pytensor_jax_matmul()\n",

145 | 144 | " \n",
146 | 145 | " # === TEST 2: Element-wise Operations ===\n",
147 | 146 | " # PyTensor + JAX\n",
| 147 | + " result = pt.sin(pt_A) + pt.cos(pt_B)\n",
| 148 | + " f_jax = function([pt_A, pt_B], result, mode=pytensor_jax_mode, trust_input=True)\n",
| 149 | + " f_mlx = function([pt_A, pt_B], result, mode=pytensor_mlx_mode, trust_input=True)\n",
| 150 | + "\n",
148 | 151 | " @timer_jax\n",
149 | 152 | " def pytensor_jax_elemwise():\n",
150 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
151 | | - " pt_B = pt.matrix('B', dtype='float32')\n",
152 | | - " result = pt.sin(pt_A) + pt.cos(pt_B)\n",
153 | | - " f = function([pt_A, pt_B], result, mode=pytensor_jax_mode)\n",
154 | | - " return f(A, B)\n",
| 153 | + " return f_jax(A, B)\n",
155 | 154 | " \n",
156 | 155 | " # PyTensor + MLX\n",
157 | 156 | " @timer_mlx\n",
158 | 157 | " def pytensor_mlx_elemwise():\n",
159 | 158 | " if not MLX_AVAILABLE:\n",
160 | 159 | " return None, float('inf'), 0\n",
161 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
162 | | - " pt_B = pt.matrix('B', dtype='float32')\n",
163 | | - " result = pt.sin(pt_A) + pt.cos(pt_B)\n",
164 | | - " f = function([pt_A, pt_B], result, mode=pytensor_mlx_mode)\n",
165 | | - " return f(A, B)\n",
| 160 | + " return f_mlx(A, B)\n",
166 | 161 | " \n",
167 | 162 | " # Run element-wise test\n",
168 | 163 | " _, jax_mean, jax_std = pytensor_jax_elemwise()\n",

184 | 179 | " \n",
185 | 180 | " # === TEST 3: Matrix Addition with Broadcasting ===\n",
186 | 181 | " # PyTensor + JAX\n",
| 182 | + " result = pt_A + pt_B.T\n",
| 183 | + " f_jax = function([pt_A, pt_B], result, mode=pytensor_jax_mode, trust_input=True)\n",
| 184 | + " f_mlx = function([pt_A, pt_B], result, mode=pytensor_mlx_mode, trust_input=True)\n",
187 | 185 | " @timer_jax\n",
188 | 186 | " def pytensor_jax_broadcast():\n",
189 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
190 | | - " pt_B = pt.matrix('B', dtype='float32')\n",
191 | | - " result = pt_A + pt_B.T\n",
192 | | - " f = function([pt_A, pt_B], result, mode=pytensor_jax_mode)\n",
193 | | - " return f(A, B)\n",
| 187 | + " return f_jax(A, B)\n",
194 | 188 | " \n",
195 | 189 | " # PyTensor + MLX\n",
196 | 190 | " @timer_mlx\n",
197 | 191 | " def pytensor_mlx_broadcast():\n",
198 | 192 | " if not MLX_AVAILABLE:\n",
199 | 193 | " return None, float('inf'), 0\n",
200 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
201 | | - " pt_B = pt.matrix('B', dtype='float32')\n",
202 | | - " result = pt_A + pt_B.T\n",
203 | | - " f = function([pt_A, pt_B], result, mode=pytensor_mlx_mode)\n",
204 | | - " return f(A, B)\n",
| 194 | + " return f_mlx(A, B)\n",
205 | 195 | " \n",
206 | 196 | " # Run broadcasting test\n",
207 | 197 | " _, jax_mean, jax_std = pytensor_jax_broadcast()\n",

225 | 215 | " df = pd.DataFrame(results)\n",
226 | 216 | " return df\n",
227 | 217 | "\n",
228 | | - "def verify_computation_correctness():\n",
229 | | - " \"\"\"Verify that JAX and MLX backends produce the same results\"\"\"\n",
230 | | - " if not MLX_AVAILABLE:\n",
231 | | - " print(\"MLX not available, skipping correctness check\")\n",
232 | | - " return\n",
233 | | - " \n",
234 | | - " print(\"Verifying computational correctness...\")\n",
235 | | - " \n",
236 | | - " # Test with small matrices\n",
237 | | - " np.random.seed(42)\n",
238 | | - " A = np.random.randn(4, 4).astype(np.float32)\n",
239 | | - " B = np.random.randn(4, 4).astype(np.float32)\n",
240 | | - " C = np.random.randn(4, 4).astype(np.float32)\n",
241 | | - " \n",
242 | | - " # Test matrix multiplication\n",
243 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
244 | | - " pt_B = pt.matrix('B', dtype='float32')\n",
245 | | - " pt_C = pt.matrix('C', dtype='float32')\n",
246 | | - " result_expr = pt_A @ pt_B @ pt_C\n",
247 | | - " \n",
248 | | - " f_jax = function([pt_A, pt_B, pt_C], result_expr, mode=pytensor_jax_mode)\n",
249 | | - " f_mlx = function([pt_A, pt_B, pt_C], result_expr, mode=pytensor_mlx_mode)\n",
250 | | - " \n",
251 | | - " result_jax = f_jax(A, B, C)\n",
252 | | - " result_mlx = f_mlx(A, B, C)\n",
253 | | - " \n",
254 | | - " # Force MLX evaluation\n",
255 | | - " mx.eval(result_mlx)\n",
256 | | - " \n",
257 | | - " # Convert to numpy for comparison\n",
258 | | - " if hasattr(result_jax, 'block_until_ready'):\n",
259 | | - " result_jax.block_until_ready()\n",
260 | | - " \n",
261 | | - " diff = np.abs(np.array(result_jax) - np.array(result_mlx)).max()\n",
262 | | - " print(f\"Max difference between JAX and MLX results: {diff:.2e}\")\n",
263 | | - " \n",
264 | | - " if diff < 1e-5:\n",
265 | | - " print(\"✅ Results match within tolerance\")\n",
266 | | - " else:\n",
267 | | - " print(\"❌ Results differ significantly\")\n",
268 | | - " \n",
269 | | - " return diff\n",
270 | | - "\n",
271 | 218 | "def main(N=1000):\n",
272 | 219 | " \"\"\"Main benchmark execution\"\"\"\n",
273 | 220 | " # Display system info\n",

285 | 232 | " import pandas as pd\n",
286 | 233 | " info_df = pd.DataFrame([system_info])\n",
287 | 234 | " \n",
288 | | - " # First verify correctness\n",
289 | | - " verify_computation_correctness()\n",
290 | | - " \n",
291 | 235 | " # Then run benchmarks\n",
292 | 236 | " results_df = run_benchmark(N=N)\n",
293 | 237 | " \n",

296 | 240 | },
297 | 241 | {
298 | 242 | "cell_type": "code",
299 | | - "execution_count": null,
| 243 | + "execution_count": 10,
300 | 244 | "metadata": {},
301 | 245 | "outputs": [
302 | 246 | {
303 | 247 | "name": "stdout",
304 | 248 | "output_type": "stream",
305 | 249 | "text": [
306 | | - "Verifying computational correctness...\n",
307 | | - "Max difference between JAX and MLX results: 0.00e+00\n",
308 | | - "✅ Results match within tolerance\n",
309 | | - "Running benchmarks with N=20 repetitions per test...\n",
310 | | - "Testing 128x128 matrices...\n"
| 250 | + "Running benchmarks with N=100 repetitions per test...\n",
| 251 | + "Testing 128x128 matrices...\n",
| 252 | + "Testing 256x256 matrices...\n",
| 253 | + "Testing 512x512 matrices...\n",
| 254 | + "Testing 1024x1024 matrices...\n"
311 | 255 | ]
312 | 256 | }
313 | 257 | ],
314 | 258 | "source": [
315 | | - "iteration=20\n",
| 259 | + "iteration=100\n",
316 | 260 | "_, results = main(N=iteration)"
317 | 261 | ]
318 | 262 | },
319 | 263 | {
320 | 264 | "cell_type": "code",
321 | | - "execution_count": 27,
| 265 | + "execution_count": 11,
322 | 266 | "metadata": {},
323 | 267 | "outputs": [
324 | 268 | {
325 | 269 | "name": "stdout",
326 | 270 | "output_type": "stream",
327 | 271 | "text": [
328 | 272 | "\n",
329 | | - "Benchmark Results over 1000 repetitions:\n",
| 273 | + "Benchmark Results over 100 repetitions:\n",
330 | 274 | " Size Operation PyTensor+JAX Mean (s) PyTensor+JAX Std (s) PyTensor+MLX Mean (s) PyTensor+MLX Std (s) MLX Speedup\n",
331 | | - " 128x128 Matrix Chain (A @ B @ C) 0.005700 0.002127 0.001215 0.000497 4.69x\n",
332 | | - " 128x128 Element-wise (sin(A) + cos(B)) 0.008280 0.002158 0.000876 0.000451 9.45x\n",
333 | | - " 128x128 Broadcasting (A + B.T) 0.008083 0.002485 0.000861 0.000207 9.39x\n",
334 | | - " 256x256 Matrix Chain (A @ B @ C) 0.005705 0.002307 0.001085 0.000210 5.26x\n",
335 | | - " 256x256 Element-wise (sin(A) + cos(B)) 0.009794 0.001994 0.000998 0.001895 9.82x\n",
336 | | - " 256x256 Broadcasting (A + B.T) 0.010467 0.002573 0.001056 0.000578 9.91x\n",
337 | | - " 512x512 Matrix Chain (A @ B @ C) 0.006898 0.002576 0.001300 0.000391 5.31x\n",
338 | | - " 512x512 Element-wise (sin(A) + cos(B)) 0.010997 0.002435 0.000976 0.000584 11.27x\n",
339 | | - " 512x512 Broadcasting (A + B.T) 0.009730 0.002690 0.000968 0.000315 10.05x\n",
340 | | - "1024x1024 Matrix Chain (A @ B @ C) 0.010941 0.002035 0.001735 0.000302 6.31x\n",
341 | | - "1024x1024 Element-wise (sin(A) + cos(B)) 0.013936 0.003774 0.001103 0.000253 12.64x\n",
342 | | - "1024x1024 Broadcasting (A + B.T) 0.011153 0.002297 0.001084 0.000242 10.29x\n"
| 275 | + " 128x128 Matrix Chain (A @ B @ C) 0.000131 0.000300 0.000283 0.000216 0.46x\n",
| 276 | + " 128x128 Element-wise (sin(A) + cos(B)) 0.000104 0.000304 0.000209 0.000145 0.50x\n",
| 277 | + " 128x128 Broadcasting (A + B.T) 0.000037 0.000296 0.000215 0.000153 0.17x\n",
| 278 | + " 256x256 Matrix Chain (A @ B @ C) 0.000394 0.000372 0.000441 0.000239 0.89x\n",
| 279 | + " 256x256 Element-wise (sin(A) + cos(B)) 0.000247 0.000389 0.000255 0.000168 0.97x\n",
| 280 | + " 256x256 Broadcasting (A + B.T) 0.000063 0.000329 0.000217 0.000153 0.29x\n",
| 281 | + " 512x512 Matrix Chain (A @ B @ C) 0.001004 0.000255 0.000399 0.000188 2.51x\n",
| 282 | + " 512x512 Element-wise (sin(A) + cos(B)) 0.000664 0.000328 0.000263 0.000163 2.53x\n",
| 283 | + " 512x512 Broadcasting (A + B.T) 0.000115 0.000339 0.000254 0.000156 0.45x\n",
| 284 | + "1024x1024 Matrix Chain (A @ B @ C) 0.005281 0.000359 0.000993 0.000342 5.32x\n",
| 285 | + "1024x1024 Element-wise (sin(A) + cos(B)) 0.002595 0.000359 0.000408 0.000220 6.36x\n",
| 286 | + "1024x1024 Broadcasting (A + B.T) 0.000501 0.000346 0.000385 0.000155 1.30x\n"
343 | 287 | ]
344 | 288 | }
345 | 289 | ],

367 | 311 | }
368 | 312 | ],
369 | 313 | "source": [
370 | | - "# Additional timing analysis - separate compilation vs execution time\n",
371 | | - "if MLX_AVAILABLE:\n",
372 | | - " print(\"\\n=== Detailed MLX Timing Analysis ===\")\n",
| 314 | + "# # Additional timing analysis - separate compilation vs execution time\n",
| 315 | + "# if MLX_AVAILABLE:\n",
| 316 | + "# print(\"\\n=== Detailed MLX Timing Analysis ===\")\n",
373 | 317 | " \n",
374 | | - " # Test with medium-sized matrix\n",
375 | | - " np.random.seed(42)\n",
376 | | - " A = np.random.randn(512, 512).astype(np.float32)\n",
377 | | - " B = np.random.randn(512, 512).astype(np.float32)\n",
378 | | - " C = np.random.randn(512, 512).astype(np.float32)\n",
| 318 | + "# # Test with medium-sized matrix\n",
| 319 | + "# np.random.seed(42)\n",
| 320 | + "# A = np.random.randn(512, 512).astype(np.float32)\n",
| 321 | + "# B = np.random.randn(512, 512).astype(np.float32)\n",
| 322 | + "# C = np.random.randn(512, 512).astype(np.float32)\n",
379 | 323 | " \n",
380 | | - " # Create PyTensor function (compilation time)\n",
381 | | - " start = time.perf_counter()\n",
382 | | - " pt_A = pt.matrix('A', dtype='float32')\n",
383 | | - " pt_B = pt.matrix('B', dtype='float32')\n",
384 | | - " pt_C = pt.matrix('C', dtype='float32')\n",
385 | | - " result_expr = pt_A @ pt_B @ pt_C\n",
386 | | - " f_mlx = function([pt_A, pt_B, pt_C], result_expr, mode=pytensor_mlx_mode)\n",
387 | | - " compilation_time = time.perf_counter() - start\n",
| 324 | + "# # Create PyTensor function (compilation time)\n",
| 325 | + "# start = time.perf_counter()\n",
| 326 | + "# pt_A = pt.matrix('A', dtype='float32')\n",
| 327 | + "# pt_B = pt.matrix('B', dtype='float32')\n",
| 328 | + "# pt_C = pt.matrix('C', dtype='float32')\n",
| 329 | + "# result_expr = pt_A @ pt_B @ pt_C\n",
| 330 | + "# f_mlx = function([pt_A, pt_B, pt_C], result_expr, mode=pytensor_mlx_mode)\n",
| 331 | + "# compilation_time = time.perf_counter() - start\n",
388 | 332 | " \n",
389 | | - " # First execution (may include additional compilation/optimization)\n",
390 | | - " start = time.perf_counter()\n",
391 | | - " result = f_mlx(A, B, C)\n",
392 | | - " mx.eval(result) # Force evaluation\n",
393 | | - " first_exec_time = time.perf_counter() - start\n",
| 333 | + "# # First execution (may include additional compilation/optimization)\n",
| 334 | + "# start = time.perf_counter()\n",
| 335 | + "# result = f_mlx(A, B, C)\n",
| 336 | + "# mx.eval(result) # Force evaluation\n",
| 337 | + "# first_exec_time = time.perf_counter() - start\n",
394 | 338 | " \n",
395 | | - " # Subsequent executions (should be faster)\n",
396 | | - " exec_times = []\n",
397 | | - " for _ in range(1000):\n",
398 | | - " start = time.perf_counter()\n",
399 | | - " result = f_mlx(A, B, C)\n",
400 | | - " mx.eval(result)\n",
401 | | - " exec_times.append(time.perf_counter() - start)\n",
| 339 | + "# # Subsequent executions (should be faster)\n",
| 340 | + "# exec_times = []\n",
| 341 | + "# for _ in range(1000):\n",
| 342 | + "# start = time.perf_counter()\n",
| 343 | + "# result = f_mlx(A, B, C)\n",
| 344 | + "# mx.eval(result)\n",
| 345 | + "# exec_times.append(time.perf_counter() - start)\n",
402 | 346 | " \n",
403 | | - " avg_exec_time = np.mean(exec_times)\n",
404 | | - " std_exec_time = np.std(exec_times)\n",
| 347 | + "# avg_exec_time = np.mean(exec_times)\n",
| 348 | + "# std_exec_time = np.std(exec_times)\n",
405 | 349 | " \n",
406 | | - " print(f\"Compilation time: {compilation_time:.4f}s\")\n",
407 | | - " print(f\"First execution: {first_exec_time:.4f}s\")\n",
408 | | - " print(f\"Average execution (5 runs): {avg_exec_time:.4f}s ± {std_exec_time:.4f}s\")\n",
409 | | - " print(f\"Individual execution times: {[f'{t:.4f}' for t in exec_times]}\")\n"
| 350 | + "# print(f\"Compilation time: {compilation_time:.4f}s\")\n",
| 351 | + "# print(f\"First execution: {first_exec_time:.4f}s\")\n",
| 352 | + "# print(f\"Average execution (1000 runs): {avg_exec_time:.4f}s ± {std_exec_time:.4f}s\")\n",
| 353 | + "# print(f\"Individual execution times: {[f'{t:.4f}' for t in exec_times]}\")\n"
410 | 354 | ]
411 | 355 | },
412 | 356 | {