perf: run ablations for the paper

avik-pal · avik-pal · commit 79e302e2efe2 · 2025-06-21T22:02:35.000-04:00
diff --git a/perf/common.jl b/perf/common.jl
@@ -0,0 +1,120 @@
+using BenchmarkTools: @belapsed, @benchmark
+using Reactant, Enzyme, PrettyTables, Statistics
+
+# Benchmark objective: sum of squares of the model output; the returned state is dropped.
+function simple_mse_loss(model, x, ps, st)
+    return sum(abs2, first(Lux.apply(model, x, ps, st)))
+end
+
+"""
+    benchmark_nn_primal(model, x, ps, st; disable_scatter_gather_bench=true,
+                        disable_pad_bench=true)
+
+Compile `simple_mse_loss(model, x, ps, st)` under several Reactant compile options, time
+each compiled function with `@benchmark`, and return one row per configuration.
+
+# Arguments
+  - `model`, `x`, `ps`, `st`: forwarded unchanged to `simple_mse_loss`, both when
+    compiling and when timing.
+
+# Keywords
+  - `disable_scatter_gather_bench`: when `true`, additionally time a build compiled with
+    `disable_scatter_gather_optimization_passes=true`.
+  - `disable_pad_bench`: when `true`, additionally time a build compiled with
+    `disable_pad_optimization_passes=true`.
+
+When both keywords are `true`, a build with both pass groups disabled is timed as well.
+The "Only XLA" and "All" configurations are always timed.
+
+!!! note
+    Despite the `disable_*_bench` names, setting a keyword to `true` *runs* the
+    corresponding ablation; it does not skip it.
+
+# Returns
+A `Vector` of `(mode, passes, median, std, relative)` tuples sorted by `median`. `mode`
+is always `"Primal"`; `median` and `std` are the `.time` fields of the `@benchmark`
+estimates (raw BenchmarkTools values, not rescaled here); `relative` is `median`
+divided by the "Only XLA" median (`1.0` is stored for that row).
+"""
+function benchmark_nn_primal(
+    model, x, ps, st; disable_scatter_gather_bench=true, disable_pad_bench=true
+)
+    results = Vector{Tuple{String,String,Float64,Float64,Float64}}()
+
+    # Every build is compiled with `sync = true` so that a call blocks until the
+    # computation has finished and the timings cover the whole execution.
+    # Reference configuration: every other row is reported relative to this median.
+    xla_fn = @compile sync = true compile_options = Reactant.DefaultXLACompileOptions() simple_mse_loss(
+        model, x, ps, st
+    )
+    xla_trial = @benchmark $xla_fn($model, $x, $ps, $st)
+    baseline = median(xla_trial).time
+    push!(results, ("Primal", "Only XLA", baseline, std(xla_trial).time, 1.0))
+
+    # Append the row for one timed configuration, normalised by the XLA-only median.
+    function record!(passes, t)
+        med = median(t).time
+        return push!(results, ("Primal", passes, med, std(t).time, med / baseline))
+    end
+
+    # Default compile options.
+    default_fn = @compile sync = true simple_mse_loss(model, x, ps, st)
+    trial = @benchmark $default_fn($model, $x, $ps, $st)
+    record!("All", trial)
+
+    # Ablation: scatter/gather optimization passes disabled.
+    if disable_scatter_gather_bench
+        no_scatter_fn = @compile sync = true compile_options = CompileOptions(;
+            disable_scatter_gather_optimization_passes=true
+        ) simple_mse_loss(model, x, ps, st)
+        trial = @benchmark $no_scatter_fn($model, $x, $ps, $st)
+        record!("No Scatter/Gather Optimizations", trial)
+    end
+
+    # Ablation: pad optimization passes disabled.
+    if disable_pad_bench
+        no_pad_fn = @compile sync = true compile_options = CompileOptions(;
+            disable_pad_optimization_passes=true
+        ) simple_mse_loss(model, x, ps, st)
+        trial = @benchmark $no_pad_fn($model, $x, $ps, $st)
+        record!("No Pad Optimizations", trial)
+    end
+
+    # Ablation: both pass groups disabled; only timed when both flags are set.
+    if disable_scatter_gather_bench && disable_pad_bench
+        no_scatter_pad_fn = @compile sync = true compile_options = CompileOptions(;
+            disable_scatter_gather_optimization_passes=true,
+            disable_pad_optimization_passes=true,
+        ) simple_mse_loss(model, x, ps, st)
+        trial = @benchmark $no_scatter_pad_fn($model, $x, $ps, $st)
+        record!("No Scatter/Gather and Pad Optimizations", trial)
+    end
+
+    # Fastest configuration first.
+    sort!(results; by=row -> row[3])
+    return results
+end
+
+# Print `results` (one row per configuration) as a rounded unicode table. Columns 3 and 4
+# are divided by 1e9 on a copy (presumably BenchmarkTools nanoseconds -> the "s" shown in
+# the sub-header), so the caller's matrix is untouched. Column 5: > 1.0 red, < 1.0 green.
+function pretty_print_table(results)
+    header = (
+        ["Mode", "Optimization Passes", "Median Time", "Std. Dev. Time", "Relative Timing"],
+        ["", "", "s", "s", "Time / XLA Time"],
+    )
+
+    scaled = copy(results)
+    scaled[:, 3:4] ./= 1e9
+    slower = Highlighter((data, i, j) -> j == 5 && data[i, j] > 1.0, crayon"bold red")
+    faster = Highlighter((data, i, j) -> j == 5 && data[i, j] < 1.0, crayon"bold green")
+    # `pretty_table` prints by itself; a `display` wrapper would also print "nothing".
+    pretty_table(
+        scaled;
+        header,
+        header_crayon=crayon"yellow bold",
+        highlighters=(slower, faster),
+        tf=tf_unicode_rounded,
+    )
+    return nothing
+end
diff --git a/perf/neuraloperators/Project.toml b/perf/neuraloperators/Project.toml
@@ -0,0 +1,21 @@
+[deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+NeuralOperators = "ea5c82af-86e5-48da-8ee1-382d6ad7af4b"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Reactant = "3c362404-f566-11ee-1572-e11a4b42c853"
+
+[sources]
+Reactant = {path = "../../"}
+
+[compat]
+BenchmarkTools = "1.6"
+CSV = "0.10.15"
+Lux = "1.13.4"
+NeuralOperators = "0.6"
+PrettyTables = "2.4.0"
+Random = "1.11"
+julia = "1.11"
diff --git a/perf/neuraloperators/main.jl b/perf/neuraloperators/main.jl
@@ -0,0 +1,39 @@
+using NeuralOperators, Lux, Random
+
+include("../common.jl")
+
+const xdev = reactant_device()
+
+# Benchmark the primal pass of a DeepONet on the Reactant device and print the table.
+function run_deeponet_benchmarks()
+    @info "Running DeepONet benchmarks"
+
+    hidden = ntuple(Returns(256), 5)
+    model = DeepONet(;
+        branch=(64, hidden..., 16),
+        trunk=(1, hidden..., 16),
+        branch_activation=gelu,
+        trunk_activation=gelu,
+    )
+    ps, st = xdev(Lux.setup(Random.default_rng(), model))
+    u = xdev(rand(Float32, 64, 1024))
+    y = xdev(rand(Float32, 1, 128))
+
+    # Both precision settings are HIGH for everything executed inside the `do` block.
+    primal_timings = Reactant.with_config(;
+        dot_general_precision=PrecisionConfig.HIGH,
+        convolution_precision=PrecisionConfig.HIGH,
+    ) do
+        benchmark_nn_primal(
+            model, (u, y), ps, st;
+            disable_scatter_gather_bench=true, disable_pad_bench=true,
+        )
+    end
+
+    # `hcat` yields a 5×n matrix (one column per result tuple); flip it to n×5 rows.
+    columns = hcat([[t...] for t in primal_timings]...)
+    pretty_print_table(permutedims(columns, (2, 1)))
+    return nothing
+end
+
+run_deeponet_benchmarks()
diff --git a/src/CompileOptions.jl b/src/CompileOptions.jl
@@ -138,9 +138,6 @@ Fine-grained control over the compilation options for the Reactant compiler.
   - `assert_nonallocating`: If `true`, we make sure that no new buffers are
     returned by the function. Any buffer returned must be donated from the inputs. Defaults
     to `false`.
-  - `sync`: Reactant computations are asynchronous by default. If `true`, the computation
-    will be executed synchronously, blocking till the computation is complete. This is
-    recommended when benchmarking.
 
 # Extended Help
 
@@ -178,7 +175,6 @@ struct CompileOptions
     # julia codegen options
     assert_nonallocating::Bool
     donated_args::Symbol
-    sync::Bool
     ## private options for ablation studies
     disable_scatter_gather_optimization_passes::Bool
     disable_pad_optimization_passes::Bool
@@ -201,7 +197,6 @@ function CompileOptions(;
     optimize_communications::Union{Bool,OptimizeCommunicationOptions}=true,
     assert_nonallocating::Bool=false,
     donated_args::Symbol=:auto,
-    sync::Bool=false,
     disable_scatter_gather_optimization_passes::Bool=false,
     disable_pad_optimization_passes::Bool=false,
 )
@@ -248,7 +243,6 @@ function CompileOptions(;
         optimize_communications,
         assert_nonallocating,
         donated_args,
-        sync,
         disable_scatter_gather_optimization_passes,
         disable_pad_optimization_passes,
     )
@@ -288,7 +282,6 @@ function __compile_options_with_reversed_propagation(compile_options::CompileOpt
         compile_options.optimize_communications,
         compile_options.assert_nonallocating,
         compile_options.donated_args,
-        compile_options.sync,
         compile_options.disable_scatter_gather_optimization_passes,
         compile_options.disable_pad_optimization_passes,
     )
diff --git a/src/Compiler.jl b/src/Compiler.jl
@@ -1260,7 +1260,6 @@ function __get_compile_options_and_kwargs(;
     optimize_communications::Union{Bool,OptimizeCommunicationOptions}=true,
     assert_nonallocating::Bool=false,
     donated_args::Symbol=:auto,
-    sync::Bool=false,
     kwargs...,
 )
     return (
@@ -1282,7 +1281,6 @@ function __get_compile_options_and_kwargs(;
             optimize_communications,
             assert_nonallocating,
             donated_args,
-            sync,
         ),
         kwargs,
     )