Skip to content

Commit ab07bfd

Browse files
authored
Workaround for OOM error on benchmark_jsd (#1037)
## Summary The benchmark currently requires a lot of memory with the current configuration (69 GB); it is the heaviest of all benchmarks based on current results from `all_benchmark_data.csv`, so I added a workaround for GPUs without enough memory. ## Details An alternative implementation could be to replace all benchmark function calls with another function that would handle OOM errors, but that would require changing all benchmarks. We would need to replace all `triton.testing.do_bench` calls with a local function that handles OOM errors and change `_test_memory` as well. And we would probably need to start saving some `inf` results in the csv. ## Testing Done Tested specifically the changed benchmark. - Hardware Type: Intel GPU Max 1550 - [x] run `make test` to ensure correctness - [x] run `make checkstyle` to ensure code style - [x] run `make test-convergence` to ensure convergence
1 parent 7b51e56 commit ab07bfd

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

benchmark/scripts/benchmark_jsd.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from utils import run_benchmarks
1010

1111
from liger_kernel.transformers.jsd import LigerJSD
12+
from liger_kernel.utils import get_total_gpu_memory
1213
from liger_kernel.utils import infer_device
1314

1415
device = infer_device()
@@ -123,11 +124,19 @@ def full():
123124

124125
if __name__ == "__main__":
125126
args = parse_benchmark_script_args()
127+
gpu_memory_gbs = get_total_gpu_memory()
128+
# We know that the full test will require 69GBs for vocab size 2^17 and 39GBs for vocab size 2^16 on torch
129+
if gpu_memory_gbs >= 69:
130+
x_max = 17
131+
elif gpu_memory_gbs >= 39:
132+
x_max = 16
133+
else:
134+
x_max = 15
126135
common_args = {
127136
"kernel_name": "jsd",
128137
"x_name": "V",
129138
"x_label": "vocab size",
130-
"x_values": [2**i for i in range(12, 18)],
139+
"x_values": [2**i for i in range(12, x_max + 1)],
131140
"kernel_providers": ["liger", "torch"],
132141
"extra_benchmark_configs": [{"B": 4, "T": 2048}],
133142
"overwrite": args.overwrite,

src/liger_kernel/utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,16 @@ def transformers_version_dispatch(
121121
return before_fn(*before_args, **before_kwargs)
122122
else:
123123
return after_fn(*after_args, **after_kwargs)
124+
125+
126+
def get_total_gpu_memory(device_index: int = 0) -> int:
    """Return the total memory of a GPU device in whole gibibytes (GiB).

    Args:
        device_index: Index of the device to query. Defaults to ``0``,
            preserving the previous hard-coded behavior.

    Returns:
        Total device memory floored to an integer number of GiB
        (``total_memory // 1024**3``).

    Raises:
        RuntimeError: If the detected accelerator backend is not one of
            ``cuda`` / ``xpu`` / ``npu``.
    """
    device = infer_device()
    # cuda, xpu and npu all expose the same get_device_properties API on
    # their respective torch backend module, so dispatch via getattr
    # instead of duplicating the branch bodies per backend.
    if device not in ("cuda", "xpu", "npu"):
        raise RuntimeError(f"Unsupported device: {device}")
    backend = getattr(torch, device)
    return backend.get_device_properties(device_index).total_memory // (1024**3)

0 commit comments

Comments
 (0)