Skip to content

Commit d2e5fce

Browse files
committed
add h200 support to throughput
1 parent b581924 commit d2e5fce

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

src/lightning/fabric/utilities/throughput.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,23 @@ def measure_flops(
304304

305305
_CUDA_FLOPS: dict[str, dict[Union[str, torch.dtype], float]] = {
306306
# Hopper
307+
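# source: https://nvdam.widen.net/s/nb5zzzsjdf/hpc-datasheet-sc23-h200-datasheet-3002446
# NOTE(review): the trailing "1" in the "h200 sxm1" / "h200 nvl1" keys looks like the datasheet's footnote marker
# rather than part of the product name -- confirm these substrings actually occur in the lowercased
# torch.cuda.get_device_name() output on H200 hardware; otherwise the lookup never selects these rows.
# NOTE(review): also confirm whether the Tensor Core figures below are the dense or the "with sparsity" numbers,
# so they are comparable with the other entries in this table.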
308+
"h200 sxm1": {
309+
torch.float64: 3.4e13,
310+
torch.float32: 6.7e13,
311+
"tfloat32": 9.9e14,
312+
torch.bfloat16: 2.0e15,
313+
torch.float16: 2.0e15,
314+
torch.int8: 4.0e15,
315+
},
316+
"h200 nvl1": {
317+
torch.float64: 3.0e13,
318+
torch.float32: 6.0e13,
319+
"tfloat32": 8.4e14,
320+
torch.bfloat16: 1.7e15,
321+
torch.float16: 1.7e15,
322+
torch.int8: 3.3e15,
323+
},
307324
# source: https://resources.nvidia.com/en-us-tensor-core
308325
"h100 nvl": {
309326
torch.float64: 67e12,
@@ -536,7 +553,12 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) ->
536553
if device.type == "cuda":
537554
device_name = torch.cuda.get_device_name(device)
538555
chip = device_name.lower()
539-
if "h100" in chip:
556+
if "h200" in chip:
557+
if "sxm1" in chip:
558+
chip = "h200 sxm1"
559+
elif "nvl1" in chip:
560+
chip = "h200 nvl1"
561+
elif "h100" in chip:
540562
if "hbm3" in chip:
541563
chip = "h100 sxm"
542564
elif "nvl" in chip:

0 commit comments

Comments
 (0)