Skip to content

Commit 33a17db

Browse files
SkafteNicki and lantiga
authored and committed
Update throughput table to include H200 stats (#21119)
* add h200 support to throughput * add testing * changelog --------- Co-authored-by: Jirka Borovec <[email protected]> (cherry picked from commit e55650d)
1 parent 9288c1f commit 33a17db

File tree

3 files changed

+33
-1
lines changed

3 files changed

+33
-1
lines changed

src/lightning/fabric/CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
66

7+
8+
## [unreleased] - YYYY-MM-DD
9+
10+
### Changed
11+
12+
- Added support for NVIDIA H200 GPUs in `get_available_flops` ([#20913](https://github.com/Lightning-AI/pytorch-lightning/pull/21119))
13+
14+
715
---
816

917
## [2.5.3] - 2025-08-DD

src/lightning/fabric/utilities/throughput.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,23 @@ def measure_flops(
304304

305305
_CUDA_FLOPS: dict[str, dict[Union[str, torch.dtype], float]] = {
306306
# Hopper
307+
# source: https://nvdam.widen.net/s/nb5zzzsjdf/hpc-datasheet-sc23-h200-datasheet-3002446
308+
"h200 sxm1": {
309+
torch.float64: 3.4e13,
310+
torch.float32: 6.7e13,
311+
"tfloat32": 9.9e14,
312+
torch.bfloat16: 2.0e15,
313+
torch.float16: 2.0e15,
314+
torch.int8: 4.0e15,
315+
},
316+
"h200 nvl1": {
317+
torch.float64: 3.0e13,
318+
torch.float32: 6.0e13,
319+
"tfloat32": 8.4e14,
320+
torch.bfloat16: 1.7e15,
321+
torch.float16: 1.7e15,
322+
torch.int8: 3.3e15,
323+
},
307324
# source: https://resources.nvidia.com/en-us-tensor-core
308325
"h100 nvl": {
309326
torch.float64: 67e12,
@@ -536,7 +553,12 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) ->
536553
if device.type == "cuda":
537554
device_name = torch.cuda.get_device_name(device)
538555
chip = device_name.lower()
539-
if "h100" in chip:
556+
if "h200" in chip:
557+
if "sxm1" in chip:
558+
chip = "h200 sxm1"
559+
elif "nvl1" in chip:
560+
chip = "h200 nvl1"
561+
elif "h100" in chip:
540562
if "hbm3" in chip:
541563
chip = "h100 sxm"
542564
elif "nvl" in chip:

tests/tests_fabric/utilities/test_throughput.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ def test_get_available_flops(xla_available):
6868
"device_name",
6969
[
7070
# Hopper
71+
"NVIDIA H200 SXM1",
72+
"NVIDIA H200 NVL1",
7173
"h100-nvl", # TODO: switch with `torch.cuda.get_device_name()` result
7274
"h100-hbm3", # TODO: switch with `torch.cuda.get_device_name()` result
7375
"NVIDIA H100 PCIe",

0 commit comments

Comments
 (0)