From d2e5fcedd3c087dbcef0c41e0f9fb26241b98b62 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Date: Wed, 27 Aug 2025 06:34:45 +0200 Subject: [PATCH 1/3] add h200 support to throughput --- src/lightning/fabric/utilities/throughput.py | 24 +++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/lightning/fabric/utilities/throughput.py b/src/lightning/fabric/utilities/throughput.py index 72b33a41f168c..6bc329fa1c3be 100644 --- a/src/lightning/fabric/utilities/throughput.py +++ b/src/lightning/fabric/utilities/throughput.py @@ -304,6 +304,23 @@ def measure_flops( _CUDA_FLOPS: dict[str, dict[Union[str, torch.dtype], float]] = { # Hopper + # source: https://nvdam.widen.net/s/nb5zzzsjdf/hpc-datasheet-sc23-h200-datasheet-3002446 + "h200 sxm1": { + torch.float64: 3.4e13, + torch.float32: 6.7e13, + "tfloat32": 9.9e14, + torch.bfloat16: 2.0e15, + torch.float16: 2.0e15, + torch.int8: 4.0e15, + }, + "h200 nvl1": { + torch.float64: 3.0e13, + torch.float32: 6.0e13, + "tfloat32": 8.4e14, + torch.bfloat16: 1.7e15, + torch.float16: 1.7e15, + torch.int8: 3.3e15, + }, # source: https://resources.nvidia.com/en-us-tensor-core "h100 nvl": { torch.float64: 67e12, @@ -536,7 +553,12 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) -> if device.type == "cuda": device_name = torch.cuda.get_device_name(device) chip = device_name.lower() - if "h100" in chip: + if "h200" in chip: + if "sxm1" in chip: + chip = "h200 sxm1" + elif "nvl1" in chip: + chip = "h200 nvl1" + elif "h100" in chip: if "hbm3" in chip: chip = "h100 sxm" elif "nvl" in chip: From 854f07ab3395cab40ad48b1a579c66109b9e98f3 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Date: Wed, 27 Aug 2025 06:34:55 +0200 Subject: [PATCH 2/3] add testing --- tests/tests_fabric/utilities/test_throughput.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/tests_fabric/utilities/test_throughput.py b/tests/tests_fabric/utilities/test_throughput.py index 00dafbb72cb8f..a175fa97fd444 
100644 --- a/tests/tests_fabric/utilities/test_throughput.py +++ b/tests/tests_fabric/utilities/test_throughput.py @@ -68,6 +68,8 @@ def test_get_available_flops(xla_available): "device_name", [ # Hopper + "NVIDIA H200 SXM1", + "NVIDIA H200 NVL1", "h100-nvl", # TODO: switch with `torch.cuda.get_device_name()` result "h100-hbm3", # TODO: switch with `torch.cuda.get_device_name()` result "NVIDIA H100 PCIe", From 7f53ed1283564b361b6e7d211c0149443c122442 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Date: Wed, 27 Aug 2025 06:36:26 +0200 Subject: [PATCH 3/3] changelog --- src/lightning/fabric/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md index 20907df7c874a..a41cdae713e02 100644 --- a/src/lightning/fabric/CHANGELOG.md +++ b/src/lightning/fabric/CHANGELOG.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added -- +- Added support for NVIDIA H200 GPUs in `get_available_flops` ([#21119](https://github.com/Lightning-AI/pytorch-lightning/pull/21119)) ### Removed