Commit 280c57e

fix: working README.md example, support nvfuser for torch==2.8 (#2525)
Parent: ac24932

4 files changed (+62 −30 lines)

README.md

Lines changed: 29 additions & 12 deletions
````diff
@@ -77,33 +77,50 @@ For **performance experts**, Thunder is the most ergonomic framework for underst
 Install Thunder via pip ([more options](https://lightning.ai/docs/thunder/latest/fundamentals/installation.html)):
 
 ```bash
-pip install torch==2.6.0 torchvision==0.21 nvfuser-cu124-torch26
-
 pip install lightning-thunder
+
+pip install -U torch torchvision
+pip install nvfuser-cu128-torch28 nvidia-cudnn-frontend # if NVIDIA GPU is present
 ```
 
 <details>
-<summary>Advanced install options</summary>
+<summary>For older versions of <code>torch</code></summary>
+
+<code>torch==2.7</code> + CUDA 12.8
+
+```bash
+pip install lightning-thunder
 
-### Blackwell support
+pip install torch==2.7.0 torchvision==0.22
+pip install nvfuser-cu128-torch27 nvidia-cudnn-frontend # if NVIDIA GPU is present
+```
 
-For Blackwell you'll need CUDA 12.8
+<code>torch==2.6</code> + CUDA 12.6
 
 ```bash
-pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu128
-pip install --pre nvfuser-cu128 --extra-index-url https://pypi.nvidia.com
+pip install lightning-thunder
 
+pip install torch==2.6.0 torchvision==0.21
+pip install nvfuser-cu126-torch26 nvidia-cudnn-frontend # if NVIDIA GPU is present
+```
+
+<code>torch==2.5</code> + CUDA 12.4
+
+```bash
 pip install lightning-thunder
+
+pip install torch==2.5.0 torchvision==0.20
+pip install nvfuser-cu124-torch25 nvidia-cudnn-frontend # if NVIDIA GPU is present
 ```
 
-### Install additional executors
+</details>
 
-These are optional, feel free to mix and match
+<details>
+<summary>Advanced install options</summary>
 
-```bash
-# cuDNN SDPA
-pip install nvidia-cudnn-frontend
+### Install optional executors
 
+```bash
 # Float8 support (this will compile from source, be patient)
 pip install "transformer_engine[pytorch]"
 ```
````
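The updated instructions can be sanity-checked with a short script. The sketch below is not part of the commit; it assumes an install done with the commands above, uses Thunder's public `thunder.jit` entry point and `thunder.last_traces`, and simply runs on CPU when no NVIDIA GPU (and hence no nvFuser) is present.

```python
# Minimal smoke test for the install above (a sketch, not part of this commit).
import torch
import thunder


def f(x, y):
    return torch.nn.functional.relu(x @ y)


jf = thunder.jit(f)  # compile the function with Thunder

# Use the GPU path (nvFuser/cuDNN executors) only if CUDA is actually available.
device = "cuda" if torch.cuda.is_available() else "cpu"
x = torch.randn(4, 4, device=device)
y = torch.randn(4, 4, device=device)

print(jf(x, y))
print(thunder.last_traces(jf)[-1])  # inspect the final execution trace
```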

thunder/executors/transformer_engineex.py

Lines changed: 10 additions & 7 deletions
```diff
@@ -8,18 +8,21 @@
 from thunder.extend import StatefulExecutor
 from thunder.core.trace import TraceCtx
 
+import torch
+
 __all__ = ["transformer_engine_ex", "TransformerEngineTransform", "_te_activation_checkpointing_transform"]
 
 transformer_engine_ex: None | StatefulExecutor = None
 TransformerEngineTransform: None | Transform = None
 _te_activation_checkpointing_transform: None | Callable[[TraceCtx], TraceCtx] = None
 
-if package_available("transformer_engine"):
-    import thunder.executors.transformer_engineex_impl as impl
+if torch.cuda.is_available():
+    if package_available("transformer_engine"):
+        import thunder.executors.transformer_engineex_impl as impl
 
-    transformer_engine_ex = impl.transformer_engine_ex
-    TransformerEngineTransform = impl.TransformerEngineTransform
-    _te_activation_checkpointing_transform = impl._te_activation_checkpointing_transform
+        transformer_engine_ex = impl.transformer_engine_ex
+        TransformerEngineTransform = impl.TransformerEngineTransform
+        _te_activation_checkpointing_transform = impl._te_activation_checkpointing_transform
 
-else:
-    warnings.warn("transformer_engine module not found!")
+    else:
+        warnings.warn("transformer_engine module not found!")
```
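The change above gates the optional Transformer Engine import behind a CUDA check, so CPU-only environments no longer emit a spurious "module not found" warning at import time. A standalone sketch of the same pattern, with generic helper names that are not repo code:

```python
# Guarded optional-import pattern, in isolation (illustrative helper, not repo code).
import importlib
import importlib.util
import warnings

import torch


def load_optional_gpu_extra(module_name: str):
    """Import a GPU-only optional dependency, or return None when it does not apply."""
    if not torch.cuda.is_available():
        # CPU-only install: the extra is irrelevant, so skip silently instead of warning.
        return None
    if importlib.util.find_spec(module_name) is None:
        warnings.warn(f"{module_name} module not found!")
        return None
    return importlib.import_module(module_name)


te = load_optional_gpu_extra("transformer_engine")
print("transformer_engine available:", te is not None)
```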
thunder/executors/triton_crossentropy.py

Lines changed: 12 additions & 8 deletions

```diff
@@ -1,16 +1,20 @@
 from thunder.executors import triton_utils
 from thunder.extend import OperatorExecutor
 
+import torch
+
 triton_version: None | str = triton_utils.triton_version()
 
 triton_ex: None | OperatorExecutor = None
-if triton_version is not None:
-    try:
-        from thunder.executors.triton_crossentropy_impl import triton_ex as impl_ex
 
-        triton_ex = impl_ex
-    except Exception:
-        import warnings
+if torch.cuda.is_available():
+    if triton_version is not None:
+        try:
+            from thunder.executors.triton_crossentropy_impl import triton_ex as impl_ex
+
+            triton_ex = impl_ex
+        except Exception:
+            import warnings
 
-        warnings.warn("triton is present but cannot be initialized")
-        triton_version = None
+            warnings.warn("triton is present but cannot be initialized")
+            triton_version = None
```
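The triton cross-entropy executor gets the same CUDA gate, kept inside the existing try/except because triton can be installed yet still fail to initialize (for example, a driver or toolchain mismatch). A self-contained sketch of that fallback, using a plain boolean flag instead of the executor object:

```python
# Try/except fallback for an optional compiler backend (a sketch, not repo code).
import warnings

import torch

triton_usable = False
if torch.cuda.is_available():
    try:
        import triton  # noqa: F401  # may raise even when installed, e.g. on a broken toolchain

        triton_usable = True
    except Exception:
        warnings.warn("triton is present but cannot be initialized")

print("triton usable:", triton_usable)
```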

thunder/recipes/base.py

Lines changed: 11 additions & 3 deletions
```diff
@@ -13,6 +13,7 @@ def get_nvfuser_package_hint() -> str:
         "2.5": "nvfuser-cu124-torch25",
         "2.6": "nvfuser-cu126-torch26",
         "2.7": "nvfuser-cu128-torch27",
+        "2.8": "nvfuser-cu128-torch28",
     }
 
     torch_key = ".".join(torch_version.split(".")[:2])
@@ -73,8 +74,17 @@ def __init__(
         plugins=None,
     ):
         super().__init__(interpreter=interpreter, plugins=plugins)
-        self.executor_names = ["cudnn", "sdpa", "torchcompile_xentropy"]
         self.fuser = fuser
+        self.executor_names = []
+
+        if torch.cuda.is_available():
+            self.executor_names = ["cudnn", "sdpa"]
+            if self.fuser == "nvfuser":
+                self.executor_names.append("torchcompile_xentropy")
+        else:
+            print("GPU not found, nvFuser not available. Setting fusing executor to torch.compile")
+            self.fuser = "torch.compile"
+
         self.setup_fuser()
         self.show_progress = show_progress
 
@@ -114,8 +124,6 @@ def setup_fuser(self) -> None:
             if "nvfuser" not in self.executor_names:
                 self.executor_names.append("nvfuser")
         elif self.fuser == "torch.compile":
-            if "torchcompile_xentropy" in self.executor_names:
-                self.executor_names.remove("torchcompile_xentropy")
             if "torchcompile" not in self.executor_names:
                 self.executor_names.append("torchcompile")
         else:
```
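The first hunk extends the torch-version-to-wheel hint used when nvFuser is missing. That lookup can be reproduced standalone; the dictionary name below is made up for illustration, and `torch.__version__` stands in for the `torch_version` value the recipe inspects:

```python
# Standalone sketch of the nvfuser wheel hint extended by this commit.
import torch

# Hypothetical name; the diff only shows the mapping entries and the key derivation.
NVFUSER_WHEEL_HINTS = {
    "2.5": "nvfuser-cu124-torch25",
    "2.6": "nvfuser-cu126-torch26",
    "2.7": "nvfuser-cu128-torch27",
    "2.8": "nvfuser-cu128-torch28",  # new in this commit
}

# Same key derivation as the recipe: keep only the major.minor part of the version.
torch_key = ".".join(torch.__version__.split(".")[:2])  # e.g. "2.8.0+cu128" -> "2.8"
print(NVFUSER_WHEEL_HINTS.get(torch_key, "no known nvfuser wheel for this torch version"))
```

The other two hunks make the recipe usable on CPU-only machines: the GPU executors (`cudnn`, `sdpa`, plus `torchcompile_xentropy` under the `nvfuser` fuser) are registered only when CUDA is available; otherwise the recipe reports the fallback, switches the fuser to `torch.compile`, and `setup_fuser()` registers the `torchcompile` executor instead.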
