build: Install flash-attn (#335)

ko3n1g · web-flow · commit 67abaca3b4a5 · 2025-08-25T15:50:59.000+02:00
Signed-off-by: oliver könig <okoenig@nvidia.com>
diff --git a/docker/common/install.sh b/docker/common/install.sh
@@ -135,6 +135,7 @@ main() {
         uv sync \
             --link-mode copy \
             --locked \
+            --extra fa \
             --all-groups ${UV_ARGS[@]}
         # Install the package
         uv pip install --no-deps -e .
diff --git a/pyproject.toml b/pyproject.toml
@@ -88,6 +88,7 @@ trtllm = [
     "cuda-python~=12.8.0",
 ]
 trt-onnx = ["tensorrt==10.11.0.33", "transformers==4.51.3", "onnx==1.18.0"]
+fa = ["flash-attn==2.8.1"]
 
 [dependency-groups]
 # This is a default group so that we install these even with bare `uv sync`
@@ -109,6 +110,7 @@ nemo-run = ["nemo-run"]
 
 [tool.uv.sources]
 xformers = [{ index = "pytorch-cu128" }]
+torch = [{ index = "pytorch-cu128" }]
 vllm = [
     { index = "pytorch-cu128", marker = "python_version < '3.9' and platform_machine == 'x86_64'" },
     { index = "pypi", marker = "platform_machine == 'aarch64'" },
@@ -119,7 +121,11 @@ transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git",
 
 [tool.uv]
 # Currently, TE must be built with no build-isolation b/c it requires torch
-no-build-isolation-package = ["transformer-engine", "transformer-engine-torch"]
+no-build-isolation-package = [
+    "transformer-engine",
+    "transformer-engine-torch",
+    "flash-attn",
+]
 # Always apply the build group since dependencies like TE/mcore/nemo-run require build dependencies
 # and this lets us assume they are implicitly installed with a simply `uv sync`. Ideally, we'd
 # avoid including these in the default dependency set, but for now it's required.
@@ -137,6 +143,11 @@ override-dependencies = [
 ]
 prerelease = "allow"
 
+# Needed when building from source
+[[tool.uv.dependency-metadata]]
+name = "flash-attn"
+requires-dist = ["torch", "einops", "setuptools", "psutil", "ninja"]
+
 [[tool.uv.index]]
 name = "pypi"
 url = "https://pypi.org/simple"
@@ -205,4 +216,4 @@ convention = "google"
 # Ignore all files that end in `_test.py`.
 "*_test.py" = ["D"]
 # Ignore F401 (import but unused) in __init__.py
-"__init__.py" = ["F401"]
+"__init__.py" = ["F401"]
diff --git a/uv.lock b/uv.lock