
Commit c3bb5bf

Use cudaforge for kernel build (#3346)
* Use cudaforge for kernel build
* Fix clippy
* Update cudaforge to v0.1.2
* Fix build candle-examples

Co-authored-by: Eric Buehler <ericlbuehler@gmail.com>
1 parent 971e7ed commit c3bb5bf

File tree: 13 files changed (+116, -437 lines)

Cargo.toml

Lines changed: 0 additions & 2 deletions
```diff
@@ -13,7 +13,6 @@ members = [
 ]
 exclude = [
     "candle-book",
-    "candle-flash-attn-build",
     "candle-flash-attn",
     "candle-flash-attn-v3",
     "candle-kernels",
@@ -38,7 +37,6 @@ anyhow = { version = "1", features = ["backtrace"] }
 byteorder = "1.4.3"
 candle = { path = "./candle-core", package = "candle-core", version = "0.9.2" }
 candle-datasets = { path = "./candle-datasets", version = "0.9.2" }
-candle-flash-attn-build = { path = "candle-flash-attn-build", version = "0.9.2" }
 candle-flash-attn = { path = "./candle-flash-attn", version = "0.9.2" }
 candle-flash-attn-v3 = { path = "./candle-flash-attn-v3", version = "0.9.2" }
 candle-kernels = { path = "./candle-kernels", version = "0.9.2" }
```

README.md

Lines changed: 26 additions & 12 deletions
````diff
@@ -368,20 +368,34 @@ conditions](https://huggingface.co/meta-llama/Llama-2-7b-hf), and set up your
 authentication token. See issue
 [#350](https://github.com/huggingface/candle/issues/350) for more details.
 
-#### Missing cute/cutlass headers when compiling flash-attn
+#### Docker build
+
+When building CUDA kernels inside a Dockerfile, nvidia-smi cannot be used to auto-detect compute capability.
+
+You must explicitly set CUDA_COMPUTE_CAP, for example:
 
 ```
-In file included from kernels/flash_fwd_launch_template.h:11:0,
-                 from kernels/flash_fwd_hdim224_fp16_sm80.cu:5:
-kernels/flash_fwd_kernel.h:8:10: fatal error: cute/algorithm/copy.hpp: No such file or directory
- #include <cute/algorithm/copy.hpp>
-          ^~~~~~~~~~~~~~~~~~~~~~~~~
-compilation terminated.
-Error: nvcc error while compiling:
-```
-[cutlass](https://github.com/NVIDIA/cutlass) is provided as a git submodule so you may want to run the following command to check it in properly.
-```bash
-git submodule update --init
+FROM nvidia/cuda:12.9.0-devel-ubuntu22.04
+
+# Install git and curl
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -y curl git ca-certificates;
+
+# Install Rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Clone candle repo
+RUN git clone https://github.com/huggingface/candle.git
+
+# Set compute capability for the build
+ARG CUDA_COMPUTE_CAP=90
+ENV CUDA_COMPUTE_CAP=${CUDA_COMPUTE_CAP}
+
+# Build with explicit compute cap
+WORKDIR /app
+COPY . .
+RUN cargo build --release --features cuda
 ```
 
 #### Compiling with flash-attention fails
````
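Since the added Dockerfile declares `ARG CUDA_COMPUTE_CAP=90`, the capability can also be overridden at image build time with a standard build argument, e.g. `docker build --build-arg CUDA_COMPUTE_CAP=86 -t candle-cuda .` (the `candle-cuda` tag here is only an illustrative name).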

candle-examples/Cargo.toml

Lines changed: 2 additions & 2 deletions
```diff
@@ -60,7 +60,7 @@ tokio = "1.48.0"
 
 [build-dependencies]
 anyhow = { workspace = true }
-bindgen_cuda = { version = "0.1.5", optional = true }
+cudaforge = { version = "0.1.2", optional = true }
 hf-hub = { workspace = true, features = ["tokio"] }
 
 [features]
@@ -75,7 +75,7 @@ cuda = [
     "candle/cuda",
     "candle-nn/cuda",
     "candle-transformers/cuda",
-    "dep:bindgen_cuda",
+    "dep:cudaforge",
 ]
 cudnn = ["candle/cudnn", "candle-nn/cudnn", "candle-transformers/cudnn"]
 flash-attn = ["cuda", "candle-transformers/flash-attn", "dep:candle-flash-attn"]
```

candle-examples/build.rs

Lines changed: 13 additions & 14 deletions
```diff
@@ -1,42 +1,42 @@
 #![allow(unused)]
-use anyhow::{Context, Result};
-use std::env;
-use std::io::Write;
-use std::path::{Path, PathBuf};
 mod buildtime_downloader;
 use buildtime_downloader::download_model;
 
 struct KernelDirectories {
     kernel_glob: &'static str,
     rust_target: &'static str,
-    include_dirs: &'static [&'static str],
 }
 
 const KERNEL_DIRS: [KernelDirectories; 1] = [KernelDirectories {
     kernel_glob: "examples/custom-ops/kernels/*.cu",
     rust_target: "examples/custom-ops/cuda_kernels.rs",
-    include_dirs: &[],
 }];
 
-fn main() -> Result<()> {
+fn main() {
     println!("cargo::rerun-if-changed=build.rs");
 
     #[cfg(feature = "cuda")]
     {
+        use std::env;
+        use std::path::{Path, PathBuf};
         // Added: Get the safe output directory from the environment.
         let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
 
         for kdir in KERNEL_DIRS.iter() {
-            let builder = bindgen_cuda::Builder::default().kernel_paths_glob(kdir.kernel_glob);
-            let bindings = builder.build_ptx().unwrap();
-
             // Changed: This now writes to a safe path inside $OUT_DIR.
             let safe_target = out_dir.join(
                 Path::new(kdir.rust_target)
                     .file_name()
-                    .context("Failed to get filename from rust_target")?,
+                    .expect("Failed to get filename from rust_target"),
             );
-            bindings.write(safe_target).unwrap()
+
+            let bindings = cudaforge::KernelBuilder::new()
+                .source_glob(kdir.kernel_glob)
+                .build_ptx()
+                .expect("Failed to build ptx");
+            bindings
+                .write(safe_target)
+                .expect("Failed to write ptx bindings");
         }
     }
 
@@ -45,7 +45,6 @@ fn main() -> Result<()> {
     // Example value:
     // CANDLE_BUILDTIME_MODEL_REVISION="sentence-transformers/all-MiniLM-L6-v2:c9745ed1d9f207416be6d2e6f8de32d1f16199bf"
     if let Some(model_rev) = core::option_env!("CANDLE_BUILDTIME_MODEL_REVISION") {
-        buildtime_downloader::download_model(model_rev)?;
+        buildtime_downloader::download_model(model_rev).expect("Model download failed!");
     }
-    Ok(())
 }
```
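Stripped of the diff markers, the migrated flow reduces to the following minimal sketch of a cudaforge-based build script. It assumes only the `KernelBuilder::new()`, `source_glob()`, `build_ptx()` and `write()` calls shown in this commit; the glob and output file name are illustrative.

```rust
// build.rs — minimal sketch mirroring the pattern above.
// The source glob and output file name are illustrative, not fixed API.
fn main() {
    println!("cargo::rerun-if-changed=build.rs");

    #[cfg(feature = "cuda")]
    {
        use std::env;
        use std::path::PathBuf;

        // Write generated bindings into Cargo's build output directory.
        let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());

        // Compile the matching .cu files to PTX and emit Rust bindings for them.
        let bindings = cudaforge::KernelBuilder::new()
            .source_glob("examples/custom-ops/kernels/*.cu")
            .build_ptx()
            .expect("Failed to build ptx");
        bindings
            .write(out_dir.join("cuda_kernels.rs"))
            .expect("Failed to write ptx bindings");
    }
}
```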

candle-examples/buildtime_downloader.rs

Lines changed: 1 addition & 6 deletions
```diff
@@ -1,10 +1,5 @@
-use anyhow::{Context, Result};
+use anyhow::Result;
 use hf_hub::{api::sync::Api, Repo, RepoType};
-use std::{
-    fs::{self, File},
-    io::copy,
-    path::Path,
-};
 
 pub fn download_model(model_and_revision: &str) -> Result<()> {
     let (model_id, revision) = match model_and_revision.split_once(":") {
```

candle-flash-attn-build/Cargo.toml

Lines changed: 0 additions & 10 deletions
This file was deleted.

candle-flash-attn-build/src/lib.rs

Lines changed: 0 additions & 102 deletions
This file was deleted.

candle-flash-attn-v3/Cargo.toml

Lines changed: 1 addition & 1 deletion
```diff
@@ -19,7 +19,7 @@ half = { version = "2.3.1", features = ["num-traits"] }
 anyhow = { version = "1", features = ["backtrace"] }
 num_cpus = "1.15.0"
 rayon = "1.7.0"
-candle-flash-attn-build = { path = "../candle-flash-attn-build", version = "0.9.2" }
+cudaforge = "0.1"
 
 [dev-dependencies]
 anyhow = { version = "1", features = ["backtrace"] }
```
