Skip to content

Commit 3fdc224

Browse files
Fix projection layer loading for voyage-4-nano
The projection layer weight (`linear.weight`) is at the root level of the safetensors file, not under the `model` prefix, so we capture the root `VarBuilder` before applying the `model` prefix. Tested with voyage-4-nano:
- Output dimension: 2048 (correct)
- Cosine similarity vs transformers: 0.999965
- Inference time: 7.8 ms on an L4 GPU
1 parent 2f1e3ee commit 3fdc224

File tree

2 files changed

+6
-16
lines changed

2 files changed

+6
-16
lines changed

backends/candle/src/models/flash_qwen3.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,8 @@ impl FlashQwen3Model {
317317

318318
// The Qwen3-Reranker models contain the `model` key
319319
// https://huggingface.co/collections/Qwen/qwen3-reranker-6841b22d0192d7ade9cdefea
320+
// Keep reference to root vb for loading projection layer
321+
let vb_root = vb.clone();
320322
let vb = if vb.contains_tensor("model.embed_tokens.weight") {
321323
vb.pp("model")
322324
} else {
@@ -337,15 +339,8 @@ impl FlashQwen3Model {
337339

338340
// voyage-4-nano: load projection layer if num_labels is set
339341
// The projection transforms hidden_size (1024) to num_labels (2048)
342+
// Use vb_root (root level) since linear.weight is at root, not under "model"
340343
let projection = if let Some(num_labels) = config.num_labels {
341-
// Try to load from the model root (voyage-4-nano uses "linear.weight")
342-
let vb_root = if vb.contains_tensor("linear.weight") {
343-
vb.clone()
344-
} else {
345-
// Also check under "model" prefix for reranker-style models
346-
vb.pp("..") // go up one level if we're already in "model"
347-
};
348-
349344
if vb_root.contains_tensor("linear.weight") {
350345
let projection_weight = vb_root.get((num_labels, config.hidden_size), "linear.weight")?;
351346
Some(Linear::new(projection_weight, None, None))

backends/candle/src/models/qwen3.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,8 @@ impl Qwen3Model {
410410

411411
// The Qwen3-Reranker models contain the `model` key
412412
// https://huggingface.co/collections/Qwen/qwen3-reranker-6841b22d0192d7ade9cdefea
413+
// Keep reference to root vb for loading projection layer
414+
let vb_root = vb.clone();
413415
let vb = if vb.contains_tensor("model.embed_tokens.weight") {
414416
vb.pp("model")
415417
} else {
@@ -430,15 +432,8 @@ impl Qwen3Model {
430432

431433
// voyage-4-nano: load projection layer if num_labels is set
432434
// The projection transforms hidden_size (1024) to num_labels (2048)
435+
// Use vb_root (root level) since linear.weight is at root, not under "model"
433436
let projection = if let Some(num_labels) = config.num_labels {
434-
// Try to load from the model root (voyage-4-nano uses "linear.weight")
435-
let vb_root = if vb.contains_tensor("linear.weight") {
436-
vb.clone()
437-
} else {
438-
// Also check under "model" prefix for reranker-style models
439-
vb.pp("..") // go up one level if we're already in "model"
440-
};
441-
442437
if vb_root.contains_tensor("linear.weight") {
443438
let projection_weight = vb_root.get((num_labels, config.hidden_size), "linear.weight")?;
444439
Some(Linear::new(projection_weight, None, None))

0 commit comments

Comments (0)