Commit 7ec259a

fix(clip): support f32 norm, swap up/down
Signed-off-by: YdrMaster <ydrml@hotmail.com>
1 parent 88d3c23 commit 7ec259a

File tree (4 files changed, +25 -21 lines):

  models/clip/common/src/compute.rs
  models/clip/common/src/lib.rs
  models/clip/common/src/projector/resampler.rs
  models/clip/common/src/storage.rs
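
In short, the commit does two things. First, ClipMeta carries a separate dt_norm for normalization weights, read from the first block's ln1 tensor, so a checkpoint whose bulk weights are, say, f16 but whose norm weights are f32 loads with the right dtype (previously norm tensors were assumed to share dt). Second, the GGUF tensor names feeding ffn_up_*/ffn_down_* are swapped, and the resampler output projection now reads resampler.attn.out.* instead of resampler.attn.o.*. The sketch below illustrates only the first point; it is a simplified standalone example, and DigitLayout here is a stand-in enum rather than the crate's actual type.

// Minimal standalone sketch of the dt/dt_norm split, with simplified types.
// `DigitLayout` below is a stand-in enum, not the crate's real `DigitLayout`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DigitLayout {
    F16,
    F32,
}

#[derive(Clone, Debug)]
struct ClipMeta {
    /// dtype of the bulk weights (attention / FFN projections)
    dt: DigitLayout,
    /// dtype of normalization weights; may stay f32 in an otherwise f16 checkpoint
    dt_norm: DigitLayout,
    /// embedding size
    d: usize,
}

impl ClipMeta {
    /// A norm weight is a `[d]` vector in `dt_norm`, not in `dt`.
    fn norm_desc(&self) -> (DigitLayout, Vec<usize>) {
        (self.dt_norm, vec![self.d])
    }
}

fn main() {
    // Illustrative values only; the real ones come from the GGUF metadata.
    let meta = ClipMeta { dt: DigitLayout::F16, dt_norm: DigitLayout::F32, d: 1152 };
    assert_eq!(meta.norm_desc(), (DigitLayout::F32, vec![1152]));
    println!("{meta:?}");
}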

models/clip/common/src/compute.rs

Lines changed: 2 additions & 1 deletion
@@ -148,6 +148,7 @@ where
         let Args { raw, pos } = args;
         let ClipMeta {
             dt,
+            dt_norm,
             nblk,
             nh,
             nkvh,
@@ -267,7 +268,7 @@ where
 
         let weights = &self.weights.weights;
         let q0 = Tensor::new(dt, &[dq, d]).map(|_| weights.resampler_q(queue));
-        let ln_qkv = Tensor::new(dt, &[d]);
+        let ln_qkv = Tensor::new(dt_norm, &[d]);
 
         let q = Tensor::new(dt, q0.shape());
         let kv = Tensor::new(dt, &[np, d]);

models/clip/common/src/lib.rs

Lines changed: 2 additions & 1 deletion
@@ -23,6 +23,7 @@ pub mod ext {
 #[derive(Clone, Debug)]
 pub struct ClipMeta {
     pub dt: DigitLayout,
+    pub dt_norm: DigitLayout,
 
     pub d_patch: usize,
     pub d_image: usize,
@@ -66,7 +67,7 @@ impl ClipMeta {
 
     pub fn norm(&self) -> Tensor<usize> {
         let &Self { d, .. } = self;
-        Tensor::new(self.dt, &[d])
+        Tensor::new(self.dt_norm, &[d])
     }
 
     pub fn attn_qkv_w(&self) -> Tensor<usize> {

models/clip/common/src/projector/resampler.rs

Lines changed: 14 additions & 14 deletions
@@ -41,20 +41,20 @@ impl<'a> Storage<&'a [u8]> {
     #[rustfmt::skip]
     pub fn from_gguf(gguf: &GGufModel<'a>) -> Self {
         Self {
-            wkv   : tensor![gguf => "resampler.kv.weight"    ].data ,
-            q     : tensor![gguf => "resampler.query"        ].data ,
-            ln_q  : [tensor![gguf => "resampler.ln_q.weight"  ].data ,
-                     tensor![gguf => "resampler.ln_q.bias"    ].data],
-            ln_kv : [tensor![gguf => "resampler.ln_kv.weight" ].data ,
-                     tensor![gguf => "resampler.ln_kv.bias"   ].data],
-            attn_q: [tensor![gguf => "resampler.attn.q.weight"].data ,
-                     tensor![gguf => "resampler.attn.q.bias"  ].data],
-            attn_k: [tensor![gguf => "resampler.attn.k.weight"].data ,
-                     tensor![gguf => "resampler.attn.k.bias"  ].data],
-            attn_v: [tensor![gguf => "resampler.attn.v.weight"].data ,
-                     tensor![gguf => "resampler.attn.v.bias"  ].data],
-            attn_o: [tensor![gguf => "resampler.attn.o.weight"].data ,
-                     tensor![gguf => "resampler.attn.o.bias"  ].data],
+            wkv   : tensor![gguf => "resampler.kv.weight"      ].data ,
+            q     : tensor![gguf => "resampler.query"          ].data ,
+            ln_q  : [tensor![gguf => "resampler.ln_q.weight"    ].data ,
+                     tensor![gguf => "resampler.ln_q.bias"      ].data],
+            ln_kv : [tensor![gguf => "resampler.ln_kv.weight"   ].data ,
+                     tensor![gguf => "resampler.ln_kv.bias"     ].data],
+            attn_q: [tensor![gguf => "resampler.attn.q.weight"  ].data ,
+                     tensor![gguf => "resampler.attn.q.bias"    ].data],
+            attn_k: [tensor![gguf => "resampler.attn.k.weight"  ].data ,
+                     tensor![gguf => "resampler.attn.k.bias"    ].data],
+            attn_v: [tensor![gguf => "resampler.attn.v.weight"  ].data ,
+                     tensor![gguf => "resampler.attn.v.bias"    ].data],
+            attn_o: [tensor![gguf => "resampler.attn.out.weight"].data ,
+                     tensor![gguf => "resampler.attn.out.bias"  ].data],
         }
     }
 }
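
Besides the column realignment, the only functional change here is the rename of the output-projection keys from resampler.attn.o.* to resampler.attn.out.*. If a loader needs to accept GGUF files written with either naming, one option is a fallback lookup like the hypothetical helper below (a plain HashMap stands in for the GGUF tensor table; this is not code from the repository).

use std::collections::HashMap;

// Hypothetical fallback lookup, not part of this repository: try the new tensor
// name first, then the old one, so checkpoints exported with either naming load.
fn get_either<'a>(
    tensors: &'a HashMap<String, Vec<u8>>, // stand-in for the GGUF tensor map
    primary: &str,
    fallback: &str,
) -> Option<&'a [u8]> {
    tensors
        .get(primary)
        .or_else(|| tensors.get(fallback))
        .map(Vec::as_slice)
}

fn main() {
    let mut tensors = HashMap::new();
    tensors.insert("resampler.attn.o.weight".to_string(), vec![0u8; 4]);

    // Newer name first, older name as the fallback.
    let data = get_either(&tensors, "resampler.attn.out.weight", "resampler.attn.o.weight");
    assert!(data.is_some());
}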

models/clip/common/src/storage.rs

Lines changed: 7 additions & 5 deletions
@@ -36,13 +36,15 @@ pub struct BlkStorage<T> {
 impl<'a> Storage<&'a [u8]> {
     pub fn from_gguf(gguf: &GGufModel<'a>) -> Self {
         let pos_embd = &gguf.tensors["v.position_embd.weight"];
+        let ln1_0 = &gguf.tensors["v.blk.0.ln1.weight"];
 
         let d = meta![gguf => (usize) "clip.vision.embedding_length"];
         let nh = meta![gguf => (usize) "clip.vision.attention.head_count"];
 
         #[rustfmt::skip]
         let meta = ClipMeta {
             dt     : pos_embd.ty,
+            dt_norm: ln1_0.ty,
             d_patch: meta![gguf => (usize) "clip.vision.patch_size"],
             d_image: meta![gguf => (usize) "clip.vision.image_size"],
 
@@ -70,10 +72,10 @@ impl<'a> Storage<&'a [u8]> {
 
                 ffn_norm_w: tensor![gguf => format!("v.blk.{i}.ln2.weight"     )].data,
                 ffn_norm_b: tensor![gguf => format!("v.blk.{i}.ln2.bias"       )].data,
-                ffn_up_w:   tensor![gguf => format!("v.blk.{i}.ffn_up.weight"  )].data,
-                ffn_up_b:   tensor![gguf => format!("v.blk.{i}.ffn_up.bias"    )].data,
-                ffn_down_w: tensor![gguf => format!("v.blk.{i}.ffn_down.weight")].data,
-                ffn_down_b: tensor![gguf => format!("v.blk.{i}.ffn_down.bias"  )].data,
+                ffn_up_w:   tensor![gguf => format!("v.blk.{i}.ffn_down.weight")].data,
+                ffn_up_b:   tensor![gguf => format!("v.blk.{i}.ffn_down.bias"  )].data,
+                ffn_down_w: tensor![gguf => format!("v.blk.{i}.ffn_up.weight"  )].data,
+                ffn_down_b: tensor![gguf => format!("v.blk.{i}.ffn_up.bias"    )].data,
             })
             .collect();
 
@@ -100,7 +102,7 @@ fn get_rgb(gguf: &GGufModel, key: &str) -> [f32; 3] {
     let mut arr = gguf.get_f32_arr(key).unwrap();
     let mut ans = [0.0; 3];
    for x in ans.iter_mut() {
-        *x = arr.next().unwrap().unwrap();
+        *x = arr.next().unwrap().unwrap()
     }
     ans
 }
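
The swap above means ffn_up_* is now filled from the v.blk.{i}.ffn_down.* tensors and vice versa, which suggests the names in the target GGUF files are reversed relative to the loader's up/down convention. A shape-based sanity check can confirm which mapping a given checkpoint needs: the up projection maps the embedding size d to the FFN hidden size di, and the down projection maps back. The helper below is a hypothetical illustration assuming weights are stored as [output_dim, input_dim]; it is not part of this repository.

// Hypothetical shape check, not from this repository. Assumes a 2-D weight is
// stored as [output_dim, input_dim]; adjust if your storage order differs.
fn looks_like_up_projection(shape: &[usize], d: usize, di: usize) -> bool {
    // "up": [d] -> [di], so the weight should be di x d.
    matches!(shape, [out, inp] if *out == di && *inp == d)
}

fn main() {
    // Illustrative sizes only.
    let (d, di) = (1152, 4304);

    assert!(looks_like_up_projection(&[di, d], d, di));  // an up-projection weight
    assert!(!looks_like_up_projection(&[d, di], d, di)); // a down-projection weight
    println!("shape check ok");
}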
