跑通mlp

onenewcode · onenewcode · commit 4ac05d7eee8d · 2025-02-13T10:54:15.000Z
diff --git a/models/minicpm3/common/src/compute.rs b/models/minicpm3/common/src/compute.rs
@@ -152,7 +152,7 @@ where
         // llama.cpp 定义死
         let scale_emb = 12f32;
         let scale_depth = 1.4f32;
-        //  提前进行缩放
+        //  残差连接时权重缩放
         let s = scale_depth / (nblk as f32).sqrt();
         fn ggml_scale(embd: *mut f16, s: f16, l: usize) {
             if l == 0 {
@@ -181,6 +181,11 @@ where
         let mut q = q.map(|_| buf);
         let (buf, workspace) = workspace.split_at_mut(*kv_pe.get());
         let mut kv_pe = kv_pe.map(|_| buf);
+        // 进行 attention
+        let attn = tensor(&[nt, nh, dv]);
+        let (buf, workspace) = workspace.split_at_mut(*attn.get());
+        let mut attn = attn.map(|_| buf);
+
         let queue = queue_alloc.queue();
         for iblk in 0..nblk {
             // norm
@@ -323,15 +328,11 @@ where
                 let k_rope_2 = k_rope_0.tile(1, &[1, dh]).broadcast(1, nh);
                 self.rearrange(&mut k_rope_r, &k_rope_2, workspace, queue_alloc)?;
                 self.rearrange(&mut k_nope_r, &k_nope, workspace, queue_alloc)?;
-                // 经行 attention
-                let attn = tensor(&[nt, nh, dv]);
-                let (buf, workspace) = workspace.split_at_mut(*attn.get());
-                let mut attn = attn.map(|_| buf);
 
                 let mut q = q3.transpose(&[1, 0]);
                 let k = k.map_slice().transpose(&[1, 0]);
                 let mut v = v.map_slice_mut().transpose(&[1, 0]);
-                let mut attn = attn.transpose(&[1, 0]);
+                let mut attn = unsafe { attn.map_slice_mut().transpose(&[1, 0]) };
                 self.attnention(
                     &mut q,
                     &k,
@@ -346,12 +347,9 @@ where
                 let w = self.weights.attn_o(iblk, queue);
 
                 self.mat_mul(&mut x1, 0., &o, &w, s, workspace, queue_alloc)?;
+                let inplace = unsafe { x.map_slice_static() };
+                self.add(&mut x, &inplace, &x1, workspace, queue_alloc)?;
             }
-            let inplace = unsafe { x.map_slice_static() };
-            //是否给 add 加上缩放系数
-
-            self.add(&mut x, &inplace, &x1, workspace, queue_alloc)?;
-
             let w = self.weights.ffn_norm(iblk, queue);
             self.rms_norm(&mut x1, &x, &w, workspace, queue_alloc)?;
             drop(w);
@@ -361,29 +359,32 @@ where
             split!(gate_up => gate, up; [di, di] @ 1);
             let mut gate = gate;
             let mut up = up;
-            let w = self.weights.ffn_gate(iblk, queue).transpose(&[0,1]);
+            let w = self.weights.ffn_gate(iblk, queue);
             self.mat_mul(&mut gate, 0., &x1, &w, 1., workspace, queue_alloc)?;
-            // Ops::debug(&w, queue);
+
+            let w = self.weights.ffn_up(iblk, queue);
+            self.mat_mul(&mut up, 0., &x1, &w, 1., workspace, queue_alloc)?;
+
+            self.swiglu(&mut gate, &up, workspace, queue_alloc)?;
 
             fn print_first_10_elements(ptr: *const f16) {
                 assert!(!ptr.is_null(), "Pointer must not be null");
 
                 unsafe {
                     for i in 0..10 {
-                        // 逐个访问并打印前10个元素
+                        // 逐个访问并打印前 10 个元素
                         let element = ptr.offset(i as isize).read();
                         println!("Element {}: {:?}", i, element);
                     }
                 }
             }
-            print_first_10_elements(w.base().cast::<f16>());
-            todo!();
-
-            self.swiglu(&mut gate, &up, workspace, queue_alloc)?;
 
             let w = self.weights.ffn_down(iblk, queue);
-            let residual = if self.id == 0 { 1. } else { 0. };
-            self.mat_mul(&mut x, residual, &gate, &w, 1., workspace, queue_alloc)?;
+            self.mat_mul(&mut x1, 0., &gate, &w, s, workspace, queue_alloc)?;
+
+            let inplace = unsafe { x.map_slice_static() };
+            self.add(&mut x, &inplace, &x1, workspace, queue_alloc)?;
+
             self.all_reduce(&mut x, workspace, queue_alloc)?
         }
         if logits.shape()[0] == 0 {
@@ -808,7 +809,7 @@ impl<W: WeightLoader> WeightDecorator<W> {
         iblk: usize,
         queue: &'a QueueOf<W::Hardware>,
     ) -> Tensor<W::Weight<'a>> {
-        const WHICH: MiniCPM3BlkWeight = MiniCPM3BlkWeight::FfnGateUp;
+        const WHICH: MiniCPM3BlkWeight = MiniCPM3BlkWeight::FfnGate;
         let w = self.weights.load_blk(WHICH, iblk, queue);
         self.ffn_gate.clone().map(|_| w)
     }
@@ -818,7 +819,7 @@ impl<W: WeightLoader> WeightDecorator<W> {
         iblk: usize,
         queue: &'a QueueOf<W::Hardware>,
     ) -> Tensor<W::Weight<'a>> {
-        const WHICH: MiniCPM3BlkWeight = MiniCPM3BlkWeight::FfnGateUp;
+        const WHICH: MiniCPM3BlkWeight = MiniCPM3BlkWeight::FfnUp;
         let w = self.weights.load_blk(WHICH, iblk, queue);
         self.ffn_up.clone().map(|_| w)
     }