Commit ab671b9

style: code cleanup
Signed-off-by: YdrMaster <[email protected]>
1 parent 72fc503 · commit ab671b9

3 files changed, +21 -33 lines changed
models/gpt2/common/src/compute.rs

Lines changed: 12 additions & 24 deletions
@@ -96,23 +96,6 @@ impl<Ops: Operators, W> Gpt2Worker<Ops, W> {
     pub const fn meta(&self) -> &Gpt2Meta {
         &self.meta
     }
-
-    pub fn workspace_size(&self, nt: usize, max_seq_len: usize, max_att_len: usize) -> usize {
-        let Gpt2Meta {
-            nh, nkvh, dh, di, ..
-        } = self.meta;
-
-        let embd = self.meta.embd(nt);
-        let dt = embd.dt();
-        let embd = embd.take();
-
-        let qkv = Tensor::new(dt, &[nt * (nh + nkvh + nkvh), dh]).take();
-        let q = Tensor::new(dt, &[max_seq_len, nh, dh]).take();
-        let att = Tensor::new(dt, &[nh, max_seq_len, max_att_len]).take();
-
-        let up = Tensor::new(dt, &[nt, di]).take();
-        embd + (qkv + q + att).max(up)
-    }
 }
 
 impl<Ops, W> Gpt2Worker<Ops, W>
@@ -153,13 +136,18 @@ where
             self.add_rows(&mut embd, &pos_embd, &idx, workspace, queue_alloc)?
         }
 
-        let nt = embd.shape()[0];
         let mut x = embd;
-        let x1 = Tensor::new(x.dt(), x.shape());
-        let qkv = Tensor::new(x.dt(), &[nt, (nh + nkvh + nkvh) * dh]);
-        let up = Tensor::new(x.dt(), &[nt, di]);
+        let nt = x.shape()[0];
+
+        let tensor = |shape: &[usize]| Tensor::new(x.dt(), shape);
+        let x1 = tensor(x.shape());
+        let qkv = tensor(&[nt, (nh + nkvh + nkvh) * dh]);
+        let q = tensor(&[max_seq_len, nh, dh]).take();
+        let att = tensor(&[nh, max_seq_len, max_att_len]).take();
+        let up = tensor(&[nt, di]);
+
+        let workspace_size = *x1.get() + (*qkv.get() + q + att).max(*up.get());
 
-        let workspace_size = self.workspace_size(nt, max_seq_len, max_att_len);
         let mut workspace = Workspace::new(queue_alloc, workspace, workspace_size);
         let (buf, workspace) = workspace.split_at_mut(*x1.get());
         let mut x1 = x1.map(|_| buf);
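With the standalone `workspace_size` method removed, the GPT-2 forward pass now sizes its scratch buffer inline: the attention-path buffers (`qkv`, `q`, `att`) and the MLP `up` buffer are never live at the same time, so only the larger group counts on top of the always-live `x1`. A minimal sketch of that arithmetic, using plain byte counts with made-up sizes instead of the repo's `Tensor` type:

    // Sketch of the peak-workspace arithmetic; all sizes are hypothetical bytes.
    fn workspace_bytes(x1: usize, qkv: usize, q: usize, att: usize, up: usize) -> usize {
        // x1 is live for the whole layer; the attention buffers and the MLP
        // `up` buffer reuse the remaining region, so take the larger of the two.
        x1 + (qkv + q + att).max(up)
    }

    fn main() {
        let size = workspace_bytes(4096, 6144, 2048, 1024, 16384);
        assert_eq!(size, 4096 + 16384); // the MLP path dominates in this example
        println!("workspace: {size} bytes");
    }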
@@ -253,9 +241,9 @@ where
                 if src != dst {
                     let src = unsafe { x.map_slice_static() }.index(0, src);
                     let mut dst = x.map_slice_mut().index(0, dst);
-                    self.rearrange(&mut dst, &src, workspace, queue_alloc)?;
+                    self.rearrange(&mut dst, &src, workspace, queue_alloc)?
                 }
-                dst += 1;
+                dst += 1
             }
         }
         assert_eq!(dst, logits.shape()[0]);
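The remaining changes in this file only drop trailing semicolons where the statement is already the last expression of a `()`-typed block: `?` applied to a `Result<(), _>` yields `()`, and `dst += 1` is itself a `()` expression, so both can stand as block tails. A minimal sketch of the same style, with a stand-in `step` function that is not part of the repo:

    fn step(ok: bool) -> Result<(), String> {
        if ok { Ok(()) } else { Err("failed".into()) }
    }

    fn run(n: usize) -> Result<(), String> {
        let mut dst = 0;
        for i in 0..n {
            if i % 2 == 0 {
                // `?` on a Result<(), _> leaves `()`, so no trailing `;` is
                // needed when the call closes the block.
                step(true)?
            }
            dst += 1
        }
        assert_eq!(dst, n);
        Ok(())
    }

    fn main() {
        run(4).unwrap()
    }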

models/gpt2/common/src/lib.rs

Lines changed: 3 additions & 1 deletion
@@ -6,7 +6,6 @@ use common::Distribution;
 use gguf::ggml_quants::digit_layout::DigitLayout;
 
 pub use args::{Args as GPT2Args, Request as GPT2Request};
-pub use common::Contiguous;
 pub use compute::{BlkWeight, Gpt2Worker, Operators, WeightLoader};
 pub use storage::{BlkStorage as GPT2BlkStorage, Storage as GPT2Storage};
 pub use tensor::{RandomSample, Tensor};
@@ -16,6 +15,7 @@ pub mod ext {
         ggml_quants,
     };
 }
+
 #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
 pub enum GPT2BlkWeight {
     AttnQkvB,
@@ -31,6 +31,7 @@ pub enum GPT2BlkWeight {
     FfnNormB,
     FfnNormW,
 }
+
 #[derive(Clone, Debug)]
 pub struct Gpt2Meta {
     pub dt_embd: DigitLayout,
@@ -72,6 +73,7 @@ impl Gpt2Meta {
             ..self.clone()
         }
     }
+
     pub fn blk(&self) -> GPT2BlkStorage<usize> {
         use TensorUsage::Storage as TensorMem;
         GPT2BlkStorage {

models/llama/common/src/compute.rs

Lines changed: 6 additions & 8 deletions
@@ -172,6 +172,7 @@ where
         let att = tensor(&[nh, max_seq_len, max_att_len]).take();
         let gate_up = tensor(&[if self.meta.is_moe() { 1 } else { nt }, di * 2]);
         let routes = tensor(&[nt, nexp]);
+        let mut routes_host = routes.clone().map(Blob::new).take();
 
         let workspace_size = *x1.get()
             + (*qkv.get() + q + att)
@@ -204,8 +205,7 @@ where
         for iblk in 0..nblk {
             {
                 let w = self.weights.attn_norm(iblk, queue);
-                self.rms_norm(&mut x1, &x, &w, workspace, queue_alloc)?;
-                drop(w);
+                self.rms_norm(&mut x1, &x, w, workspace, queue_alloc)?;
 
                 let (buf, workspace) = workspace.split_at_mut(*qkv.get());
                 let mut qkv = qkv.clone().map(|_| buf);
@@ -272,8 +272,7 @@ where
             self.all_reduce(&mut x, workspace, queue_alloc)?;
 
             let w = self.weights.ffn_norm(iblk, queue);
-            self.rms_norm(&mut x1, &x, &w, workspace, queue_alloc)?;
-            drop(w);
+            self.rms_norm(&mut x1, &x, w, workspace, queue_alloc)?;
 
             if !self.meta.is_moe() {
                 let (buf, workspace) = workspace.split_at_mut(*gate_up.get());
@@ -291,7 +290,6 @@ where
                 let residual = if self.id == 0 { 1. } else { 0. };
                 self.mat_mul(&mut x, residual, &gate, &w, 1., workspace, queue_alloc)?
             } else {
-                let mut routes_host = routes.clone().map(Blob::new).take();
                 // gate_inp
                 {
                     let (buf, workspace) = workspace.split_at_mut(*routes.get());
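Together with the `+175` line in the first hunk of this file, this hunk hoists the `routes_host` blob out of the per-block loop: it is now allocated once before iterating over the layers and reused for every MoE block instead of being rebuilt on each iteration. A minimal sketch of the same hoisting pattern, using a plain `Vec` as a stand-in for the repo's `Blob` and made-up sizes:

    // Hypothetical sizes: nblk layers, nt tokens, nexp experts.
    fn route_all(nblk: usize, nt: usize, nexp: usize) {
        // Allocate the host-side routing buffer once, outside the layer loop.
        let mut routes_host = vec![0f32; nt * nexp];
        for iblk in 0..nblk {
            // Stand-in for copying this layer's router logits back to the host.
            for v in routes_host.iter_mut() {
                *v = iblk as f32;
            }
            // ... select experts per token from routes_host ...
        }
    }

    fn main() {
        route_all(4, 2, 8)
    }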
@@ -332,7 +330,7 @@ where
                     }
                 }
             }
-            self.all_reduce(&mut x, workspace, queue_alloc)?;
+            self.all_reduce(&mut x, workspace, queue_alloc)?
         }
         if logits.shape()[0] == 0 {
             return Ok(());
@@ -359,7 +357,7 @@ where
         {
             let inplace = unsafe { x.map_slice_static() };
             let w = self.weights.output_norm(queue);
-            self.rms_norm(&mut x, &inplace, &w, workspace, queue_alloc)?
+            self.rms_norm(&mut x, &inplace, w, workspace, queue_alloc)?
         }
         let w = self.weights.output(queue);
         self.mat_mul(&mut logits, 0., &x, &w, 1., workspace, queue_alloc)
@@ -397,7 +395,7 @@ where
         &self,
         y: &mut Tensor<Y>,
         x: &Tensor<X>,
-        w: &Tensor<W_>,
+        w: Tensor<W_>,
         workspace: &mut [ByteOf<Ops::Hardware>],
         queue_alloc: &QA,
     ) -> Result<(), LaunchError>
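This signature change is what makes the earlier `drop(w)` removals possible: `rms_norm` now consumes the weight tensor, so the handle is dropped inside the call rather than lingering at the call site. A minimal sketch of the ownership difference, with stand-in types rather than the repo's `Tensor` and worker:

    struct Weight(Vec<f32>);

    // Old shape: the caller keeps ownership and must drop `w` explicitly if it
    // should not outlive the call. (The body is a placeholder, not a real RMS norm.)
    fn rms_norm_by_ref(x: &mut [f32], w: &Weight) {
        for (xi, wi) in x.iter_mut().zip(&w.0) {
            *xi *= *wi;
        }
    }

    // New shape: the callee consumes `w`, so it is dropped when the call returns.
    fn rms_norm_by_value(x: &mut [f32], w: Weight) {
        for (xi, wi) in x.iter_mut().zip(&w.0) {
            *xi *= *wi;
        }
    }

    fn main() {
        let mut x = vec![1.0, 2.0];

        let w = Weight(vec![0.5, 0.5]);
        rms_norm_by_ref(&mut x, &w);
        drop(w); // explicit release, as the old call sites did

        let w = Weight(vec![0.5, 0.5]);
        rms_norm_by_value(&mut x, w); // no drop needed at the call site
    }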
