feat(clip): 添加 layernorm 计算

YdrMaster · YdrMaster · commit 752db6170dda · 2024-11-29T18:37:29.000+08:00
Signed-off-by: YdrMaster <ydrml@hotmail.com>
diff --git a/Cargo.toml b/Cargo.toml
@@ -28,7 +28,7 @@ itertools = "0.13"
 build-script-cfg = "0.0"
 
 ndarray-layout = { git = "https://github.com/YdrMaster/ndarray-layout", rev = "f1fdd24" }
-operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "02f95bc", default-features = false }
+operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "8183a63", default-features = false }
 
 search-cl-tools = { git = "https://github.com/InfiniTensor/clrt", rev = "6846d52" }
 search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "e2ec203" }
diff --git a/models/clip/common-cpu/src/lib.rs b/models/clip/common-cpu/src/lib.rs
@@ -1,13 +1,16 @@
 use clip::{ClipStorage, WeightLoader};
-use operators::{add_rows, common_cpu::Cpu, conv, QueueOf, TopoNode};
+use operators::{common_cpu::Cpu, conv, QueueOf, TopoNode};
 use std::marker::PhantomData;
 
 pub struct Operators<N = Cpu>(PhantomData<N>);
 
-pub struct Weights<'w> {
-    patch_embd_w: &'w [u8],
-    patch_embd_b: &'w [u8],
-    pos_embd: &'w [u8],
+#[repr(transparent)]
+pub struct Weights<'w>(ClipStorage<&'w [u8]>);
+
+macro_rules! op {
+    ($name:ident) => {
+        operators::$name::common_cpu::Operator
+    };
 }
 
 impl<N> clip::Operators for Operators<N>
@@ -17,16 +20,13 @@ where
     type Hardware = Cpu;
     type TopoNode = Cpu;
     type Conv = conv::common_cpu::ConvIm2Col;
-    type AddRows = add_rows::common_cpu::Operator;
+    type AddRows = op!(add_rows);
+    type LayerNorm = op!(layer_norm);
 }
 
 impl<'w> Weights<'w> {
     pub fn new(model: &'w ClipStorage<&'w [u8]>) -> Self {
-        Self {
-            patch_embd_w: model.patch_embd_w,
-            patch_embd_b: model.patch_embd_b,
-            pos_embd: model.pos_embd,
-        }
+        Self(model.clone())
     }
 }
 
@@ -36,12 +36,28 @@ impl WeightLoader for Weights<'_> {
 
     #[inline]
     fn patch_embd<'a>(&'a self, _queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2] {
-        [self.patch_embd_w, self.patch_embd_b]
+        [self.0.patch_embd_w, self.0.patch_embd_b]
     }
 
     #[inline]
     fn pos_embd<'a>(&'a self, _queue: &'a QueueOf<Self::Hardware>) -> Self::Weight<'a> {
-        self.pos_embd
+        self.0.pos_embd
+    }
+
+    #[inline]
+    fn pre_norm<'a>(
+        &'a self,
+        _queue: &'a QueueOf<Self::Hardware>,
+    ) -> Option<[Self::Weight<'a>; 2]> {
+        self.0.pre_norm
+    }
+
+    #[inline]
+    fn post_norm<'a>(
+        &'a self,
+        _queue: &'a QueueOf<Self::Hardware>,
+    ) -> Option<[Self::Weight<'a>; 2]> {
+        self.0.post_norm
     }
 }
 
diff --git a/models/clip/common/src/compute.rs b/models/clip/common/src/compute.rs
@@ -2,6 +2,7 @@ use super::{args::Args, ClipMeta};
 use operators::{
     add_rows::{self, AddRows},
     conv::{self, Conv},
+    layer_norm::{self, LayerNorm},
     ByteOf, Hardware, LaunchError, Operator, QueueAlloc, QueueOf, TopoNode,
 };
 use std::{
@@ -15,6 +16,7 @@ pub trait Operators {
     type TopoNode: TopoNode<Self::Hardware>;
     type Conv: Conv<Self::Hardware>;
     type AddRows: AddRows<Self::Hardware>;
+    type LayerNorm: LayerNorm<Self::Hardware>;
 }
 
 pub trait WeightLoader {
@@ -25,13 +27,17 @@ pub trait WeightLoader {
 
     fn patch_embd<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2];
     fn pos_embd<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> Self::Weight<'a>;
+    fn pre_norm<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> Option<[Self::Weight<'a>; 2]>;
+    fn post_norm<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>)
+        -> Option<[Self::Weight<'a>; 2]>;
 }
 
 pub struct ClipWorker<Ops: Operators, W> {
     meta: ClipMeta,
     weights: WeightDecorator<W>,
     conv: Ops::Conv,
     add_rows: Ops::AddRows,
+    layer_norm: Ops::LayerNorm,
     pub debug: bool,
 }
 
@@ -43,6 +49,7 @@ impl<Ops: Operators, W> ClipWorker<Ops, W> {
             meta,
             conv: Ops::Conv::new(processor),
             add_rows: Ops::AddRows::new(processor),
+            layer_norm: Ops::LayerNorm::new(processor),
             debug: true,
         }
     }
@@ -90,6 +97,18 @@ where
         let pos_embd = self.weights.pos_embd(queue);
         self.add_rows(&mut embd, &pos_embd, &pos, workspace, queue_alloc)?;
 
+        if let Some([scale, bias]) = self.weights.pre_norm(queue) {
+            let inplace = unsafe { embd.map_slice_static() };
+            self.layer_norm(&mut embd, &inplace, &scale, &bias, workspace, queue_alloc)?;
+        }
+
+        for _ in 0..self.meta.nblk {}
+
+        if let Some([scale, bias]) = self.weights.post_norm(queue) {
+            let inplace = unsafe { embd.map_slice_static() };
+            self.layer_norm(&mut embd, &inplace, &scale, &bias, workspace, queue_alloc)?;
+        }
+
         if self.debug {
             println!("encode {n} x {h} x {w} image in {:?}", time.elapsed());
         }
@@ -166,13 +185,47 @@ where
             queue_alloc,
         )
     }
+
+    fn layer_norm<Y, X, Scale, Bias, QA>(
+        &self,
+        y: &mut Tensor<Y>,
+        x: &Tensor<X>,
+        scale: &Tensor<Scale>,
+        bias: &Tensor<Bias>,
+        workspace: &mut [ByteOf<Ops::Hardware>],
+        queue_alloc: &QA,
+    ) -> Result<(), LaunchError>
+    where
+        Y: DerefMut<Target = [ByteOf<Ops::Hardware>]>,
+        X: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        Scale: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        Bias: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        QA: QueueAlloc<Hardware = Ops::Hardware>,
+    {
+        self.layer_norm.launch(
+            &layer_norm::Args {
+                y_layout: y.layout(),
+                y_base: y.base_mut(),
+                x_layout: x.layout(),
+                x_base: x.base(),
+                scale_layout: scale.layout(),
+                scale_base: scale.base(),
+                bias_layout: bias.layout(),
+                bias_base: bias.base(),
+                epsilon: self.meta.epsilon,
+            },
+            workspace,
+            queue_alloc,
+        )
+    }
 }
 
 struct WeightDecorator<W> {
     weights: W,
     patch_embd_w: Tensor<usize>,
     patch_embd_b: Tensor<usize>,
     pos_embd: Tensor<usize>,
+    norm: Tensor<usize>,
 }
 
 impl ClipMeta {
@@ -181,6 +234,7 @@ impl ClipMeta {
             patch_embd_w: self.patch_embd_w(),
             patch_embd_b: self.patch_embd_b(),
             pos_embd: self.pos_embd(),
+            norm: self.norm(),
             weights,
         }
     }
@@ -201,4 +255,24 @@ impl<W: WeightLoader> WeightDecorator<W> {
         let pos_embd = self.weights.pos_embd(queue);
         self.pos_embd.clone().map(|_| pos_embd)
     }
+
+    #[inline]
+    pub fn pre_norm<'a>(
+        &'a self,
+        queue: &'a QueueOf<W::Hardware>,
+    ) -> Option<[Tensor<W::Weight<'a>>; 2]> {
+        self.weights
+            .pre_norm(queue)
+            .map(|pair| pair.map(|w| self.norm.clone().map(|_| w)))
+    }
+
+    #[inline]
+    pub fn post_norm<'a>(
+        &'a self,
+        queue: &'a QueueOf<W::Hardware>,
+    ) -> Option<[Tensor<W::Weight<'a>>; 2]> {
+        self.weights
+            .post_norm(queue)
+            .map(|pair| pair.map(|w| self.norm.clone().map(|_| w)))
+    }
 }
diff --git a/models/clip/common/src/lib.rs b/models/clip/common/src/lib.rs
@@ -22,9 +22,9 @@ pub struct ClipMeta {
     pub projector: ProjectorType,
     pub minicpmv_version: u8,
 
+    pub dt: DigitLayout,
     pub dt_embd: DigitLayout,
-    pub dt_mat: DigitLayout,
-    pub dt_bias: DigitLayout,
+    pub dt_norm: DigitLayout,
 
     pub nblk: usize,
     pub d_patch: usize,
@@ -81,16 +81,21 @@ impl ClipMeta {
 
     pub fn patch_embd_w(&self) -> Tensor<usize> {
         let &Self { d, d_patch, .. } = self;
-        Tensor::new(self.dt_mat, &[d, 3, d_patch, d_patch])
+        Tensor::new(self.dt, &[d, 3, d_patch, d_patch])
     }
 
     pub fn patch_embd_b(&self) -> Tensor<usize> {
         let &Self { d, .. } = self;
-        Tensor::new(self.dt_bias, &[d])
+        Tensor::new(self.dt, &[d])
     }
 
     pub fn pos_embd(&self) -> Tensor<usize> {
         let &Self { d, .. } = self;
         Tensor::new(self.dt_embd, &[D_POS_EMBD.pow(2), d])
     }
+
+    pub fn norm(&self) -> Tensor<usize> {
+        let &Self { d, .. } = self;
+        Tensor::new(self.dt_norm, &[d])
+    }
 }
diff --git a/models/clip/common/src/storage.rs b/models/clip/common/src/storage.rs
@@ -7,6 +7,8 @@ pub struct Storage<T> {
     pub patch_embd_w: T,
     pub patch_embd_b: T,
     pub pos_embd: T,
+    pub pre_norm: Option<[T; 2]>,
+    pub post_norm: Option<[T; 2]>,
 }
 
 impl<'a> Storage<&'a [u8]> {
@@ -28,9 +30,9 @@ impl<'a> Storage<&'a [u8]> {
             projector,
             minicpmv_version: gguf.get_usize("clip.minicpmv_version").unwrap() as _,
 
+            dt     : patch_embd_w.ty,
             dt_embd: pos_embd.ty,
-            dt_mat :  patch_embd_w.ty,
-            dt_bias:  patch_embd_b.ty,
+            dt_norm: gguf.tensors["v.blk.0.ln1.weight"].ty,
 
             nblk   : gguf.get_usize("clip.vision.block_count"                 ).unwrap(),
             d_patch: gguf.get_usize("clip.vision.patch_size"                  ).unwrap(),
@@ -49,6 +51,14 @@ impl<'a> Storage<&'a [u8]> {
             patch_embd_w: patch_embd_w.data,
             patch_embd_b: patch_embd_b.data,
             pos_embd: pos_embd.data,
+            pre_norm: gguf
+                .tensors
+                .get("v.pre_ln.weight")
+                .map(|w| [w.data, gguf.tensors["v.pre_ln.bias"].data]),
+            post_norm: gguf
+                .tensors
+                .get("v.post_ln.weight")
+                .map(|w| [w.data, gguf.tensors["v.post_ln.bias"].data]),
         }
     }
 }
diff --git a/models/llama/common-cpu/src/lib.rs b/models/llama/common-cpu/src/lib.rs
@@ -45,6 +45,7 @@ pub struct WeightCache {
     cached_weight: BlkWeight,
     cached_weight_iblk: usize,
 }
+
 macro_rules! op {
     ($name:ident) => {
         operators::$name::common_cpu::Operator

Original file line number	Diff line number	Diff line change
`@@ -45,6 +45,7 @@ pub struct WeightCache {`
`45`	`45`	`cached_weight: BlkWeight,`
`46`	`46`	`cached_weight_iblk: usize,`
`47`	`47`	`}`
	`48`	`+`
`48`	`49`	`macro_rules! op {`
`49`	`50`	`($name:ident) => {`
`50`	`51`	`operators::$name::common_cpu::Operator`