
Commit 752db61

feat(clip): add layernorm computation
Signed-off-by: YdrMaster <ydrml@hotmail.com>
1 parent 2650430 commit 752db61

6 files changed, +126 −20 lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ itertools = "0.13"
 build-script-cfg = "0.0"
 
 ndarray-layout = { git = "https://github.com/YdrMaster/ndarray-layout", rev = "f1fdd24" }
-operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "02f95bc", default-features = false }
+operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "8183a63", default-features = false }
 
 search-cl-tools = { git = "https://github.com/InfiniTensor/clrt", rev = "6846d52" }
 search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "e2ec203" }

models/clip/common-cpu/src/lib.rs

Lines changed: 29 additions & 13 deletions
@@ -1,13 +1,16 @@
 use clip::{ClipStorage, WeightLoader};
-use operators::{add_rows, common_cpu::Cpu, conv, QueueOf, TopoNode};
+use operators::{common_cpu::Cpu, conv, QueueOf, TopoNode};
 use std::marker::PhantomData;
 
 pub struct Operators<N = Cpu>(PhantomData<N>);
 
-pub struct Weights<'w> {
-    patch_embd_w: &'w [u8],
-    patch_embd_b: &'w [u8],
-    pos_embd: &'w [u8],
+#[repr(transparent)]
+pub struct Weights<'w>(ClipStorage<&'w [u8]>);
+
+macro_rules! op {
+    ($name:ident) => {
+        operators::$name::common_cpu::Operator
+    };
 }
 
 impl<N> clip::Operators for Operators<N>
@@ -17,16 +20,13 @@ where
     type Hardware = Cpu;
     type TopoNode = Cpu;
     type Conv = conv::common_cpu::ConvIm2Col;
-    type AddRows = add_rows::common_cpu::Operator;
+    type AddRows = op!(add_rows);
+    type LayerNorm = op!(layer_norm);
 }
 
 impl<'w> Weights<'w> {
     pub fn new(model: &'w ClipStorage<&'w [u8]>) -> Self {
-        Self {
-            patch_embd_w: model.patch_embd_w,
-            patch_embd_b: model.patch_embd_b,
-            pos_embd: model.pos_embd,
-        }
+        Self(model.clone())
     }
 }
@@ -36,12 +36,28 @@ impl WeightLoader for Weights<'_> {
 
     #[inline]
     fn patch_embd<'a>(&'a self, _queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2] {
-        [self.patch_embd_w, self.patch_embd_b]
+        [self.0.patch_embd_w, self.0.patch_embd_b]
     }
 
     #[inline]
     fn pos_embd<'a>(&'a self, _queue: &'a QueueOf<Self::Hardware>) -> Self::Weight<'a> {
-        self.pos_embd
+        self.0.pos_embd
+    }
+
+    #[inline]
+    fn pre_norm<'a>(
+        &'a self,
+        _queue: &'a QueueOf<Self::Hardware>,
+    ) -> Option<[Self::Weight<'a>; 2]> {
+        self.0.pre_norm
+    }
+
+    #[inline]
+    fn post_norm<'a>(
+        &'a self,
+        _queue: &'a QueueOf<Self::Hardware>,
+    ) -> Option<[Self::Weight<'a>; 2]> {
+        self.0.post_norm
     }
 }
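Note (not part of the commit): `op!` merely abbreviates the path to each CPU operator implementation, and `#[repr(transparent)]` makes `Weights` a zero-cost view over the loaded storage. A self-contained sketch of both patterns, with a `mock` module standing in for the real `operators` crate:

```rust
// Standalone sketch of the two patterns above; the `mock` module stands
// in for the real `operators` crate so this compiles on its own.
mod mock {
    pub mod add_rows {
        pub mod common_cpu {
            pub struct Operator;
        }
    }
    pub mod layer_norm {
        pub mod common_cpu {
            pub struct Operator;
        }
    }
}

// Same shape as the commit's `op!`: paste the operator name into a path.
macro_rules! op {
    ($name:ident) => {
        mock::$name::common_cpu::Operator
    };
}

// `op!(add_rows)` / `op!(layer_norm)` expand to the long paths above.
type AddRows = op!(add_rows);
type LayerNorm = op!(layer_norm);

// A transparent newtype has exactly the memory layout of its single
// field, so wrapping the storage costs nothing at runtime.
#[repr(transparent)]
struct Weights<'w>(&'w [u8]);

fn main() {
    // Both aliases name unit structs, so they can be constructed directly.
    let _add: AddRows = mock::add_rows::common_cpu::Operator;
    let _ln: LayerNorm = mock::layer_norm::common_cpu::Operator;

    let bytes = [0u8; 4];
    let w = Weights(&bytes);
    assert_eq!(w.0.len(), 4);
}
```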

models/clip/common/src/compute.rs

Lines changed: 74 additions & 0 deletions
@@ -2,6 +2,7 @@ use super::{args::Args, ClipMeta};
 use operators::{
     add_rows::{self, AddRows},
     conv::{self, Conv},
+    layer_norm::{self, LayerNorm},
     ByteOf, Hardware, LaunchError, Operator, QueueAlloc, QueueOf, TopoNode,
 };
 use std::{
@@ -15,6 +16,7 @@ pub trait Operators {
     type TopoNode: TopoNode<Self::Hardware>;
     type Conv: Conv<Self::Hardware>;
     type AddRows: AddRows<Self::Hardware>;
+    type LayerNorm: LayerNorm<Self::Hardware>;
 }
 
 pub trait WeightLoader {
@@ -25,13 +27,17 @@ pub trait WeightLoader {
 
     fn patch_embd<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2];
     fn pos_embd<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> Self::Weight<'a>;
+    fn pre_norm<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> Option<[Self::Weight<'a>; 2]>;
+    fn post_norm<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>)
+        -> Option<[Self::Weight<'a>; 2]>;
 }
 
 pub struct ClipWorker<Ops: Operators, W> {
     meta: ClipMeta,
     weights: WeightDecorator<W>,
     conv: Ops::Conv,
     add_rows: Ops::AddRows,
+    layer_norm: Ops::LayerNorm,
     pub debug: bool,
 }
@@ -43,6 +49,7 @@ impl<Ops: Operators, W> ClipWorker<Ops, W> {
             meta,
             conv: Ops::Conv::new(processor),
             add_rows: Ops::AddRows::new(processor),
+            layer_norm: Ops::LayerNorm::new(processor),
             debug: true,
         }
     }
@@ -90,6 +97,18 @@
         let pos_embd = self.weights.pos_embd(queue);
         self.add_rows(&mut embd, &pos_embd, &pos, workspace, queue_alloc)?;
 
+        if let Some([scale, bias]) = self.weights.pre_norm(queue) {
+            let inplace = unsafe { embd.map_slice_static() };
+            self.layer_norm(&mut embd, &inplace, &scale, &bias, workspace, queue_alloc)?;
+        }
+
+        for _ in 0..self.meta.nblk {}
+
+        if let Some([scale, bias]) = self.weights.post_norm(queue) {
+            let inplace = unsafe { embd.map_slice_static() };
+            self.layer_norm(&mut embd, &inplace, &scale, &bias, workspace, queue_alloc)?;
+        }
+
         if self.debug {
             println!("encode {n} x {h} x {w} image in {:?}", time.elapsed());
         }
@@ -166,13 +185,47 @@
             queue_alloc,
         )
     }
+
+    fn layer_norm<Y, X, Scale, Bias, QA>(
+        &self,
+        y: &mut Tensor<Y>,
+        x: &Tensor<X>,
+        scale: &Tensor<Scale>,
+        bias: &Tensor<Bias>,
+        workspace: &mut [ByteOf<Ops::Hardware>],
+        queue_alloc: &QA,
+    ) -> Result<(), LaunchError>
+    where
+        Y: DerefMut<Target = [ByteOf<Ops::Hardware>]>,
+        X: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        Scale: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        Bias: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        QA: QueueAlloc<Hardware = Ops::Hardware>,
+    {
+        self.layer_norm.launch(
+            &layer_norm::Args {
+                y_layout: y.layout(),
+                y_base: y.base_mut(),
+                x_layout: x.layout(),
+                x_base: x.base(),
+                scale_layout: scale.layout(),
+                scale_base: scale.base(),
+                bias_layout: bias.layout(),
+                bias_base: bias.base(),
+                epsilon: self.meta.epsilon,
+            },
+            workspace,
+            queue_alloc,
+        )
+    }
 }
 
 struct WeightDecorator<W> {
     weights: W,
     patch_embd_w: Tensor<usize>,
     patch_embd_b: Tensor<usize>,
     pos_embd: Tensor<usize>,
+    norm: Tensor<usize>,
 }
@@ -181,6 +234,7 @@ impl ClipMeta {
             patch_embd_w: self.patch_embd_w(),
             patch_embd_b: self.patch_embd_b(),
             pos_embd: self.pos_embd(),
+            norm: self.norm(),
             weights,
         }
     }
@@ -201,4 +255,24 @@ impl<W: WeightLoader> WeightDecorator<W> {
         let pos_embd = self.weights.pos_embd(queue);
         self.pos_embd.clone().map(|_| pos_embd)
     }
+
+    #[inline]
+    pub fn pre_norm<'a>(
+        &'a self,
+        queue: &'a QueueOf<W::Hardware>,
+    ) -> Option<[Tensor<W::Weight<'a>>; 2]> {
+        self.weights
+            .pre_norm(queue)
+            .map(|pair| pair.map(|w| self.norm.clone().map(|_| w)))
+    }
+
+    #[inline]
+    pub fn post_norm<'a>(
+        &'a self,
+        queue: &'a QueueOf<W::Hardware>,
+    ) -> Option<[Tensor<W::Weight<'a>>; 2]> {
+        self.weights
+            .post_norm(queue)
+            .map(|pair| pair.map(|w| self.norm.clone().map(|_| w)))
+    }
 }
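For reference, the operator launched by the new `layer_norm` helper computes standard layer normalization over the embedding dimension d; `scale`, `bias`, and `epsilon` in `layer_norm::Args` play the roles of γ, β, and ε (the formula below is the textbook one, not quoted from this repository):

$$
\mu = \frac{1}{d}\sum_{i=1}^{d} x_i, \qquad
\sigma^2 = \frac{1}{d}\sum_{i=1}^{d} \left(x_i - \mu\right)^2, \qquad
y_i = \gamma_i \, \frac{x_i - \mu}{\sqrt{\sigma^2 + \varepsilon}} + \beta_i
$$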

models/clip/common/src/lib.rs

Lines changed: 9 additions & 4 deletions
@@ -22,9 +22,9 @@ pub struct ClipMeta {
     pub projector: ProjectorType,
     pub minicpmv_version: u8,
 
+    pub dt: DigitLayout,
     pub dt_embd: DigitLayout,
-    pub dt_mat: DigitLayout,
-    pub dt_bias: DigitLayout,
+    pub dt_norm: DigitLayout,
 
     pub nblk: usize,
     pub d_patch: usize,
@@ -81,16 +81,21 @@ impl ClipMeta {
 
     pub fn patch_embd_w(&self) -> Tensor<usize> {
         let &Self { d, d_patch, .. } = self;
-        Tensor::new(self.dt_mat, &[d, 3, d_patch, d_patch])
+        Tensor::new(self.dt, &[d, 3, d_patch, d_patch])
     }
 
     pub fn patch_embd_b(&self) -> Tensor<usize> {
         let &Self { d, .. } = self;
-        Tensor::new(self.dt_bias, &[d])
+        Tensor::new(self.dt, &[d])
     }
 
     pub fn pos_embd(&self) -> Tensor<usize> {
         let &Self { d, .. } = self;
         Tensor::new(self.dt_embd, &[D_POS_EMBD.pow(2), d])
     }
+
+    pub fn norm(&self) -> Tensor<usize> {
+        let &Self { d, .. } = self;
+        Tensor::new(self.dt_norm, &[d])
+    }
 }
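`norm()` is a layout-only descriptor: a `Tensor<usize>` that records shape and data type, which `WeightDecorator` later rebinds to real weight bytes via `.map` (see `pre_norm`/`post_norm` in compute.rs above). A toy stand-in for that placeholder-then-map pattern, with a deliberately simplified `Tensor` (the real type lives elsewhere in this workspace):

```rust
// Toy model of the descriptor pattern: a tensor carries a shape plus an
// arbitrary payload, and `map` swaps the payload while keeping the layout.
struct Tensor<T> {
    shape: Vec<usize>,
    data: T,
}

impl<T> Tensor<T> {
    fn map<U>(self, f: impl FnOnce(T) -> U) -> Tensor<U> {
        Tensor {
            shape: self.shape,
            data: f(self.data),
        }
    }
}

fn main() {
    // `ClipMeta::norm()` analogue: layout only, placeholder payload.
    let norm = Tensor { shape: vec![1024], data: 0usize };

    // Load time: rebind the same descriptor to actual weight bytes.
    let weight: &[u8] = &[0; 4]; // stand-in for a GGUF tensor's data
    let bound = norm.map(|_| weight);

    assert_eq!(bound.shape, vec![1024]);
    assert_eq!(bound.data.len(), 4);
}
```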

models/clip/common/src/storage.rs

Lines changed: 12 additions & 2 deletions
@@ -7,6 +7,8 @@ pub struct Storage<T> {
     pub patch_embd_w: T,
     pub patch_embd_b: T,
     pub pos_embd: T,
+    pub pre_norm: Option<[T; 2]>,
+    pub post_norm: Option<[T; 2]>,
 }
 
 impl<'a> Storage<&'a [u8]> {
@@ -28,9 +30,9 @@ impl<'a> Storage<&'a [u8]> {
             projector,
             minicpmv_version: gguf.get_usize("clip.minicpmv_version").unwrap() as _,
 
+            dt     : patch_embd_w.ty,
             dt_embd: pos_embd.ty,
-            dt_mat : patch_embd_w.ty,
-            dt_bias: patch_embd_b.ty,
+            dt_norm: gguf.tensors["v.blk.0.ln1.weight"].ty,
 
             nblk   : gguf.get_usize("clip.vision.block_count" ).unwrap(),
             d_patch: gguf.get_usize("clip.vision.patch_size" ).unwrap(),
@@ -49,6 +51,14 @@
             patch_embd_w: patch_embd_w.data,
             patch_embd_b: patch_embd_b.data,
             pos_embd: pos_embd.data,
+            pre_norm: gguf
+                .tensors
+                .get("v.pre_ln.weight")
+                .map(|w| [w.data, gguf.tensors["v.pre_ln.bias"].data]),
+            post_norm: gguf
+                .tensors
+                .get("v.post_ln.weight")
+                .map(|w| [w.data, gguf.tensors["v.post_ln.bias"].data]),
         }
     }
 }
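The pre/post norms are optional in CLIP-style GGUF files, so `from_gguf` probes for the weight tensor rather than unwrapping it, and assumes the matching bias exists whenever the weight does. A hedged sketch of the same probe against a plain `HashMap`; `lookup_norm` is a made-up helper, and only the tensor names come from the diff:

```rust
use std::collections::HashMap;

// Made-up helper mirroring the lookup in `Storage::from_gguf`: probe the
// weight by name, then index the bias directly (so a file that has the
// weight but not the bias panics loudly, as in the commit).
fn lookup_norm<'a>(
    tensors: &HashMap<String, &'a [u8]>,
    weight: &str,
    bias: &str,
) -> Option<[&'a [u8]; 2]> {
    tensors.get(weight).map(|w| [*w, tensors[bias]])
}

fn main() {
    let mut tensors: HashMap<String, &[u8]> = HashMap::new();
    tensors.insert("v.pre_ln.weight".into(), &[1, 2]);
    tensors.insert("v.pre_ln.bias".into(), &[3, 4]);

    // Present: both slices come back as a pair.
    assert!(lookup_norm(&tensors, "v.pre_ln.weight", "v.pre_ln.bias").is_some());
    // Absent: this file has no post norm, so the field stays `None`.
    assert!(lookup_norm(&tensors, "v.post_ln.weight", "v.post_ln.bias").is_none());
}
```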

models/llama/common-cpu/src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ pub struct WeightCache {
     cached_weight: BlkWeight,
     cached_weight_iblk: usize,
 }
+
 macro_rules! op {
     ($name:ident) => {
         operators::$name::common_cpu::Operator
