Skip to content

Commit 2650430

Browse files
committed
feat(clip): 添加 pos_embd 计算
Signed-off-by: YdrMaster <ydrml@hotmail.com>
1 parent 357bf29 commit 2650430

File tree

8 files changed: +88 −24 lines

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ ggus = "0.3"
2727
itertools = "0.13"
2828
build-script-cfg = "0.0"
2929

30-
ndarray-layout = { git = "https://github.com/YdrMaster/ndarray-layout", rev = "48d36c5" }
31-
operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "d73a53e", default-features = false }
30+
ndarray-layout = { git = "https://github.com/YdrMaster/ndarray-layout", rev = "f1fdd24" }
31+
operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "02f95bc", default-features = false }
3232

3333
search-cl-tools = { git = "https://github.com/InfiniTensor/clrt", rev = "6846d52" }
3434
search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "e2ec203" }

models/clip/common-cpu/src/lib.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
use clip::{ClipStorage, WeightLoader};
2-
use operators::{common_cpu::Cpu, conv, QueueOf, TopoNode};
2+
use operators::{add_rows, common_cpu::Cpu, conv, QueueOf, TopoNode};
33
use std::marker::PhantomData;
44

55
pub struct Operators<N = Cpu>(PhantomData<N>);
66

77
/// CPU-backend weight storage for the CLIP vision model.
///
/// Each field borrows the raw byte buffer of a tensor loaded from the
/// GGUF model file (see `ClipStorage`); no data is copied.
pub struct Weights<'w> {
    /// Patch-embedding convolution kernel bytes.
    patch_embd_w: &'w [u8],
    /// Patch-embedding convolution bias bytes.
    patch_embd_b: &'w [u8],
    /// Position-embedding table bytes.
    pos_embd: &'w [u8],
}
1112

1213
impl<N> clip::Operators for Operators<N>
@@ -16,13 +17,15 @@ where
1617
type Hardware = Cpu;
1718
type TopoNode = Cpu;
1819
type Conv = conv::common_cpu::ConvIm2Col;
20+
type AddRows = add_rows::common_cpu::Operator;
1921
}
2022

2123
impl<'w> Weights<'w> {
2224
pub fn new(model: &'w ClipStorage<&'w [u8]>) -> Self {
2325
Self {
2426
patch_embd_w: model.patch_embd_w,
2527
patch_embd_b: model.patch_embd_b,
28+
pos_embd: model.pos_embd,
2629
}
2730
}
2831
}
@@ -35,6 +38,11 @@ impl WeightLoader for Weights<'_> {
3538
fn patch_embd<'a>(&'a self, _queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2] {
3639
[self.patch_embd_w, self.patch_embd_b]
3740
}
41+
42+
#[inline]
43+
fn pos_embd<'a>(&'a self, _queue: &'a QueueOf<Self::Hardware>) -> Self::Weight<'a> {
44+
self.pos_embd
45+
}
3846
}
3947

4048
#[cfg(test)]

models/clip/common-cpu/src/test_infer.rs

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::{Operators, Weights};
2-
use clip::{ClipArgs, ClipMeta, ClipStorage, ClipWorker, Image, Tensor};
2+
use clip::{ClipArgs, ClipMeta, ClipStorage, ClipWorker, Image, Tensor, D_POS_EMBD};
33
use gguf::{ggml_quants::digit_layout::types as ty, GGufModel};
44
use operators::{
55
common_cpu::{Cpu, ThisThread},
@@ -53,22 +53,22 @@ fn test_infer() {
5353
.launch(
5454
ClipArgs {
5555
raw: whole.to_nchw(),
56-
pos: pos70(whole.shape(), d_patch).map_slice(),
56+
pos: pos70(1, whole.shape(), d_patch).map_slice(),
5757
},
5858
&mut [],
5959
&ThisThread,
6060
)
6161
.unwrap();
6262

6363
if let Some(patches) = slices.patches_nchw() {
64-
let &[_, 3, h, w] = patches.shape() else {
64+
let &[n, 3, h, w] = patches.shape() else {
6565
unreachable!()
6666
};
6767
worker
6868
.launch(
6969
ClipArgs {
7070
raw: patches.map_slice(),
71-
pos: pos70([w, h], d_patch).map_slice(),
71+
pos: pos70(n, [w, h], d_patch).map_slice(),
7272
},
7373
&mut [],
7474
&ThisThread,
@@ -77,26 +77,21 @@ fn test_infer() {
7777
}
7878
}
7979

80-
fn pos70([w, h]: [usize; 2], d_patch: usize) -> Tensor<Blob> {
80+
fn pos70(n: usize, [w, h]: [usize; 2], d_patch: usize) -> Tensor<Blob> {
8181
let pos_w = w / d_patch;
8282
let pos_h = h / d_patch;
83-
let mut bucket_corrds_h = [0; 70];
84-
let mut bucket_corrds_w = [0; 70];
85-
for i in 0..pos_w {
86-
bucket_corrds_w[i] = ((70 * i) as f64 / pos_w as f64) as _;
87-
}
88-
for i in 0..pos_h {
89-
bucket_corrds_h[i] = ((70 * i) as f64 / pos_h as f64) as _;
90-
}
9183

92-
let mut ans = Tensor::new(ty::U32, &[pos_w * pos_h]).map(Blob::new);
84+
let mut ans = Tensor::new(ty::U32, &[1, pos_w * pos_h])
85+
.broadcast(0, n)
86+
.map(Blob::new);
9387
let (&mut [], data, &mut []) = (unsafe { ans.get_mut().align_to_mut::<u32>() }) else {
9488
panic!()
9589
};
9690

97-
let f = |i, d| ((70 * i) as f64 / d as f64) as u32;
9891
for i in 0..pos_h * pos_w {
99-
data[i] = f(i / pos_w, pos_h) * 70 + f(i % pos_w, pos_w);
92+
let y = (i / pos_w) * D_POS_EMBD / pos_h;
93+
let x = (i % pos_w) * D_POS_EMBD / pos_w;
94+
data[i] = (y * D_POS_EMBD + x) as _;
10095
}
10196

10297
ans

models/clip/common/src/args.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ use tensor::Tensor;
44
pub struct Args<'a, H: Hardware> {
55
/// shape: [n, c, h, w]
66
pub raw: Tensor<&'a [H::Byte]>,
7-
/// shape: [h x w]
7+
/// shape: [n, h x w]
88
pub pos: Tensor<&'a [H::Byte]>,
99
}

models/clip/common/src/compute.rs

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::{args::Args, ClipMeta};
22
use operators::{
3+
add_rows::{self, AddRows},
34
conv::{self, Conv},
45
ByteOf, Hardware, LaunchError, Operator, QueueAlloc, QueueOf, TopoNode,
56
};
@@ -13,6 +14,7 @@ pub trait Operators {
1314
type Hardware: Hardware;
1415
type TopoNode: TopoNode<Self::Hardware>;
1516
type Conv: Conv<Self::Hardware>;
17+
type AddRows: AddRows<Self::Hardware>;
1618
}
1719

1820
pub trait WeightLoader {
@@ -22,12 +24,14 @@ pub trait WeightLoader {
2224
Self: 's;
2325

2426
fn patch_embd<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2];
27+
fn pos_embd<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> Self::Weight<'a>;
2528
}
2629

2730
pub struct ClipWorker<Ops: Operators, W> {
2831
meta: ClipMeta,
2932
weights: WeightDecorator<W>,
3033
conv: Ops::Conv,
34+
add_rows: Ops::AddRows,
3135
pub debug: bool,
3236
}
3337

@@ -38,6 +42,7 @@ impl<Ops: Operators, W> ClipWorker<Ops, W> {
3842
weights: meta.decorator(weights),
3943
meta,
4044
conv: Ops::Conv::new(processor),
45+
add_rows: Ops::AddRows::new(processor),
4146
debug: true,
4247
}
4348
}
@@ -64,7 +69,7 @@ where
6469
QA: QueueAlloc<Hardware = Ops::Hardware>,
6570
{
6671
let time = Instant::now();
67-
let Args { raw, .. } = args;
72+
let Args { raw, pos } = args;
6873
let queue = queue_alloc.queue();
6974

7075
let ClipMeta { dt_embd, .. } = self.meta;
@@ -80,7 +85,10 @@ where
8085
let mut embd = Tensor::new(dt_embd, &[n, m, h / hk, w / wk]).map(|s| queue_alloc.alloc(s));
8186
self.conv(&mut embd, &raw, &k, &b, workspace, queue_alloc)?;
8287

83-
let _embd = embd.merge(2..4).unwrap().transpose(&[2, 1]);
88+
let mut embd = embd.merge(2..4).unwrap().transpose(&[2, 1]);
89+
90+
let pos_embd = self.weights.pos_embd(queue);
91+
self.add_rows(&mut embd, &pos_embd, &pos, workspace, queue_alloc)?;
8492

8593
if self.debug {
8694
println!("encode {n} x {h} x {w} image in {:?}", time.elapsed());
@@ -130,19 +138,49 @@ where
130138
queue_alloc,
131139
)
132140
}
141+
142+
fn add_rows<Dst, Src, Idx, QA>(
143+
&self,
144+
dst: &mut Tensor<Dst>,
145+
src: &Tensor<Src>,
146+
idx: &Tensor<Idx>,
147+
workspace: &mut [ByteOf<Ops::Hardware>],
148+
queue_alloc: &QA,
149+
) -> Result<(), LaunchError>
150+
where
151+
Dst: DerefMut<Target = [ByteOf<Ops::Hardware>]>,
152+
Src: Deref<Target = [ByteOf<Ops::Hardware>]>,
153+
Idx: Deref<Target = [ByteOf<Ops::Hardware>]>,
154+
QA: QueueAlloc<Hardware = Ops::Hardware>,
155+
{
156+
self.add_rows.launch(
157+
&add_rows::Args {
158+
dst_layout: dst.layout(),
159+
dst_base: dst.base_mut(),
160+
src_layout: src.layout(),
161+
src_base: src.base(),
162+
idx_layout: idx.layout(),
163+
idx_base: idx.base(),
164+
},
165+
workspace,
166+
queue_alloc,
167+
)
168+
}
133169
}
134170

135171
struct WeightDecorator<W> {
136172
weights: W,
137173
patch_embd_w: Tensor<usize>,
138174
patch_embd_b: Tensor<usize>,
175+
pos_embd: Tensor<usize>,
139176
}
140177

141178
impl ClipMeta {
142179
fn decorator<W>(&self, weights: W) -> WeightDecorator<W> {
143180
WeightDecorator {
144181
patch_embd_w: self.patch_embd_w(),
145182
patch_embd_b: self.patch_embd_b(),
183+
pos_embd: self.pos_embd(),
146184
weights,
147185
}
148186
}
@@ -157,4 +195,10 @@ impl<W: WeightLoader> WeightDecorator<W> {
157195
self.patch_embd_b.clone().map(|_| b),
158196
]
159197
}
198+
199+
#[inline]
200+
pub fn pos_embd<'a>(&'a self, queue: &'a QueueOf<W::Hardware>) -> Tensor<W::Weight<'a>> {
201+
let pos_embd = self.weights.pos_embd(queue);
202+
self.pos_embd.clone().map(|_| pos_embd)
203+
}
160204
}

models/clip/common/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ pub struct ClipMeta {
3838
pub epsilon: f32,
3939
}
4040

41+
/// Side length of the square grid of learned position embeddings;
/// the table holds `D_POS_EMBD`² = 4900 rows.
pub const D_POS_EMBD: usize = 70;
42+
4143
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
4244
#[repr(u8)]
4345
pub enum ProjectorType {
@@ -86,4 +88,9 @@ impl ClipMeta {
8688
let &Self { d, .. } = self;
8789
Tensor::new(self.dt_bias, &[d])
8890
}
91+
92+
pub fn pos_embd(&self) -> Tensor<usize> {
93+
let &Self { d, .. } = self;
94+
Tensor::new(self.dt_embd, &[D_POS_EMBD.pow(2), d])
95+
}
8996
}

models/clip/common/src/storage.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ pub struct Storage<T> {
66
pub meta: ClipMeta,
77
pub patch_embd_w: T,
88
pub patch_embd_b: T,
9+
pub pos_embd: T,
910
}
1011

1112
impl<'a> Storage<&'a [u8]> {
1213
pub fn from_gguf(gguf: &GGufModel<'a>) -> Self {
13-
let position_embd = &gguf.tensors["v.position_embd.weight"];
14+
let pos_embd = &gguf.tensors["v.position_embd.weight"];
1415
let patch_embd_w = &gguf.tensors["v.patch_embd.weight"];
1516
let patch_embd_b = &gguf.tensors["v.patch_embd.bias"];
1617

@@ -27,7 +28,7 @@ impl<'a> Storage<&'a [u8]> {
2728
projector,
2829
minicpmv_version: gguf.get_usize("clip.minicpmv_version").unwrap() as _,
2930

30-
dt_embd: position_embd.ty,
31+
dt_embd: pos_embd.ty,
3132
dt_mat : patch_embd_w.ty,
3233
dt_bias: patch_embd_b.ty,
3334

@@ -47,6 +48,7 @@ impl<'a> Storage<&'a [u8]> {
4748
meta,
4849
patch_embd_w: patch_embd_w.data,
4950
patch_embd_b: patch_embd_b.data,
51+
pos_embd: pos_embd.data,
5052
}
5153
}
5254
}

tensor/src/lib.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,14 @@ impl<T> Tensor<T> {
292292
}
293293
}
294294

295+
#[inline]
296+
pub fn broadcast(self, axis: usize, times: usize) -> Self {
297+
Self {
298+
layout: self.layout.broadcast(axis, times),
299+
..self
300+
}
301+
}
302+
295303
#[inline]
296304
pub fn merge(self, range: Range<usize>) -> Option<Self> {
297305
self.layout

0 commit comments

Comments
 (0)