feat: 开始开发 clip 模型结构

YdrMaster · YdrMaster · commit 2e5d2119c6f0 · 2024-11-26T13:39:31.000+08:00
Signed-off-by: YdrMaster <ydrml@hotmail.com>
diff --git a/Cargo.toml b/Cargo.toml
@@ -28,7 +28,7 @@ itertools = "0.13"
 build-script-cfg = "0.0"
 
 ndarray-layout = { git = "https://github.com/YdrMaster/ndarray-layout", rev = "48d36c5" }
-operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "48892b8", default-features = false }
+operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "1b08473", default-features = false }
 
 search-cl-tools = { git = "https://github.com/InfiniTensor/clrt", rev = "6846d52" }
 search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "e2ec203" }
diff --git a/models/clip/common-cpu/src/lib.rs b/models/clip/common-cpu/src/lib.rs
@@ -1,2 +1,41 @@
+use clip::{ClipStorage, WeightLoader};
+use operators::{common_cpu::Cpu, conv, QueueOf, TopoNode};
+use std::marker::PhantomData;
+
+pub struct Operators<N = Cpu>(PhantomData<N>); // Marker type that selects the CPU operator set; it carries no data.
+
+pub struct Weights<'w> { // Borrowed byte views of the CLIP weights used on the CPU.
+    patch_embd_w: &'w [u8], // patch-embedding convolution kernel
+    patch_embd_b: &'w [u8], // patch-embedding bias
+}
+
+impl<N> clip::Operators for Operators<N> // binds the CLIP operator slots to CPU implementations
+where
+    N: TopoNode<Cpu>,
+{
+    type Hardware = Cpu;
+    type TopoNode = Cpu; // NOTE(review): fixed to `Cpu`, so `N` is only a marker here; confirm this is intended
+    type Conv = conv::common_cpu::ConvIm2Col; // convolution operator that `ClipWorker` instantiates
+}
+
+impl<'w> Weights<'w> {
+    pub fn new(model: &'w ClipStorage<&'w [u8]>) -> Self { // borrows from `model`: only the slice references are copied, not the bytes
+        Self {
+            patch_embd_w: model.patch_embd_w,
+            patch_embd_b: model.patch_embd_b,
+        }
+    }
+}
+
+impl WeightLoader for Weights<'_> {
+    type Hardware = Cpu;
+    type Weight<'s> = &'s [u8] where Self: 's; // weights are handed out as plain byte slices
+
+    #[inline]
+    fn patch_embd<'a>(&'a self, _queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2] {
+        [self.patch_embd_w, self.patch_embd_b] // order is [kernel, bias]; `_queue` is not used
+    }
+}
+
 #[cfg(test)]
 mod test_infer;
diff --git a/models/clip/common-cpu/src/test_infer.rs b/models/clip/common-cpu/src/test_infer.rs
@@ -1,8 +1,12 @@
-﻿use clip::{ClipMeta, ClipStorage, Image};
+﻿use crate::{Operators, Weights};
+use clip::{ClipArgs, ClipMeta, ClipStorage, ClipWorker, Image};
 use gguf::GGufModel;
+use operators::common_cpu::{Cpu, ThisThread};
 use std::time::Instant;
 use test_utils::Inference;
 
+type Worker<'w> = ClipWorker<Operators, Weights<'w>>; // CLIP worker built from the CPU operators and borrowed weights of this crate
+
 #[test]
 fn test_infer() {
     let Some(Inference { model, .. }) = Inference::load() else {
@@ -33,8 +37,38 @@ fn test_infer() {
     println!("load image {:?}", time.elapsed());
 
     let time = Instant::now();
-    let _slices = image
+    let slices = image
         .slice_uhd(9, d_image, d_patch)
         .normalize(dt_embd, image_mean, image_std);
     println!("slice image {:?}", time.elapsed());
+
+    let weights = Weights::new(&storage);
+    let mut worker = Worker::new(&Cpu, meta.clone(), weights);
+
+    let whole = slices.whole();
+    worker
+        .launch(
+            ClipArgs {
+                raw: whole.to_nchw(),
+            },
+            &mut [],
+            &ThisThread,
+        )
+        .unwrap();
+
+    let [x, y] = slices.grid();
+    for i in 0..y {
+        for j in 0..x {
+            let patch = slices.patch(j, i);
+            worker
+                .launch(
+                    ClipArgs {
+                        raw: patch.to_nchw(),
+                    },
+                    &mut [],
+                    &ThisThread,
+                )
+                .unwrap();
+        }
+    }
 }
diff --git a/models/clip/common/Cargo.toml b/models/clip/common/Cargo.toml
@@ -10,6 +10,7 @@ authors = ["YdrMaster <ydrml@hotmail.com>"]
 common.workspace = true
 gguf.workspace = true
 tensor.workspace = true
+operators.workspace = true
 itertools.workspace = true
 image = "0.25"
 rayon = "1.10"
diff --git a/models/clip/common/src/args.rs b/models/clip/common/src/args.rs
@@ -0,0 +1,7 @@
+﻿use operators::Hardware;
+use tensor::Tensor;
+
+pub struct Args<'a, H: Hardware> { // Input of one `ClipWorker::launch` call.
+    /// Borrowed input tensor that the patch embedding is applied to, shape: [n, c, h, w].
+    pub raw: Tensor<&'a [H::Byte]>,
+}
diff --git a/models/clip/common/src/compute.rs b/models/clip/common/src/compute.rs
@@ -0,0 +1,148 @@
+use super::{args::Args, ClipMeta};
+use operators::{
+    conv::{self, Conv},
+    ByteOf, Hardware, LaunchError, Operator, QueueAlloc, QueueOf, TopoNode,
+};
+use std::ops::{Deref, DerefMut};
+use tensor::Tensor;
+
+pub trait Operators { // Bundle of the hardware, topology-node and operator types a `ClipWorker` is built from.
+    type Hardware: Hardware;
+    type TopoNode: TopoNode<Self::Hardware>; // `ClipWorker::new` takes one and uses its `processor()`
+    type Conv: Conv<Self::Hardware>; // convolution applied to the input with the patch-embedding weights
+}
+
+pub trait WeightLoader { // Source of model weights as byte buffers of `Hardware`.
+    type Hardware: Hardware;
+    type Weight<'s>: Deref<Target = [ByteOf<Self::Hardware>]> + 's // handle that derefs to the weight bytes
+    where
+        Self: 's;
+
+    fn patch_embd<'a>(&'a self, queue: &'a QueueOf<Self::Hardware>) -> [Self::Weight<'a>; 2]; // returns [kernel, bias] of the patch embedding
+}
+
+pub struct ClipWorker<Ops: Operators, W> { // Runs the CLIP patch embedding with operators `Ops` and weights `W`.
+    meta: ClipMeta,
+    weights: WeightDecorator<W>, // `W` paired with the tensor layouts derived from `meta`
+    conv: Ops::Conv,
+    pub debug: bool, // set to `true` by `new`; not read anywhere in this file
+}
+
+impl<Ops: Operators, W> ClipWorker<Ops, W> {
+    /// Creates a worker whose operators run on the processor of `node`; `debug` starts as `true`.
+    pub fn new(node: &Ops::TopoNode, meta: ClipMeta, weights: W) -> Self {
+        Self {
+            weights: meta.decorator(weights),
+            conv: Ops::Conv::new(node.processor()),
+            debug: true,
+            meta,
+        }
+    }
+    /// Returns the model metadata this worker was created with.
+    #[inline]
+    pub const fn meta(&self) -> &ClipMeta {
+        &self.meta
+    }
+}
+
+impl<Ops, W> ClipWorker<Ops, W>
+where
+    Ops: Operators,
+    W: WeightLoader<Hardware = Ops::Hardware>,
+    ByteOf<Ops::Hardware>: 'static,
+{
+    /// Runs the patch-embedding convolution on `args.raw`; the output buffer comes from
+    /// `queue_alloc` and is dropped on return. Panics if `raw` is not rank 4.
+    pub fn launch<QA>(
+        &mut self,
+        args: Args<Ops::Hardware>,
+        workspace: &mut [ByteOf<Ops::Hardware>],
+        queue_alloc: &QA,
+    ) -> Result<(), LaunchError>
+    where
+        QA: QueueAlloc<Hardware = Ops::Hardware>,
+    {
+        let Args { raw } = args;
+        let dt_embd = self.meta.dt_embd;
+        let [k, b] = self.weights.patch_embd(queue_alloc.queue());
+        // The input rank is caller-controlled: name the violated contract when it is wrong.
+        let &[n, _, h, w] = raw.shape() else {
+            panic!("`raw` must have shape [n, c, h, w], got {:?}", raw.shape())
+        };
+        // The kernel layout is built by `ClipMeta` with exactly four dimensions.
+        let &[m, _, hk, wk] = k.shape() else {
+            unreachable!()
+        };
+        let mut embd = Tensor::new(dt_embd, &[n, m, h / hk, w / wk]).map(|s| queue_alloc.alloc(s));
+        self.conv(&mut embd, &raw, &k, &b, workspace, queue_alloc)
+    }
+}
+
+#[allow(clippy::too_many_arguments)] // `conv` takes one tensor per operand plus the workspace and the queue allocator
+impl<Ops, W> ClipWorker<Ops, W>
+where
+    Ops: Operators,
+    W: WeightLoader<Hardware = Ops::Hardware>,
+{
+    fn conv<Y, X, W_, B, QA>( // forwards the operands to the `Ops::Conv` operator held by this worker
+        &self,
+        y: &mut Tensor<Y>, // output, written through `base_mut`
+        x: &Tensor<X>, // input
+        w: &Tensor<W_>, // kernel
+        b: &Tensor<B>, // bias
+        workspace: &mut [ByteOf<Ops::Hardware>],
+        queue_alloc: &QA,
+    ) -> Result<(), LaunchError>
+    where
+        Y: DerefMut<Target = [ByteOf<Ops::Hardware>]>,
+        X: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        W_: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        B: Deref<Target = [ByteOf<Ops::Hardware>]>,
+        QA: QueueAlloc<Hardware = Ops::Hardware>,
+    {
+        self.conv.launch(
+            &conv::Args {
+                y_layout: y.layout(),
+                y_base: y.base_mut(),
+                x_layout: x.layout(),
+                x_base: x.base(),
+                w_layout: w.layout(),
+                w_base: w.base(),
+                b_layout: b.layout(),
+                b_base: b.base(),
+                strides: [self.meta.d_patch; 2], // stride equals the patch size `d_patch` on both axes
+                dilations: [1; 2], // no dilation
+                pads: [0; 4], // no padding
+            },
+            workspace,
+            queue_alloc,
+        )
+    }
+}
+
+struct WeightDecorator<W> { // A weight loader plus the tensor layouts used to view its raw bytes.
+    weights: W,
+    patch_embd_w: Tensor<usize>, // kernel layout from `ClipMeta::patch_embd_w`
+    patch_embd_b: Tensor<usize>, // bias layout from `ClipMeta::patch_embd_b`
+}
+
+impl ClipMeta {
+    fn decorator<W>(&self, weights: W) -> WeightDecorator<W> { // pairs `weights` with the layouts computed from this metadata
+        WeightDecorator {
+            patch_embd_w: self.patch_embd_w(),
+            patch_embd_b: self.patch_embd_b(),
+            weights,
+        }
+    }
+}
+
+impl<W: WeightLoader> WeightDecorator<W> {
+    /// Loads the patch-embedding kernel and bias and wraps each in a clone of its stored layout.
+    #[inline]
+    pub fn patch_embd<'a>(&'a self, queue: &'a QueueOf<W::Hardware>) -> [Tensor<W::Weight<'a>>; 2] {
+        let [kernel_bytes, bias_bytes] = self.weights.patch_embd(queue);
+        let kernel = self.patch_embd_w.clone().map(|_| kernel_bytes);
+        let bias = self.patch_embd_b.clone().map(|_| bias_bytes);
+        [kernel, bias]
+    }
+}
diff --git a/models/clip/common/src/lib.rs b/models/clip/common/src/lib.rs
@@ -1,11 +1,21 @@
+mod args;
+mod compute;
 mod image;
 mod storage;
 
 use gguf::ggml_quants::digit_layout::DigitLayout;
-use tensor::Tensor;
 
+pub use args::Args as ClipArgs;
+pub use compute::{ClipWorker, Operators, WeightLoader};
 pub use image::{Image, ImageGrid};
 pub use storage::Storage as ClipStorage;
+pub use tensor::Tensor;
+pub mod ext { // Re-exports selected `gguf` items under this crate.
+    pub use gguf::{
+        ext::{utok, Mmap},
+        ggml_quants,
+    };
+}
 
 #[derive(Clone, Debug)]
 pub struct ClipMeta {
@@ -67,12 +77,12 @@ impl ClipMeta {
         }
     }
 
-    pub fn patch_embd(&self) -> Tensor<usize> {
+    pub fn patch_embd_w(&self) -> Tensor<usize> { // patch-embedding kernel layout: type `dt_mat`, shape [d, 3, d_patch, d_patch]
         let &Self { d, d_patch, .. } = self;
         Tensor::new(self.dt_mat, &[d, 3, d_patch, d_patch])
     }
 
-    pub fn patch_embd_bias(&self) -> Tensor<usize> {
+    pub fn patch_embd_b(&self) -> Tensor<usize> { // patch-embedding bias layout: type `dt_bias`, shape [d]
         let &Self { d, .. } = self;
         Tensor::new(self.dt_bias, &[d])
     }