.clang-tidy (10 changes: 10 additions & 0 deletions)
@@ -0,0 +1,10 @@
+Checks: >
+  modernize-make-shared,
+  modernize-use-nullptr,
+  modernize-use-override,
+  modernize-pass-by-value,
+  modernize-return-braced-init-list,
+  modernize-deprecated-headers,
+HeaderFilterRegex: '^$'
+WarningsAsErrors: ''
+FormatStyle: none
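The checks above drive every hunk that follows: modernize-use-override adds the override keywords and modernize-use-nullptr replaces NULL. As a minimal sketch of what modernize-use-override enforces (illustrative C++ with hypothetical names, not code from this repository):

#include <string>

struct Block {
    virtual void init_params(const std::string& prefix = "") {}
    virtual ~Block() = default;
};

struct Embeddings : Block {
    // Same signature as the base, so this compiles; `override` makes the
    // compiler verify the match instead of trusting the author.
    void init_params(const std::string& prefix = "") override {}

    // A near-miss such as
    //   void init_params(std::string prefix) override {}
    // would now fail to compile instead of silently declaring a new,
    // unrelated virtual function.
};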
clip.hpp (36 changes: 18 additions & 18 deletions)
Original file line number Diff line number Diff line change
@@ -550,7 +550,7 @@ class CLIPEmbeddings : public GGMLBlock {
int64_t num_positions;
bool force_clip_f32;

-void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
enum ggml_type token_wtype = GGML_TYPE_F32;
if (!force_clip_f32) {
token_wtype = get_type(prefix + "token_embedding.weight", tensor_types, GGML_TYPE_F32);
@@ -587,7 +587,7 @@ class CLIPEmbeddings : public GGMLBlock {

GGML_ASSERT(input_ids->ne[0] == position_embed_weight->ne[1]);
input_ids = ggml_reshape_3d(ctx, input_ids, input_ids->ne[0], 1, input_ids->ne[1]);
-auto token_embedding = ggml_get_rows(ctx, custom_embed_weight != NULL ? custom_embed_weight : token_embed_weight, input_ids);
+auto token_embedding = ggml_get_rows(ctx, custom_embed_weight != nullptr ? custom_embed_weight : token_embed_weight, input_ids);
token_embedding = ggml_reshape_3d(ctx, token_embedding, token_embedding->ne[0], token_embedding->ne[1], token_embedding->ne[3]);

// token_embedding + position_embedding
@@ -606,7 +606,7 @@ class CLIPVisionEmbeddings : public GGMLBlock {
int64_t image_size;
int64_t num_patches;
int64_t num_positions;
-void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
enum ggml_type patch_wtype = GGML_TYPE_F16;
enum ggml_type class_wtype = GGML_TYPE_F32;
enum ggml_type position_wtype = GGML_TYPE_F32;
@@ -641,10 +641,10 @@ class CLIPVisionEmbeddings : public GGMLBlock {
// concat(patch_embedding, class_embedding) + position_embedding
struct ggml_tensor* patch_embedding;
int64_t N = pixel_values->ne[3];
-patch_embedding = ggml_nn_conv_2d(ctx, pixel_values, patch_embed_weight, NULL, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
-patch_embedding = ggml_reshape_3d(ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
-patch_embedding = ggml_cont(ctx, ggml_permute(ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
-patch_embedding = ggml_reshape_4d(ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
+patch_embedding = ggml_nn_conv_2d(ctx, pixel_values, patch_embed_weight, nullptr, patch_size, patch_size); // [N, embed_dim, image_size // patch_size, image_size // patch_size]
+patch_embedding = ggml_reshape_3d(ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
+patch_embedding = ggml_cont(ctx, ggml_permute(ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
+patch_embedding = ggml_reshape_4d(ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]

struct ggml_tensor* class_embedding = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, N);
class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding); // [N, embed_dim]
@@ -669,7 +669,7 @@ enum CLIPVersion {

class CLIPTextModel : public GGMLBlock {
protected:
-void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
if (version == OPEN_CLIP_VIT_BIGG_14) {
enum ggml_type wtype = GGML_TYPE_F32;
params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
@@ -735,8 +735,8 @@ class CLIPTextModel : public GGMLBlock {
if (return_pooled) {
auto text_projection = params["text_projection"];
ggml_tensor* pooled = ggml_view_1d(ctx, x, hidden_size, x->nb[1] * max_token_idx);
-if (text_projection != NULL) {
-    pooled = ggml_nn_linear(ctx, pooled, text_projection, NULL);
+if (text_projection != nullptr) {
+    pooled = ggml_nn_linear(ctx, pooled, text_projection, nullptr);
} else {
LOG_DEBUG("identity projection");
}
@@ -814,7 +814,7 @@ class CLIPProjection : public UnaryBlock {
int64_t out_features;
bool transpose_weight;

-void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") override {
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
if (transpose_weight) {
params["weight"] = ggml_new_tensor_2d(ctx, wtype, out_features, in_features);
@@ -831,12 +831,12 @@ class CLIPProjection : public UnaryBlock {
out_features(out_features),
transpose_weight(transpose_weight) {}

-struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
+struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
struct ggml_tensor* w = params["weight"];
if (transpose_weight) {
w = ggml_cont(ctx, ggml_transpose(ctx, w));
}
-return ggml_nn_linear(ctx, x, w, NULL);
+return ggml_nn_linear(ctx, x, w, nullptr);
}
};

@@ -894,7 +894,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
model.init(params_ctx, tensor_types, prefix);
}

-std::string get_desc() {
+std::string get_desc() override {
return "clip";
}

@@ -921,17 +921,17 @@

struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids,
int num_custom_embeddings = 0,
-void* custom_embeddings_data = NULL,
+void* custom_embeddings_data = nullptr,
size_t max_token_idx = 0,
bool return_pooled = false,
int clip_skip = -1) {
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);

input_ids = to_backend(input_ids);

-struct ggml_tensor* embeddings = NULL;
+struct ggml_tensor* embeddings = nullptr;

-if (num_custom_embeddings > 0 && custom_embeddings_data != NULL) {
+if (num_custom_embeddings > 0 && custom_embeddings_data != nullptr) {
auto token_embed_weight = model.get_token_embed_weight();
auto custom_embeddings = ggml_new_tensor_2d(compute_ctx,
token_embed_weight->type,
@@ -958,7 +958,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
bool return_pooled,
int clip_skip,
ggml_tensor** output,
-ggml_context* output_ctx = NULL) {
+ggml_context* output_ctx = nullptr) {
auto get_graph = [&]() -> struct ggml_cgraph* {
return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
};
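That covers clip.hpp: every NULL on the touched lines becomes nullptr, which is exactly what modernize-use-nullptr produces. The practical benefit is overload resolution: NULL is an integer constant, while nullptr converts only to pointer types. A minimal sketch (illustrative only, not code from this PR):

#include <iostream>

void f(int)   { std::cout << "f(int)\n"; }
void f(char*) { std::cout << "f(char*)\n"; }

int main() {
    f(0);       // calls f(int): 0 is an int literal
    f(nullptr); // calls f(char*): nullptr never binds to an integer overload
    // f(NULL) would call f(int) or be rejected as ambiguous, depending on
    // how the implementation defines NULL.
    return 0;
}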
common.hpp (14 changes: 7 additions & 7 deletions)
@@ -121,7 +121,7 @@ class ResBlock : public GGMLBlock {
}
}

-virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* emb = NULL) {
+virtual struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, struct ggml_tensor* emb = nullptr) {
// For dims==3, we reduce dimension from 5d to 4d by merging h and w, in order not to change ggml
// [N, c, t, h, w] => [N, c, t, h * w]
// x: [N, channels, h, w] if dims == 2 else [N, channels, t, h, w]
@@ -131,7 +131,7 @@
auto out_layers_0 = std::dynamic_pointer_cast<GroupNorm32>(blocks["out_layers.0"]);
auto out_layers_3 = std::dynamic_pointer_cast<UnaryBlock>(blocks["out_layers.3"]);

-if (emb == NULL) {
+if (emb == nullptr) {
GGML_ASSERT(skip_t_emb);
}

@@ -182,7 +182,7 @@ class GEGLU : public UnaryBlock {
int64_t dim_in;
int64_t dim_out;

-void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
+void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") override {
enum ggml_type wtype = get_type(prefix + "proj.weight", tensor_types, GGML_TYPE_F32);
enum ggml_type bias_wtype = GGML_TYPE_F32;
params["proj.weight"] = ggml_new_tensor_2d(ctx, wtype, dim_in, dim_out * 2);
@@ -193,7 +193,7 @@
GEGLU(int64_t dim_in, int64_t dim_out)
: dim_in(dim_in), dim_out(dim_out) {}

-struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
+struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
// x: [ne3, ne2, ne1, dim_in]
// return: [ne3, ne2, ne1, dim_out]
struct ggml_tensor* w = params["proj.weight"];
@@ -222,7 +222,7 @@ class GELU : public UnaryBlock {
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out, bias));
}

-struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
+struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) override {
// x: [ne3, ne2, ne1, dim_in]
// return: [ne3, ne2, ne1, dim_out]
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
@@ -325,7 +325,7 @@ class CrossAttention : public GGMLBlock {
auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim]
auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim]

-x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, NULL, false, false, flash_attn); // [N, n_token, inner_dim]
+x = ggml_nn_attention_ext(ctx, backend, q, k, v, n_head, nullptr, false, false, flash_attn); // [N, n_token, inner_dim]

x = to_out_0->forward(ctx, x); // [N, n_token, query_dim]
return x;
@@ -483,7 +483,7 @@ class SpatialTransformer : public GGMLBlock {

class AlphaBlender : public GGMLBlock {
protected:
-void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
+void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") override {
// Get the type of the "mix_factor" tensor from the input tensors map with the specified prefix
enum ggml_type wtype = GGML_TYPE_F32;
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
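The config also enables modernize-pass-by-value and modernize-return-braced-init-list, although no hunk in this section happens to exercise them. For reference, a minimal sketch of the pass-by-value pattern that check suggests (hypothetical names, not code from this repository):

#include <string>
#include <utility>

class TextModel {
    std::string prefix_;

public:
    // Sink parameter taken by value and moved into place: one move for
    // rvalue arguments, one copy plus one move for lvalues, instead of an
    // unconditional copy from a const reference.
    explicit TextModel(std::string prefix)
        : prefix_(std::move(prefix)) {}
};

int main() {
    std::string p = "cond_stage_model.";
    TextModel a(p);                // lvalue: copied into the parameter, then moved
    TextModel b("te.text_model."); // rvalue: constructed in place, then moved
    return 0;
}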