
Commit ad38e87
rename everywhere
1 parent bd0714b commit ad38e87

7 files changed: +350 −326 lines

examples/vision/vision.cpp
Lines changed: 5 additions & 5 deletions

@@ -122,7 +122,7 @@ int main(int argc, char ** argv) {
     int n_prompt = 0;
 
     // process image
-    llama_vision_patches * img_patches = nullptr;
+    llama_vision_tokens * img_tokens = nullptr;
     {
         const char * img_path = params.image[0].c_str();
         if (params.image[0].empty()) {
@@ -131,12 +131,12 @@ int main(int argc, char ** argv) {
         }
         llama_vision_bitmap * img = load_image_from_file(img_path);
         LOG_INF("loaded image %s, size = %d x %d\n", img_path, img->nx, img->ny);
-        img_patches = llama_vision_patches_init(ctx, img);
-        if (!img_patches) {
-            LOG_ERR("failed to create image patches\n");
+        img_tokens = llama_vision_tokenize(ctx, img);
+        if (!img_tokens) {
+            LOG_ERR("failed to create image tokens\n");
             return 1;
         }
-        if (llama_vision_encode(ctx, img_patches)) {
+        if (llama_vision_encode(ctx, img_tokens)) {
             LOG_ERR("failed to encode image\n");
             return 1;
         }

include/llama.h
Lines changed: 10 additions & 5 deletions

@@ -229,7 +229,9 @@ extern "C" {
         bool sorted;
     } llama_token_data_array;
 
-    struct llama_vision_patches;
+    // Structure represents the basic input unit of vision model
+    // This can be a processed image or slices of images under the hood
+    struct llama_vision_tokens;
 
     // represent an RGB image
     // size of data must be equal to 3*nx*ny
@@ -1286,12 +1288,15 @@ extern "C" {
     LLAMA_API struct llama_vision_bitmap * llama_vision_bitmap_init(uint32_t nx, uint32_t ny);
     LLAMA_API void llama_vision_bitmap_free(struct llama_vision_bitmap * bmp);
 
-    // Create patches from the RGB bitmap
-    LLAMA_API struct llama_vision_patches * llama_vision_patches_init(struct llama_context * ctx, llama_vision_bitmap * bmp);
-    LLAMA_API void llama_vision_patches_free(struct llama_vision_patches * p);
+    // Create image tokens from the RGB bitmap
+    LLAMA_API struct llama_vision_tokens * llama_vision_tokenize(struct llama_context * ctx, llama_vision_bitmap * bmp);
+    LLAMA_API void llama_vision_tokens_free(struct llama_vision_tokens * img_tokens);
+
+    // User must reserve N number of tokens in tokenized text prompt for each image
+    // LLAMA_API int32_t llama_vision_get_n_tokens(const llama_vision_img_tokens * img_tokens);
 
     // Encode patches into embeddings
-    LLAMA_API int32_t llama_vision_encode(struct llama_context * ctx, struct llama_vision_patches * p);
+    LLAMA_API int32_t llama_vision_encode(struct llama_context * ctx, struct llama_vision_tokens * img_tokens);
     LLAMA_API struct ggml_tensor * llama_vision_get_output_tensor(struct llama_context * ctx);
 
     //
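Taken together, the renamed declarations describe a tokenize → encode → fetch-embeddings flow. The following is a minimal sketch of that flow, not code from this commit: it assumes `ctx` is an already-initialized llama_context backed by a vision-capable model, and that llama_vision_bitmap exposes its 3*nx*ny RGB buffer as a `data` member (an assumption based on the header comment above).

#include <cstring>
#include "llama.h"

// Sketch only: exercise the renamed vision API end to end.
// `ctx` must already be initialized with a model that has vision support.
static int32_t encode_one_image(struct llama_context * ctx, uint32_t nx, uint32_t ny, const unsigned char * rgb) {
    // wrap the raw RGB pixels (3*nx*ny bytes) in a bitmap; the `data` member is assumed
    struct llama_vision_bitmap * img = llama_vision_bitmap_init(nx, ny);
    memcpy(img->data, rgb, 3*nx*ny);

    // turn the bitmap into image tokens (formerly llama_vision_patches_init)
    struct llama_vision_tokens * img_tokens = llama_vision_tokenize(ctx, img);
    if (!img_tokens) {
        llama_vision_bitmap_free(img);
        return 1; // failed to create image tokens
    }

    // encode the image tokens into embeddings; non-zero return means failure
    int32_t ret = llama_vision_encode(ctx, img_tokens);
    if (ret == 0) {
        // on success the embeddings are exposed as a ggml tensor,
        // to be fed to the text decoder by the caller
        struct ggml_tensor * embd = llama_vision_get_output_tensor(ctx);
        (void) embd;
    }

    llama_vision_tokens_free(img_tokens);
    llama_vision_bitmap_free(img);
    return ret;
}

The commented-out llama_vision_get_n_tokens declaration suggests callers will eventually also query how many placeholder tokens to reserve in the text prompt per image, but that call is not exposed by this commit.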

src/llama-context.h
Lines changed: 1 addition & 1 deletion

@@ -110,7 +110,7 @@ struct llama_context {
     struct ggml_tensor * inp_KQ_mask_cross; // F32 [n_outputs_enc, n_batch]
 
     // vision
-    clip_context vctx;
+    llama_vision_context vctx;
 };
 
 // TODO: make these methods of llama_context

src/llama-model.cpp
Lines changed: 3 additions & 3 deletions

@@ -1268,8 +1268,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         {
             std::string name;
             ml.get_key(LLM_KV_VISION_VIT_PROJECTOR_TYPE, name, true);
-            vparams.proj_type = clip_projector_type_from_name(name);
-            if (vparams.proj_type == CLIP_PROJECTOR_TYPE_UNKNOWN) {
+            vparams.proj_type = vision_projector_type_from_name(name);
+            if (vparams.proj_type == VISION_PROJECTOR_TYPE_UNKNOWN) {
                 throw std::runtime_error(format("unsupported clip projector type: %s", name.c_str()));
             }
         }
@@ -3514,7 +3514,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             throw std::runtime_error("unknown vision architecture");
         }
 
-        if (clip_n_mmproj_embd(clip) != hparams.n_embd) {
+        if (llama_vision_n_mmproj_embd(clip) != hparams.n_embd) {
            std::runtime_error("model has vision, but n_mmproj_embd != n_embd");
        }
    }
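The commit renames the projector-type lookup but does not show its body. The sketch below is only a plausible reconstruction of a string-to-enum mapping consistent with the call site above; apart from VISION_PROJECTOR_TYPE_UNKNOWN, the enum members and the "mlp"/"ldp" projector names are illustrative assumptions, not code from the repository.

#include <string>

// Hypothetical reconstruction of the renamed helper (not from this commit).
// Only VISION_PROJECTOR_TYPE_UNKNOWN appears in the diff; the other values
// and the projector-name strings are assumptions for illustration.
enum vision_projector_type {
    VISION_PROJECTOR_TYPE_UNKNOWN,
    VISION_PROJECTOR_TYPE_MLP, // assumed
    VISION_PROJECTOR_TYPE_LDP, // assumed
};

static vision_projector_type vision_projector_type_from_name(const std::string & name) {
    if (name == "mlp") { return VISION_PROJECTOR_TYPE_MLP; }
    if (name == "ldp") { return VISION_PROJECTOR_TYPE_LDP; }
    // the caller in load_hparams() turns this into
    // "unsupported clip projector type: %s"
    return VISION_PROJECTOR_TYPE_UNKNOWN;
}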

src/llama-model.h
Lines changed: 1 addition & 1 deletion

@@ -365,7 +365,7 @@ struct llama_model {
 
     // vision
     bool has_vision = false;
-    clip_vision_model clip;
+    llama_vision_model clip;
 
 private:
     struct impl;
