1- #ifndef CLIP_H
2- #define CLIP_H
1+ #pragma once
32
43#include "ggml.h"
54#include <stddef.h>
65#include <stdint.h>
76
8- #ifdef LLAMA_SHARED
9- # if defined(_WIN32 ) && !defined(__MINGW32__ )
10- # ifdef LLAMA_BUILD
11- # define CLIP_API __declspec(dllexport)
12- # else
13- # define CLIP_API __declspec(dllimport)
14- # endif
15- # else
16- # define CLIP_API __attribute__ ((visibility ("default")))
17- # endif
18- #else
19- # define CLIP_API
20- #endif
21-
22- #ifdef __cplusplus
23- extern "C" {
24- #endif
25-
267struct clip_ctx ;
278
289struct clip_image_size {
@@ -39,97 +20,80 @@ struct clip_context_params {
3920 enum ggml_log_level verbosity ;
4021};
4122
42- // deprecated, use clip_init
43- CLIP_API struct clip_ctx * clip_model_load (const char * fname , int verbosity );
44-
45- CLIP_API struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
23+ struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
4624
47- CLIP_API void clip_free (struct clip_ctx * ctx );
25+ void clip_free (struct clip_ctx * ctx );
4826
49- CLIP_API size_t clip_embd_nbytes (const struct clip_ctx * ctx );
50- CLIP_API size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
27+ size_t clip_embd_nbytes (const struct clip_ctx * ctx );
28+ size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
5129
52- CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx );
53- CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx );
54- CLIP_API int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
30+ int32_t clip_get_image_size (const struct clip_ctx * ctx );
31+ int32_t clip_get_patch_size (const struct clip_ctx * ctx );
32+ int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
5533
5634// TODO: should be enum, not string
57- CLIP_API const char * clip_patch_merge_type (const struct clip_ctx * ctx );
35+ const char * clip_patch_merge_type (const struct clip_ctx * ctx );
5836
59- CLIP_API const int32_t * clip_image_grid (const struct clip_ctx * ctx );
60- CLIP_API size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
37+ const int32_t * clip_image_grid (const struct clip_ctx * ctx );
38+ size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
6139
62- GGML_DEPRECATED (CLIP_API int clip_n_patches (const struct clip_ctx * ctx ),
63- "use clip_n_output_tokens instead" );
64- GGML_DEPRECATED (CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx , struct clip_image_f32 * img ),
65- "use clip_n_output_tokens instead" );
66-
67- CLIP_API int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
40+ int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
6841
6942// for M-RoPE, this will be the number of token positions in X and Y directions
7043// for other models, X will be the total number of tokens and Y will be 1
71- CLIP_API int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
72- CLIP_API int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
44+ int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
45+ int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
7346
7447// this should be equal to the embedding dimension of the text model
75- CLIP_API int clip_n_mmproj_embd (const struct clip_ctx * ctx );
48+ int clip_n_mmproj_embd (const struct clip_ctx * ctx );
7649
77- CLIP_API int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
78- CLIP_API void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
79- CLIP_API struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
50+ int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
51+ void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
52+ struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
8053
81- CLIP_API struct clip_image_size * clip_image_size_init (void );
82- CLIP_API struct clip_image_u8 * clip_image_u8_init (void );
83- CLIP_API struct clip_image_f32 * clip_image_f32_init (void );
84- CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
54+ struct clip_image_size * clip_image_size_init (void );
55+ struct clip_image_u8 * clip_image_u8_init (void );
56+ struct clip_image_f32 * clip_image_f32_init (void );
57+ struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
8558
8659// nx, ny are the output image dimensions
87- CLIP_API unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
60+ unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
8861
89- CLIP_API void clip_image_size_free (struct clip_image_size * img_size );
90- CLIP_API void clip_image_u8_free (struct clip_image_u8 * img );
91- CLIP_API void clip_image_f32_free (struct clip_image_f32 * img );
92- CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
93- CLIP_API void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
62+ void clip_image_size_free (struct clip_image_size * img_size );
63+ void clip_image_u8_free (struct clip_image_u8 * img );
64+ void clip_image_f32_free (struct clip_image_f32 * img );
65+ void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
66+ void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
9467
9568// used for accessing the underlying data of clip_image_f32_batch
96- CLIP_API size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
97- CLIP_API size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
98- CLIP_API size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
99- CLIP_API struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
69+ size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
70+ size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
71+ size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
72+ struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
10073
10174/**
10275 * Build an image from pixels decoded by other libraries instead of stb_image.h for better performance.
10376 * The memory layout is RGBRGBRGB...; the input buffer length must be 3*nx*ny bytes.
10477 */
105- CLIP_API void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
78+ void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
10679
107- CLIP_API bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
80+ bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
10881
10982/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
110- CLIP_API bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
83+ bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
11184
11285/** preprocess img and store the result in res_imgs; pad_to_square may be overridden to false depending on model configuration */
113- CLIP_API bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
114-
115- CLIP_API struct ggml_tensor * clip_get_newline_tensor (const struct clip_ctx * ctx );
116-
117- CLIP_API bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
118- CLIP_API bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
119-
120- CLIP_API bool clip_model_quantize (const char * fname_inp , const char * fname_out , int itype );
121-
122- CLIP_API int clip_is_minicpmv (const struct clip_ctx * ctx );
123- CLIP_API bool clip_is_glm (const struct clip_ctx * ctx );
124- CLIP_API bool clip_is_qwen2vl (const struct clip_ctx * ctx );
125- CLIP_API bool clip_is_llava (const struct clip_ctx * ctx );
126- CLIP_API bool clip_is_gemma3 (const struct clip_ctx * ctx );
86+ bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
12787
128- CLIP_API bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
88+ struct ggml_tensor * clip_get_newline_tensor ( const struct clip_ctx * ctx );
12989
90+ bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
91+ bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
13092
131- #ifdef __cplusplus
132- }
133- #endif
93+ int clip_is_minicpmv (const struct clip_ctx * ctx );
94+ bool clip_is_glm (const struct clip_ctx * ctx );
95+ bool clip_is_qwen2vl (const struct clip_ctx * ctx );
96+ bool clip_is_llava (const struct clip_ctx * ctx );
97+ bool clip_is_gemma3 (const struct clip_ctx * ctx );
13498
135- #endif // CLIP_H
99+ bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
0 commit comments