1- #pragma once
1+ #ifndef CLIP_H
2+ #define CLIP_H
23
34#include "ggml.h"
45#include <stddef.h>
56#include <stdint.h>
67
8+ #ifdef LLAMA_SHARED
9+ # if defined(_WIN32 ) && !defined(__MINGW32__ )
10+ # ifdef LLAMA_BUILD
11+ # define CLIP_API __declspec(dllexport)
12+ # else
13+ # define CLIP_API __declspec(dllimport)
14+ # endif
15+ # else
16+ # define CLIP_API __attribute__ ((visibility ("default")))
17+ # endif
18+ #else
19+ # define CLIP_API
20+ #endif
21+
22+ #ifdef __cplusplus
23+ extern "C" {
24+ #endif
25+
726struct clip_ctx ;
827
928struct clip_image_size {
@@ -20,80 +39,97 @@ struct clip_context_params {
2039 enum ggml_log_level verbosity ;
2140};
2241
23- struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
42+ // deprecated, use clip_init instead
43+ CLIP_API struct clip_ctx * clip_model_load (const char * fname , int verbosity );
44+
45+ CLIP_API struct clip_ctx * clip_init (const char * fname , struct clip_context_params ctx_params );
2446
25- void clip_free (struct clip_ctx * ctx );
47+ CLIP_API void clip_free (struct clip_ctx * ctx );
2648
27- size_t clip_embd_nbytes (const struct clip_ctx * ctx );
28- size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
49+ CLIP_API size_t clip_embd_nbytes (const struct clip_ctx * ctx );
50+ CLIP_API size_t clip_embd_nbytes_by_img (const struct clip_ctx * ctx , int img_w , int img_h );
2951
30- int32_t clip_get_image_size (const struct clip_ctx * ctx );
31- int32_t clip_get_patch_size (const struct clip_ctx * ctx );
32- int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
52+ CLIP_API int32_t clip_get_image_size (const struct clip_ctx * ctx );
53+ CLIP_API int32_t clip_get_patch_size (const struct clip_ctx * ctx );
54+ CLIP_API int32_t clip_get_hidden_size (const struct clip_ctx * ctx );
3355
3456// TODO: should be enum, not string
35- const char * clip_patch_merge_type (const struct clip_ctx * ctx );
57+ CLIP_API const char * clip_patch_merge_type (const struct clip_ctx * ctx );
3658
37- const int32_t * clip_image_grid (const struct clip_ctx * ctx );
38- size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
59+ CLIP_API const int32_t * clip_image_grid (const struct clip_ctx * ctx );
60+ CLIP_API size_t get_clip_image_grid_size (const struct clip_ctx * ctx );
3961
40- int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
62+ GGML_DEPRECATED (CLIP_API int clip_n_patches (const struct clip_ctx * ctx ),
63+ "use clip_n_output_tokens instead" );
64+ GGML_DEPRECATED (CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx , struct clip_image_f32 * img ),
65+ "use clip_n_output_tokens instead" );
66+
67+ CLIP_API int clip_n_output_tokens (const struct clip_ctx * ctx , struct clip_image_f32 * img );
4168
4269// for M-RoPE, this will be the number of token positions in X and Y directions
4370// for other models, X will be the total number of tokens and Y will be 1
44- int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
45- int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
71+ CLIP_API int clip_n_output_tokens_x (const struct clip_ctx * ctx , struct clip_image_f32 * img );
72+ CLIP_API int clip_n_output_tokens_y (const struct clip_ctx * ctx , struct clip_image_f32 * img );
4673
4774// this should be equal to the embedding dimension of the text model
48- int clip_n_mmproj_embd (const struct clip_ctx * ctx );
75+ CLIP_API int clip_n_mmproj_embd (const struct clip_ctx * ctx );
4976
50- int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
51- void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
52- struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
77+ CLIP_API int clip_uhd_num_image_embeds_col (struct clip_ctx * ctx_clip );
78+ CLIP_API void clip_add_load_image_size (struct clip_ctx * ctx_clip , struct clip_image_size * load_image_size );
79+ CLIP_API struct clip_image_size * clip_get_load_image_size (struct clip_ctx * ctx_clip );
5380
54- struct clip_image_size * clip_image_size_init (void );
55- struct clip_image_u8 * clip_image_u8_init (void );
56- struct clip_image_f32 * clip_image_f32_init (void );
57- struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
81+ CLIP_API struct clip_image_size * clip_image_size_init (void );
82+ CLIP_API struct clip_image_u8 * clip_image_u8_init (void );
83+ CLIP_API struct clip_image_f32 * clip_image_f32_init (void );
84+ CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init (void ); // only used by libllava
5885
5986// nx, ny are the output image dimensions
60- unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
87+ CLIP_API unsigned char * clip_image_u8_get_data (struct clip_image_u8 * img , uint32_t * nx , uint32_t * ny );
6188
62- void clip_image_size_free (struct clip_image_size * img_size );
63- void clip_image_u8_free (struct clip_image_u8 * img );
64- void clip_image_f32_free (struct clip_image_f32 * img );
65- void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
66- void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
89+ CLIP_API void clip_image_size_free (struct clip_image_size * img_size );
90+ CLIP_API void clip_image_u8_free (struct clip_image_u8 * img );
91+ CLIP_API void clip_image_f32_free (struct clip_image_f32 * img );
92+ CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch );
93+ CLIP_API void clip_image_f32_batch_free (struct clip_image_f32_batch * batch );
6794
6895// used for accessing the underlying data of clip_image_f32_batch
69- size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
70- size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
71- size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
72- struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
96+ CLIP_API size_t clip_image_f32_batch_n_images (const struct clip_image_f32_batch * batch ); // equivalent to batch->size()
97+ CLIP_API size_t clip_image_f32_batch_nx (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->nx
98+ CLIP_API size_t clip_image_f32_batch_ny (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->ny
99+ CLIP_API struct clip_image_f32 * clip_image_f32_get_img (const struct clip_image_f32_batch * batch , int idx ); // equivalent to batch[idx]->data
73100
74101/**
75102 * Build an image from pixels decoded by other libraries instead of stb_image.h, for better performance.
76103 * The memory layout is RGBRGBRGB...; the input buffer length must be 3*nx*ny bytes.
77104 */
78- void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
105+ CLIP_API void clip_build_img_from_pixels (const unsigned char * rgb_pixels , int nx , int ny , struct clip_image_u8 * img );
79106
80- bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
107+ CLIP_API bool clip_image_load_from_file (const char * fname , struct clip_image_u8 * img );
81108
82109/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
83- bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
110+ CLIP_API bool clip_image_load_from_bytes (const unsigned char * bytes , size_t bytes_length , struct clip_image_u8 * img );
84111
85112/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
86- bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
113+ CLIP_API bool clip_image_preprocess (struct clip_ctx * ctx , const struct clip_image_u8 * img , struct clip_image_f32_batch * res_imgs );
114+
115+ CLIP_API struct ggml_tensor * clip_get_newline_tensor (const struct clip_ctx * ctx );
116+
117+ CLIP_API bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
118+ CLIP_API bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
119+
120+ CLIP_API bool clip_model_quantize (const char * fname_inp , const char * fname_out , int itype );
121+
122+ CLIP_API int clip_is_minicpmv (const struct clip_ctx * ctx );
123+ CLIP_API bool clip_is_glm (const struct clip_ctx * ctx );
124+ CLIP_API bool clip_is_qwen2vl (const struct clip_ctx * ctx );
125+ CLIP_API bool clip_is_llava (const struct clip_ctx * ctx );
126+ CLIP_API bool clip_is_gemma3 (const struct clip_ctx * ctx );
87127
88- struct ggml_tensor * clip_get_newline_tensor ( const struct clip_ctx * ctx );
128+ CLIP_API bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
89129
90- bool clip_image_encode (struct clip_ctx * ctx , int n_threads , struct clip_image_f32 * img , float * vec );
91- bool clip_image_batch_encode (struct clip_ctx * ctx , int n_threads , const struct clip_image_f32_batch * imgs , float * vec );
92130
93- int clip_is_minicpmv (const struct clip_ctx * ctx );
94- bool clip_is_glm (const struct clip_ctx * ctx );
95- bool clip_is_qwen2vl (const struct clip_ctx * ctx );
96- bool clip_is_llava (const struct clip_ctx * ctx );
97- bool clip_is_gemma3 (const struct clip_ctx * ctx );
131+ #ifdef __cplusplus
132+ }
133+ #endif
98134
99- bool clip_encode_float_image ( struct clip_ctx * ctx , int n_threads , float * img , int h , int w , float * vec );
135+ #endif // CLIP_H
0 commit comments