|
241 | 241 | #define GGML_ROPE_TYPE_MROPE 8 |
242 | 242 | #define GGML_ROPE_TYPE_VISION 24 |
243 | 243 |
|
244 | | -#define GGUF_MAGIC "GGUF" |
245 | | - |
246 | | -#define GGUF_VERSION 3 |
247 | | - |
248 | | -#define GGUF_DEFAULT_ALIGNMENT 32 |
249 | | - |
250 | 244 | #define GGML_UNUSED(x) (void)(x) |
251 | 245 |
|
252 | 246 | #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) |
@@ -409,12 +403,6 @@ extern "C" { |
409 | 403 | GGML_PREC_F32, |
410 | 404 | }; |
411 | 405 |
|
412 | | - enum ggml_backend_type { |
413 | | - GGML_BACKEND_TYPE_CPU = 0, |
414 | | - GGML_BACKEND_TYPE_GPU = 10, |
415 | | - GGML_BACKEND_TYPE_GPU_SPLIT = 20, |
416 | | - }; |
417 | | - |
418 | 406 | // model file types |
419 | 407 | enum ggml_ftype { |
420 | 408 | GGML_FTYPE_UNKNOWN = -1, |
@@ -593,8 +581,6 @@ extern "C" { |
593 | 581 | struct ggml_tensor { |
594 | 582 | enum ggml_type type; |
595 | 583 |
|
596 | | - GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor"); |
597 | | - |
598 | 584 | struct ggml_backend_buffer * buffer; |
599 | 585 |
|
600 | 586 | int64_t ne[GGML_MAX_DIMS]; // number of elements |
@@ -623,7 +609,14 @@ extern "C" { |
623 | 609 |
|
624 | 610 | void * extra; // extra things e.g. for ggml-cuda.cu |
625 | 611 |
|
| 612 | + union { |
626 | 613 | char padding[8]; |
| 614 | + struct { |
| 615 | + char trimmed_pad_1[3]; |
| 616 | + char clblast_offload_gpu; // we sneak the CLBlast GPU-offload flag into this padding byte |
| 617 | + char trimmed_pad_2[4]; |
| 618 | + }; |
| 619 | + }; |
627 | 620 | }; |
628 | 621 |
|
629 | 622 | static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor); |
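The anonymous struct added above overlays the tensor's original 8-byte padding (3 + 1 + 4 bytes), so the CLBlast offload flag occupies what used to be padding[3] and sizeof(struct ggml_tensor) stays the same. Because both the union and the struct are anonymous, the flag is reachable directly as t->clblast_offload_gpu. A minimal sketch of how the flag could be set and read; the helper names are hypothetical and not part of the header:

    // Sketch only: assumes ggml.h and <stdbool.h> are included; helpers are hypothetical.
    static void tensor_set_clblast_offload(struct ggml_tensor * t, bool on) {
        t->clblast_offload_gpu = on ? 1 : 0;   // writes byte 3 of the former padding[8]
    }

    static bool tensor_get_clblast_offload(const struct ggml_tensor * t) {
        return t->clblast_offload_gpu != 0;
    }

    // Layout check: the overlay must not grow beyond the original 8 padding bytes.
    _Static_assert(sizeof(((struct ggml_tensor *)0)->padding) == 8, "padding overlay changed size");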
@@ -2117,132 +2110,6 @@ extern "C" { |
2117 | 2110 | int64_t n_per_row, |
2118 | 2111 | const float * imatrix); |
2119 | 2112 |
|
2120 | | - // |
2121 | | - // gguf |
2122 | | - // |
2123 | | - |
2124 | | - enum gguf_type { |
2125 | | - GGUF_TYPE_UINT8 = 0, |
2126 | | - GGUF_TYPE_INT8 = 1, |
2127 | | - GGUF_TYPE_UINT16 = 2, |
2128 | | - GGUF_TYPE_INT16 = 3, |
2129 | | - GGUF_TYPE_UINT32 = 4, |
2130 | | - GGUF_TYPE_INT32 = 5, |
2131 | | - GGUF_TYPE_FLOAT32 = 6, |
2132 | | - GGUF_TYPE_BOOL = 7, |
2133 | | - GGUF_TYPE_STRING = 8, |
2134 | | - GGUF_TYPE_ARRAY = 9, |
2135 | | - GGUF_TYPE_UINT64 = 10, |
2136 | | - GGUF_TYPE_INT64 = 11, |
2137 | | - GGUF_TYPE_FLOAT64 = 12, |
2138 | | - GGUF_TYPE_COUNT, // marks the end of the enum |
2139 | | - }; |
2140 | | - |
2141 | | - struct gguf_context; |
2142 | | - |
2143 | | - struct gguf_init_params { |
2144 | | - bool no_alloc; |
2145 | | - |
2146 | | - // if not NULL, create a ggml_context and allocate the tensor data in it |
2147 | | - struct ggml_context ** ctx; |
2148 | | - }; |
2149 | | - |
2150 | | - GGML_API struct gguf_context * gguf_init_empty(void); |
2151 | | - GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params); |
2152 | | - //GGML_API struct gguf_context * gguf_init_from_buffer(..); |
2153 | | - |
2154 | | - GGML_API void gguf_free(struct gguf_context * ctx); |
2155 | | - |
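For context on the block being removed: gguf_init_params with no_alloc = true and a non-NULL ctx is the usual metadata-only load. A minimal sketch, assuming a hypothetical file name "model.gguf", that <stdio.h> is included, and using the accessors declared just below:

    // Sketch only: inspect a gguf file's metadata without loading tensor data.
    struct ggml_context * meta_ctx = NULL;
    struct gguf_init_params params = {
        /*.no_alloc =*/ true,       // create tensor metadata only, no data buffers
        /*.ctx      =*/ &meta_ctx,  // receives the ggml_context holding the tensor metadata
    };
    struct gguf_context * gctx = gguf_init_from_file("model.gguf", params);
    if (gctx != NULL) {
        printf("gguf v%d: %d kv pairs\n", gguf_get_version(gctx), gguf_get_n_kv(gctx));
        gguf_free(gctx);
        ggml_free(meta_ctx);
    }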
2156 | | - GGML_API const char * gguf_type_name(enum gguf_type type); |
2157 | | - |
2158 | | - GGML_API int gguf_get_version (const struct gguf_context * ctx); |
2159 | | - GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx); |
2160 | | - GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); |
2161 | | - GGML_API void * gguf_get_data (const struct gguf_context * ctx); |
2162 | | - |
2163 | | - GGML_API int gguf_get_n_kv(const struct gguf_context * ctx); |
2164 | | - GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key); |
2165 | | - GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id); |
2166 | | - |
2167 | | - GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id); |
2168 | | - GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id); |
2169 | | - |
2170 | | - // will abort if the wrong type is used for the key |
2171 | | - GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id); |
2172 | | - GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id); |
2173 | | - GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id); |
2174 | | - GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id); |
2175 | | - GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id); |
2176 | | - GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id); |
2177 | | - GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id); |
2178 | | - GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id); |
2179 | | - GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id); |
2180 | | - GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id); |
2181 | | - GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id); |
2182 | | - GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id); |
2183 | | - GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id); |
2184 | | - GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id); |
2185 | | - GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id); |
2186 | | - GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i); |
2187 | | - |
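Since the typed getters above abort on a type mismatch, the usual pattern is to locate the key and check its type before reading it. A short sketch, where the key name "general.name" is only illustrative and ctx is an already-loaded gguf_context:

    // Sketch only: read a string-valued KV pair defensively.
    const int key_id = gguf_find_key(ctx, "general.name");
    if (key_id >= 0 && gguf_get_kv_type(ctx, key_id) == GGUF_TYPE_STRING) {
        printf("general.name = %s\n", gguf_get_val_str(ctx, key_id));
    }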
2188 | | - GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx); |
2189 | | - GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name); |
2190 | | - GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i); |
2191 | | - GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i); |
2192 | | - GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i); |
2193 | | - |
2194 | | - // removes key if it exists |
2195 | | - GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key); |
2196 | | - |
2197 | | - // overrides existing values or adds a new one |
2198 | | - GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val); |
2199 | | - GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val); |
2200 | | - GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val); |
2201 | | - GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val); |
2202 | | - GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val); |
2203 | | - GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val); |
2204 | | - GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val); |
2205 | | - GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val); |
2206 | | - GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val); |
2207 | | - GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val); |
2208 | | - GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val); |
2209 | | - GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); |
2210 | | - GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n); |
2211 | | - GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n); |
2212 | | - |
2213 | | - // set or add KV pairs from another context |
2214 | | - GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src); |
2215 | | - |
2216 | | - // manage tensor info |
2217 | | - GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor); |
2218 | | - GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type); |
2219 | | - GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size); |
2220 | | - |
2221 | | - // writing gguf files can be done in 2 ways: |
2222 | | - // |
2223 | | - // - write the entire gguf_context to a binary file in a single pass: |
2224 | | - // |
2225 | | - // gguf_write_to_file(ctx, fname); |
2226 | | - // |
2227 | | - // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data: |
2228 | | - // |
2229 | | - // FILE * f = fopen(fname, "wb"); |
2230 | | - // fseek(f, gguf_get_meta_size(ctx), SEEK_SET); |
2231 | | - // fwrite(f, ...); |
2232 | | - // void * data = gguf_meta_get_meta_data(ctx); |
2233 | | - // fseek(f, 0, SEEK_SET); |
2234 | | - // fwrite(f, data, gguf_get_meta_size(ctx)); |
2235 | | - // free(data); |
2236 | | - // fclose(f); |
2237 | | - // |
2238 | | - |
2239 | | - // write the entire context to a binary file |
2240 | | - GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); |
2241 | | - |
2242 | | - // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding |
2243 | | - GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); |
2244 | | - GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data); |
2245 | | - |
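Two notes on the removed writer comment above: the single-pass path is simply gguf_write_to_file(ctx, fname, /*only_meta=*/false), and the two-pass example as written passes fwrite its arguments in the wrong order and calls gguf_meta_get_meta_data, which does not match the gguf_get_meta_data(ctx, data) declaration. A corrected sketch of the two-pass write, assuming the pre-removal API shown above and omitting error handling:

    // Sketch only: reserve space for the metadata, write the tensor data, then back-fill the metadata.
    // Assumes <stdio.h> and <stdlib.h> are included.
    FILE * f = fopen(fname, "wb");
    const size_t meta_size = gguf_get_meta_size(ctx);   // header + KV pairs + tensor info, padded
    fseek(f, (long) meta_size, SEEK_SET);               // leave a placeholder for the metadata
    // ... fwrite(...) each tensor's data here, at the offsets given by gguf_get_tensor_offset() ...
    void * meta = malloc(meta_size);
    gguf_get_meta_data(ctx, meta);                      // serialize the metadata into the buffer
    fseek(f, 0, SEEK_SET);
    fwrite(meta, 1, meta_size, f);                      // fwrite(ptr, size, nmemb, stream)
    free(meta);
    fclose(f);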
2246 | 2113 | #ifdef __cplusplus |
2247 | 2114 | // restrict not standard in C++ |
2248 | 2115 | # if defined(__GNUC__) |
|