|
256 | 256 | # // note: these values should be synchronized with ggml_rope |
257 | 257 | # // TODO: maybe move this enum to ggml.h (ggml_rope_type) |
258 | 258 | # enum llama_rope_type { |
259 | | -# LLAMA_ROPE_TYPE_NONE = -1, |
260 | | -# LLAMA_ROPE_TYPE_NORM = 0, |
261 | | -# LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, |
| 259 | +# LLAMA_ROPE_TYPE_NONE = -1, |
| 260 | +# LLAMA_ROPE_TYPE_NORM = 0, |
| 261 | +# LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX, |
| 262 | +# LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE, |
| 263 | +# LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION, |
262 | 264 | # }; |
263 | 265 | LLAMA_ROPE_TYPE_NONE = -1 |
264 | 266 | LLAMA_ROPE_TYPE_NORM = 0 |
265 | 267 | LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX = 2 |
| 268 | +LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE = 8 |
| 269 | +LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION = 24 |
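A minimal sketch of how the extended enum might be consumed, assuming the constants above and this module's llama_rope_type(model) accessor; the helper name and dict are hypothetical, added here only for illustration:

    # Hypothetical helper: map a raw rope-type value to a readable label.
    _ROPE_TYPE_NAMES = {
        LLAMA_ROPE_TYPE_NONE: "none",
        LLAMA_ROPE_TYPE_NORM: "norm",
        LLAMA_ROPE_TYPE_NEOX: "neox",
        LLAMA_ROPE_TYPE_MROPE: "mrope",
        LLAMA_ROPE_TYPE_VISION: "vision",
    }

    def describe_rope_type(model) -> str:
        return _ROPE_TYPE_NAMES.get(llama_rope_type(model), "unknown")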
266 | 270 |
|
267 | 271 |
|
268 | 272 | # enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file |
@@ -1265,6 +1269,7 @@ def llama_rope_freq_scale_train(model: llama_model_p, /) -> float: |
1265 | 1269 | # // Functions to access the model's GGUF metadata scalar values |
1266 | 1270 | # // - The functions return the length of the string on success, or -1 on failure |
1267 | 1271 | # // - The output string is always null-terminated and cleared on failure |
| 1272 | +# // - When retrieving a string, an extra byte must be allocated to account for the null terminator |
1268 | 1273 | # // - GGUF array values are not supported by these functions |
1269 | 1274 |
|
1270 | 1275 |
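A sketch of the null-terminator rule noted above, assuming the llama_model_meta_val_str binding defined elsewhere in this module; the wrapper name and default buffer size are illustrative. A 256-byte buffer holds at most 255 characters of payload, since the final byte is reserved for the terminator:

    # Hypothetical wrapper: read a scalar GGUF metadata value as a string.
    # buf_size includes the null terminator, so at most buf_size - 1
    # characters fit; the binding returns -1 and clears the buffer on failure.
    def get_meta_str(model, key: bytes, buf_size: int = 256):
        buf = ctypes.create_string_buffer(buf_size)
        n = llama_model_meta_val_str(model, key, buf, buf_size)
        if n < 0:
            return None
        return buf.value.decode("utf-8")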
|
@@ -1378,18 +1383,6 @@ def llama_model_n_params(model: llama_model_p, /) -> int: |
1378 | 1383 | ... |
1379 | 1384 |
|
1380 | 1385 |
|
1381 | | -# // Get a llama model tensor |
1382 | | -# LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name); |
1383 | | -@ctypes_function( |
1384 | | - "llama_get_model_tensor", [llama_model_p_ctypes, ctypes.c_char_p], ctypes.c_void_p |
1385 | | -) |
1386 | | -def llama_get_model_tensor( |
1387 | | - model: llama_model_p, name: Union[ctypes.c_char_p, bytes], / |
1388 | | -) -> ctypes.c_void_p: |
1389 | | - """Get a llama model tensor""" |
1390 | | - ... |
1391 | | - |
1392 | | - |
1393 | 1386 | # // Returns true if the model contains an encoder that requires llama_encode() call |
1394 | 1387 | # LLAMA_API bool llama_model_has_encoder(const struct llama_model * model); |
1395 | 1388 | @ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool) |
@@ -3336,41 +3329,22 @@ def llama_sampler_init_grammar( |
3336 | 3329 | ... |
3337 | 3330 |
|
3338 | 3331 |
|
| 3332 | +# /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first. |
3339 | 3333 | # LLAMA_API struct llama_sampler * llama_sampler_init_penalties( |
3340 | | -# int32_t n_vocab, // llama_n_vocab() |
3341 | | -# llama_token special_eos_id, // llama_token_eos() |
3342 | | -# llama_token linefeed_id, // llama_token_nl() |
3343 | | -# int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) |
3344 | | -# float penalty_repeat, // 1.0 = disabled |
3345 | | -# float penalty_freq, // 0.0 = disabled |
3346 | | -# float penalty_present, // 0.0 = disabled |
3347 | | -# bool penalize_nl, // consider newlines as a repeatable token |
3348 | | -# bool ignore_eos); // ignore the end-of-sequence token |
| 3334 | +# int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) |
| 3335 | +# float penalty_repeat, // 1.0 = disabled |
| 3336 | +# float penalty_freq, // 0.0 = disabled |
| 3337 | +# float penalty_present); // 0.0 = disabled |
3349 | 3338 | @ctypes_function( |
3350 | 3339 | "llama_sampler_init_penalties", |
3351 | | - [ |
3352 | | - ctypes.c_int32, |
3353 | | - llama_token, |
3354 | | - llama_token, |
3355 | | - ctypes.c_int32, |
3356 | | - ctypes.c_float, |
3357 | | - ctypes.c_float, |
3358 | | - ctypes.c_float, |
3359 | | - ctypes.c_bool, |
3360 | | - ctypes.c_bool, |
3361 | | - ], |
| 3340 | + [ctypes.c_int32, ctypes.c_float, ctypes.c_float, ctypes.c_float], |
3362 | 3341 | llama_sampler_p_ctypes, |
3363 | 3342 | ) |
3364 | 3343 | def llama_sampler_init_penalties( |
3365 | | - n_vocab: int, |
3366 | | - special_eos_id: int, |
3367 | | - linefeed_id: int, |
3368 | 3344 | penalty_last_n: int, |
3369 | 3345 | penalty_repeat: float, |
3370 | 3346 | penalty_freq: float, |
3371 | 3347 | penalty_present: float, |
3372 | | - penalize_nl: bool, |
3373 | | - ignore_eos: bool, |
3374 | 3348 | /, |
3375 | 3349 | ) -> llama_sampler_p: |
3376 | 3350 | ... |
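A hedged usage sketch of the slimmed-down constructor; the parameter values are illustrative only, and the chain calls assume this module's sampler-chain bindings. Per the NOTE above, the penalties sampler is placed after top-k so it never scans the full vocabulary:

    # Sketch: narrow the candidate set first, then apply penalties.
    chain = llama_sampler_chain_init(llama_sampler_chain_default_params())
    llama_sampler_chain_add(chain, llama_sampler_init_top_k(40))  # see NOTE above
    llama_sampler_chain_add(
        chain,
        llama_sampler_init_penalties(
            64,   # penalty_last_n: penalize the last 64 tokens
            1.1,  # penalty_repeat: mild repetition penalty
            0.0,  # penalty_freq: disabled
            0.0,  # penalty_present: disabled
        ),
    )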
|