|
@@ -41,17 +41,13 @@
 #endif

 //
-// helpers
+// tensor loading (TODO: add llama_tensor_loader?)
 //

 static int llama_get_device_count(const llama_model & model) {
     return (int) model.devices.size();
 }

-//
-// model loading and saving
-//
-
 // checks if the weight tensor can be used with the specified buffer type and device
 static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w, ggml_op op, ggml_backend_buffer_type_t buft, ggml_backend_dev_t dev) {
     GGML_ASSERT(w != nullptr);
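
For context, a predicate like weight_buft_supported is typically consumed by a selection loop that walks an ordered list of candidate buffer types and takes the first one whose device can actually run the op. A minimal sketch under that assumption; pick_weight_buft and the candidates list are hypothetical names for illustration, not part of this diff:

static ggml_backend_buffer_type_t pick_weight_buft(
        const llama_hparams & hparams,
        ggml_tensor * w,
        ggml_op op,
        const std::vector<std::pair<ggml_backend_dev_t, ggml_backend_buffer_type_t>> & candidates) {
    // the first candidate whose device supports `op` on the weight tensor wins
    for (const auto & [dev, buft] : candidates) {
        if (weight_buft_supported(hparams, w, op, buft, dev)) {
            return buft;
        }
    }
    return nullptr; // caller falls back to a default (e.g. CPU) buffer type
}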
@@ -11319,21 +11315,6 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) {
     }
 }

-struct llama_lora_adapter * llama_lora_adapter_init(struct llama_model * model, const char * path_lora) {
-    struct llama_lora_adapter * adapter = new llama_lora_adapter();
-
-    try {
-        llama_lora_adapter_init_impl(*model, path_lora, *adapter);
-        return adapter;
-    } catch (const std::exception & err) {
-        LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
-
-        delete adapter;
-    }
-
-    return nullptr;
-}
-
 int32_t llama_lora_adapter_set(
         struct llama_context * ctx,
         struct llama_lora_adapter * adapter,
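
The deleted llama_lora_adapter_init wrapper above is the public entry point: it traps exceptions from the _impl function and returns nullptr on failure. A minimal caller-side sketch of that contract, using API functions visible elsewhere in this diff; the float scale argument of llama_lora_adapter_set is assumed from llama.h, since this hunk truncates the signature:

#include "llama.h"
#include <cstdio>

int main() {
    llama_model * model = llama_load_model_from_file("model.gguf", llama_model_default_params());
    if (model == nullptr) {
        return 1;
    }
    llama_context * ctx = llama_new_context_with_model(model, llama_context_default_params());

    llama_lora_adapter * adapter = llama_lora_adapter_init(model, "adapter.gguf");
    if (adapter == nullptr) {
        fprintf(stderr, "failed to load lora adapter\n"); // init already logged the cause
    } else if (llama_lora_adapter_set(ctx, adapter, 1.0f) != 0) { // 1.0f scale: assumed third parameter
        fprintf(stderr, "failed to apply lora adapter\n");
    }

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}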
@@ -11585,6 +11566,7 @@ struct llama_model * llama_load_model_from_file(
         } else if (status == -2) {
             LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
         }
+
         llama_free_model(model);
         return nullptr;
     }
@@ -11943,20 +11925,6 @@ struct llama_context * llama_new_context_with_model(
     return ctx;
 }

-uint32_t llama_model_quantize(
-        const char * fname_inp,
-        const char * fname_out,
-        const llama_model_quantize_params * params) {
-    try {
-        llama_model_quantize_internal(fname_inp, fname_out, params);
-    } catch (const std::exception & err) {
-        LLAMA_LOG_ERROR("%s: failed to quantize: %s\n", __func__, err.what());
-        return 1;
-    }
-
-    return 0;
-}
-
 //
 // kv cache
 //
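
The removed llama_model_quantize wrapper above follows the same pattern: catch exceptions from the _internal function and map them to a non-zero return code. A minimal usage sketch, assuming llama_model_quantize_default_params() and the LLAMA_FTYPE_MOSTLY_Q4_K_M ftype from llama.h:

#include "llama.h"
#include <cstdio>

int main() {
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M; // target quantization format

    // returns 0 on success, non-zero on failure, per the wrapper above
    if (llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &params) != 0) {
        fprintf(stderr, "quantization failed\n");
        return 1;
    }
    return 0;
}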
@@ -12343,4 +12311,3 @@ void llama_perf_context_reset(struct llama_context * ctx) {
     ctx->t_eval_us = ctx->n_eval = 0;
     ctx->t_p_eval_us = ctx->n_p_eval = 0;
 }
-
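
The final hunk only drops a trailing blank line after llama_perf_context_reset. For completeness, the counters it zeroes back the perf API; a minimal sketch, assuming the companion llama_perf_context_print declaration in llama.h:

#include "llama.h"

// print the accumulated timings, then zero the counters the function above resets
static void report_and_reset_perf(llama_context * ctx) {
    llama_perf_context_print(ctx);
    llama_perf_context_reset(ctx);
}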