
Commit 1a95850

Merge branch 'ggml-org:master' into mradermacher
2 parents: 09263e0 + 59e991c

10 files changed: +957, -442 lines

common/arg.cpp

Lines changed: 71 additions & 34 deletions
@@ -162,6 +162,10 @@ struct common_hf_file_res {
 
 #ifdef LLAMA_USE_CURL
 
+bool common_has_curl() {
+    return true;
+}
+
 #ifdef __linux__
 #include <linux/limits.h>
 #elif defined(_WIN32)
@@ -527,64 +531,89 @@ static bool common_download_model(
     return true;
 }
 
-/**
- * Allow getting the HF file from the HF repo with tag (like ollama), for example:
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
- * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
- * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
- *
- * Return pair of <repo, file> (with "repo" already having tag removed)
- *
- * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
- */
-static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
-    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
-    std::string tag = parts.size() > 1 ? parts.back() : "latest";
-    std::string hf_repo = parts[0];
-    if (string_split<std::string>(hf_repo, '/').size() != 2) {
-        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
-    }
-
-    // fetch model info from Hugging Face Hub API
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
     curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
     curl_slist_ptr http_headers;
-    std::string res_str;
+    std::vector<char> res_buffer;
 
-    std::string model_endpoint = get_model_endpoint();
-
-    std::string url = model_endpoint + "v2/" + hf_repo + "/manifests/" + tag;
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
+    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
     typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
     auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
-        static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb);
+        auto data_vec = static_cast<std::vector<char> *>(data);
+        data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
         return size * nmemb;
     };
     curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
-    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
+    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
 #if defined(_WIN32)
     curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
 #endif
-    if (!bearer_token.empty()) {
-        std::string auth_header = "Authorization: Bearer " + bearer_token;
-        http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
+    if (params.timeout > 0) {
+        curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
+    }
+    if (params.max_size > 0) {
+        curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
     }
-    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
     http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
-    http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
+    for (const auto & header : params.headers) {
+        http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
+    }
     curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
 
     CURLcode res = curl_easy_perform(curl.get());
 
     if (res != CURLE_OK) {
-        throw std::runtime_error("error: cannot make GET request to HF API");
+        std::string error_msg = curl_easy_strerror(res);
+        throw std::runtime_error("error: cannot make GET request: " + error_msg);
    }
 
     long res_code;
-    std::string ggufFile = "";
-    std::string mmprojFile = "";
     curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
+
+    return { res_code, std::move(res_buffer) };
+}
+
+/**
+ * Allow getting the HF file from the HF repo with tag (like ollama), for example:
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
+ * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
+ * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
+ *
+ * Return pair of <repo, file> (with "repo" already having tag removed)
+ *
+ * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
+ */
+static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
+    auto parts = string_split<std::string>(hf_repo_with_tag, ':');
+    std::string tag = parts.size() > 1 ? parts.back() : "latest";
+    std::string hf_repo = parts[0];
+    if (string_split<std::string>(hf_repo, '/').size() != 2) {
+        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
+    }
+
+    std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
+
+    // headers
+    std::vector<std::string> headers;
+    headers.push_back("Accept: application/json");
+    if (!bearer_token.empty()) {
+        headers.push_back("Authorization: Bearer " + bearer_token);
+    }
+    // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
+    // User-Agent header is already set in common_remote_get_content, no need to set it here
+
+    // make the request
+    common_remote_params params;
+    params.headers = headers;
+    auto res = common_remote_get_content(url, params);
+    long res_code = res.first;
+    std::string res_str(res.second.data(), res.second.size());
+    std::string ggufFile;
+    std::string mmprojFile;
+
     if (res_code == 200) {
         // extract ggufFile.rfilename in json, using regex
         {
@@ -618,6 +647,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
 
 #else
 
+bool common_has_curl() {
+    return false;
+}
+
 static bool common_download_file_single(const std::string &, const std::string &, const std::string &) {
     LOG_ERR("error: built without CURL, cannot download model from internet\n");
     return false;
@@ -640,6 +673,10 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
     return {};
 }
 
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
+    throw std::runtime_error("error: built without CURL, cannot download model from the internet");
+}
+
 #endif // LLAMA_USE_CURL
 
 //
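The doc comment above describes how a <user>/<model>[:quant] spec is split into a repo and a tag. A minimal, self-contained sketch of that parsing, using plain std::string calls instead of the project's string_split helper; split_repo_and_tag is a hypothetical name used only for this illustration:

#include <cstdio>
#include <stdexcept>
#include <string>
#include <utility>

// Hypothetical helper mirroring the split done in common_get_hf_file:
// everything after the last ':' is the tag, defaulting to "latest".
static std::pair<std::string, std::string> split_repo_and_tag(const std::string & hf_repo_with_tag) {
    const size_t colon = hf_repo_with_tag.rfind(':');
    std::string hf_repo = (colon == std::string::npos) ? hf_repo_with_tag : hf_repo_with_tag.substr(0, colon);
    std::string tag     = (colon == std::string::npos) ? "latest"         : hf_repo_with_tag.substr(colon + 1);
    // a valid repo contains exactly one '/', i.e. <user>/<model>
    if (hf_repo.find('/') == std::string::npos || hf_repo.find('/') != hf_repo.rfind('/')) {
        throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
    }
    return { hf_repo, tag };
}

int main() {
    auto parsed = split_repo_and_tag("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M");
    // prints: repo=bartowski/Llama-3.2-3B-Instruct-GGUF tag=Q4_K_M
    printf("repo=%s tag=%s\n", parsed.first.c_str(), parsed.second.c_str());
    return 0;
}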

common/arg.h

Lines changed: 9 additions & 0 deletions
@@ -78,3 +78,12 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
 
 // function to be used by test-arg-parser
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
+bool common_has_curl();
+
+struct common_remote_params {
+    std::vector<std::string> headers;
+    long timeout = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
+    long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
+};
+// get remote file content, returns <http_code, raw_response_body>
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
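A usage sketch of the API declared above, assuming a build with LLAMA_USE_CURL and that common/arg.h is on the include path; the URL, timeout, and size cap are placeholder values:

#include "arg.h" // common_has_curl, common_remote_params, common_remote_get_content

#include <cstdio>
#include <string>

int main() {
    if (!common_has_curl()) {
        fprintf(stderr, "built without CURL, cannot fetch remote content\n");
        return 1;
    }

    common_remote_params params;
    params.headers.push_back("Accept: application/json");
    params.timeout  = 30;          // seconds (maps to CURLOPT_TIMEOUT)
    params.max_size = 1024 * 1024; // refuse responses larger than 1 MiB

    // placeholder URL; common_remote_get_content throws std::runtime_error on transport errors
    auto res = common_remote_get_content("https://example.com/manifest.json", params);

    long res_code = res.first;
    std::string body(res.second.data(), res.second.size());
    printf("HTTP %ld, %zu bytes\n", res_code, body.size());
    return res_code == 200 ? 0 : 1;
}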

convert_hf_to_gguf.py

Lines changed: 6 additions & 5 deletions
@@ -2554,11 +2554,12 @@ def set_vocab(self):
         except FileNotFoundError:
             self._set_vocab_gpt2()
 
-    def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
-        for name, data in super().get_tensors():
-            if name.startswith("visual."):
-                continue
-            yield name, data
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+        if name.startswith("visual."):
+            # skip visual tensors
+            return []
+        return [(self.map_tensor_name(name), data_torch)]
 
 
 @ModelBase.register("WavTokenizerDec")

examples/llava/clip-impl.h

Lines changed: 18 additions & 17 deletions
@@ -17,22 +17,15 @@
 #define KEY_FTYPE "general.file_type"
 #define KEY_NAME "general.name"
 #define KEY_DESCRIPTION "general.description"
-#define KEY_HAS_TEXT_ENC "clip.has_text_encoder"
-#define KEY_HAS_VIS_ENC "clip.has_vision_encoder"
-#define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector"
-#define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector"
-#define KEY_HAS_GLM_PROJ "clip.has_glm_projector"
 #define KEY_MINICPMV_VERSION "clip.minicpmv_version"
-#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger"
 #define KEY_USE_GELU "clip.use_gelu"
 #define KEY_USE_SILU "clip.use_silu"
-#define KEY_N_EMBD "clip.%s.embedding_length"
-#define KEY_N_FF "clip.%s.feed_forward_length"
-#define KEY_N_BLOCK "clip.%s.block_count"
-#define KEY_N_HEAD "clip.%s.attention.head_count"
-#define KEY_LAYER_NORM_EPS "clip.%s.attention.layer_norm_epsilon"
-#define KEY_PROJ_DIM "clip.%s.projection_dim"
-#define KEY_TOKENS "tokenizer.ggml.tokens"
+#define KEY_N_EMBD "clip.vision.embedding_length"
+#define KEY_N_FF "clip.vision.feed_forward_length"
+#define KEY_N_BLOCK "clip.vision.block_count"
+#define KEY_N_HEAD "clip.vision.attention.head_count"
+#define KEY_LAYER_NORM_EPS "clip.vision.attention.layer_norm_epsilon"
+#define KEY_PROJ_DIM "clip.vision.projection_dim"
 #define KEY_IMAGE_SIZE "clip.vision.image_size"
 #define KEY_PATCH_SIZE "clip.vision.patch_size"
 #define KEY_IMAGE_MEAN "clip.vision.image_mean"
@@ -41,9 +34,14 @@
 #define KEY_PROJ_SCALE_FACTOR "clip.vision.projector.scale_factor"
 #define KEY_PROJ_TYPE "clip.projector_type"
 
+#define KEY_USE_GLU_MLP "clip.use_glu_mlp"   // for qwen2.5vl
+#define KEY_USE_RMS_NORM "clip.use_rms_norm" // for qwen2.5vl
+
 #define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
 #define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints"
 #define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution"
+#define KEY_WIN_ATTN_PATTERN "clip.vision.n_wa_pattern"
+#define KEY_ATTN_WINDOW_SIZE "clip.vision.window_size"
 
 
 //
@@ -62,6 +60,7 @@
 #define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s"
 #define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
 #define TN_FFN_UP "%s.blk.%d.ffn_up.%s"
+#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
 #define TN_LN_1 "%s.blk.%d.ln1.%s"
 #define TN_LN_2 "%s.blk.%d.ln2.%s"
 #define TN_LN_PRE "%s.pre_ln.%s"
@@ -96,22 +95,24 @@ enum projector_type {
     PROJECTOR_TYPE_MLP_NORM,
     PROJECTOR_TYPE_LDP,
     PROJECTOR_TYPE_LDPV2,
-    PROJECTOR_TYPE_RESAMPLER,
+    PROJECTOR_TYPE_MINICPMV,
     PROJECTOR_TYPE_GLM_EDGE,
-    PROJECTOR_TYPE_MERGER,
+    PROJECTOR_TYPE_QWEN2VL,
     PROJECTOR_TYPE_GEMMA3,
     PROJECTOR_TYPE_IDEFICS3,
     PROJECTOR_TYPE_PIXTRAL,
+    PROJECTOR_TYPE_QWEN25VL,
     PROJECTOR_TYPE_UNKNOWN,
 };
 
 static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
     { PROJECTOR_TYPE_MLP, "mlp" },
     { PROJECTOR_TYPE_LDP, "ldp" },
     { PROJECTOR_TYPE_LDPV2, "ldpv2"},
-    { PROJECTOR_TYPE_RESAMPLER, "resampler"},
+    { PROJECTOR_TYPE_MINICPMV, "resampler"},
     { PROJECTOR_TYPE_GLM_EDGE, "adapter"},
-    { PROJECTOR_TYPE_MERGER, "qwen2vl_merger"},
+    { PROJECTOR_TYPE_QWEN2VL, "qwen2vl_merger"},
+    { PROJECTOR_TYPE_QWEN25VL, "qwen2.5vl_merger"},
     { PROJECTOR_TYPE_GEMMA3, "gemma3"},
     { PROJECTOR_TYPE_IDEFICS3, "idefics3"},
    { PROJECTOR_TYPE_PIXTRAL, "pixtral"},
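The enum values above were renamed, but the GGUF strings they map to are unchanged, so a name-to-type lookup over PROJECTOR_TYPE_NAMES behaves the same. A standalone sketch of such a reverse lookup; the enum and map are trimmed copies for illustration only (the real definitions live in clip-impl.h), and projector_type_from_name is a hypothetical helper:

#include <cstdio>
#include <map>
#include <string>

// trimmed copy of the enum and map from clip-impl.h, for illustration only
enum projector_type {
    PROJECTOR_TYPE_MINICPMV,   // was PROJECTOR_TYPE_RESAMPLER
    PROJECTOR_TYPE_QWEN2VL,    // was PROJECTOR_TYPE_MERGER
    PROJECTOR_TYPE_QWEN25VL,   // new in this change
    PROJECTOR_TYPE_UNKNOWN,
};

static const std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
    { PROJECTOR_TYPE_MINICPMV, "resampler"        },
    { PROJECTOR_TYPE_QWEN2VL,  "qwen2vl_merger"   },
    { PROJECTOR_TYPE_QWEN25VL, "qwen2.5vl_merger" },
};

// map the clip.projector_type string from a GGUF header back to the enum value
static projector_type projector_type_from_name(const std::string & name) {
    for (const auto & kv : PROJECTOR_TYPE_NAMES) {
        if (kv.second == name) {
            return kv.first;
        }
    }
    return PROJECTOR_TYPE_UNKNOWN;
}

int main() {
    // prints the integer value of PROJECTOR_TYPE_QWEN25VL
    printf("%d\n", (int) projector_type_from_name("qwen2.5vl_merger"));
    return 0;
}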
