Skip to content

Commit 1731a32

Browse files
committed
Merge commit 'ded67b94446ef4f7fd988dbde7a12deef9870c13' into concedo_experimental
# Conflicts: # .devops/rocm.Dockerfile # .github/workflows/build.yml # .github/workflows/docker.yml # .github/workflows/release.yml # CODEOWNERS # common/CMakeLists.txt # common/arg.cpp # ggml/src/ggml-opencl/ggml-opencl.cpp # ggml/src/ggml-opencl/kernels/get_rows.cl # ggml/src/ggml-opencl/kernels/pad.cl # ggml/src/ggml-webgpu/ggml-webgpu.cpp # ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py # tests/test-arg-parser.cpp # tests/test-backend-ops.cpp # tools/run/run.cpp
2 parents 4f8f0e5 + ded67b9 commit 1731a32

33 files changed

+1439
-567
lines changed

common/arg.cpp

Lines changed: 22 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,11 @@
3434
#include <thread>
3535
#include <vector>
3636

37-
//#define LLAMA_USE_CURL
38-
3937
#if defined(LLAMA_USE_CURL)
4038
#include <curl/curl.h>
4139
#include <curl/easy.h>
4240
#elif defined(LLAMA_USE_HTTPLIB)
43-
#include <cpp-httplib/httplib.h>
41+
#include "http.h"
4442
#endif
4543

4644
#ifdef __linux__
@@ -56,6 +54,13 @@
5654
#endif
5755
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
5856

57+
// isatty
58+
#if defined(_WIN32)
59+
#include <io.h>
60+
#else
61+
#include <unistd.h>
62+
#endif
63+
5964
using json = nlohmann::ordered_json;
6065

6166
std::initializer_list<enum llama_example> mmproj_examples = {
@@ -102,6 +107,14 @@ static void write_file(const std::string & fname, const std::string & content) {
102107
}
103108
}
104109

110+
static bool is_output_a_tty() {
111+
#if defined(_WIN32)
112+
return _isatty(_fileno(stdout));
113+
#else
114+
return isatty(1);
115+
#endif
116+
}
117+
105118
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
106119
this->examples = std::move(examples);
107120
return *this;
@@ -268,10 +281,6 @@ static std::string read_etag(const std::string & path) {
268281

269282
#ifdef LLAMA_USE_CURL
270283

271-
bool common_has_curl() {
272-
return true;
273-
}
274-
275284
//
276285
// CURL utils
277286
//
@@ -588,83 +597,11 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
588597
#else
589598

590599
#ifdef LLAMA_USE_HTTPLIB
591-
592-
bool common_has_curl() {
593-
return false;
594-
}
595-
596-
struct common_url {
597-
std::string scheme;
598-
std::string user;
599-
std::string password;
600-
std::string host;
601-
std::string path;
602-
};
603-
604-
static common_url parse_url(const std::string & url) {
605-
common_url parts;
606-
auto scheme_end = url.find("://");
607-
608-
if (scheme_end == std::string::npos) {
609-
throw std::runtime_error("invalid URL: no scheme");
610-
}
611-
parts.scheme = url.substr(0, scheme_end);
612-
613-
if (parts.scheme != "http" && parts.scheme != "https") {
614-
throw std::runtime_error("unsupported URL scheme: " + parts.scheme);
615-
}
616-
617-
auto rest = url.substr(scheme_end + 3);
618-
auto at_pos = rest.find('@');
619-
620-
if (at_pos != std::string::npos) {
621-
auto auth = rest.substr(0, at_pos);
622-
auto colon_pos = auth.find(':');
623-
if (colon_pos != std::string::npos) {
624-
parts.user = auth.substr(0, colon_pos);
625-
parts.password = auth.substr(colon_pos + 1);
626-
} else {
627-
parts.user = auth;
628-
}
629-
rest = rest.substr(at_pos + 1);
630-
}
631-
632-
auto slash_pos = rest.find('/');
633-
634-
if (slash_pos != std::string::npos) {
635-
parts.host = rest.substr(0, slash_pos);
636-
parts.path = rest.substr(slash_pos);
637-
} else {
638-
parts.host = rest;
639-
parts.path = "/";
640-
}
641-
return parts;
642-
}
643-
644-
static std::pair<httplib::Client, common_url> http_client(const std::string & url) {
645-
common_url parts = parse_url(url);
646-
647-
if (parts.host.empty()) {
648-
throw std::runtime_error("error: invalid URL format");
649-
}
650-
651-
if (!parts.user.empty()) {
652-
throw std::runtime_error("error: user:password@ not supported yet"); // TODO
600+
static void print_progress(size_t current, size_t total) {
601+
if (!is_output_a_tty()) {
602+
return;
653603
}
654604

655-
httplib::Client cli(parts.scheme + "://" + parts.host);
656-
cli.set_follow_location(true);
657-
658-
// TODO cert
659-
660-
return { std::move(cli), std::move(parts) };
661-
}
662-
663-
static std::string show_masked_url(const common_url & parts) {
664-
return parts.scheme + "://" + (parts.user.empty() ? "" : "****:****@") + parts.host + parts.path;
665-
}
666-
667-
static void print_progress(size_t current, size_t total) { // TODO isatty
668605
if (!total) {
669606
return;
670607
}
@@ -752,7 +689,7 @@ static bool common_download_file_single_online(const std::string & url,
752689
static const int max_attempts = 3;
753690
static const int retry_delay_seconds = 2;
754691

755-
auto [cli, parts] = http_client(url);
692+
auto [cli, parts] = common_http_client(url);
756693

757694
httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
758695
if (!bearer_token.empty()) {
@@ -832,7 +769,7 @@ static bool common_download_file_single_online(const std::string & url,
832769

833770
// start the download
834771
LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
835-
__func__, show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
772+
__func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
836773
const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
837774
if (!was_pull_successful) {
838775
if (i + 1 < max_attempts) {
@@ -860,7 +797,7 @@ static bool common_download_file_single_online(const std::string & url,
860797

861798
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
862799
const common_remote_params & params) {
863-
auto [cli, parts] = http_client(url);
800+
auto [cli, parts] = common_http_client(url);
864801

865802
httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
866803
for (const auto & header : params.headers) {

common/arg.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
7878

7979
// function to be used by test-arg-parser
8080
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
81-
bool common_has_curl();
8281

8382
struct common_remote_params {
8483
std::vector<std::string> headers;

common/http.h

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#pragma once
2+
3+
#include <cpp-httplib/httplib.h>
4+
5+
struct common_http_url {
6+
std::string scheme;
7+
std::string user;
8+
std::string password;
9+
std::string host;
10+
std::string path;
11+
};
12+
13+
static common_http_url common_http_parse_url(const std::string & url) {
14+
common_http_url parts;
15+
auto scheme_end = url.find("://");
16+
17+
if (scheme_end == std::string::npos) {
18+
throw std::runtime_error("invalid URL: no scheme");
19+
}
20+
parts.scheme = url.substr(0, scheme_end);
21+
22+
if (parts.scheme != "http" && parts.scheme != "https") {
23+
throw std::runtime_error("unsupported URL scheme: " + parts.scheme);
24+
}
25+
26+
auto rest = url.substr(scheme_end + 3);
27+
auto at_pos = rest.find('@');
28+
29+
if (at_pos != std::string::npos) {
30+
auto auth = rest.substr(0, at_pos);
31+
auto colon_pos = auth.find(':');
32+
if (colon_pos != std::string::npos) {
33+
parts.user = auth.substr(0, colon_pos);
34+
parts.password = auth.substr(colon_pos + 1);
35+
} else {
36+
parts.user = auth;
37+
}
38+
rest = rest.substr(at_pos + 1);
39+
}
40+
41+
auto slash_pos = rest.find('/');
42+
43+
if (slash_pos != std::string::npos) {
44+
parts.host = rest.substr(0, slash_pos);
45+
parts.path = rest.substr(slash_pos);
46+
} else {
47+
parts.host = rest;
48+
parts.path = "/";
49+
}
50+
return parts;
51+
}
52+
53+
static std::pair<httplib::Client, common_http_url> common_http_client(const std::string & url) {
54+
common_http_url parts = common_http_parse_url(url);
55+
56+
if (parts.host.empty()) {
57+
throw std::runtime_error("error: invalid URL format");
58+
}
59+
60+
httplib::Client cli(parts.scheme + "://" + parts.host);
61+
62+
if (!parts.user.empty()) {
63+
cli.set_basic_auth(parts.user, parts.password);
64+
}
65+
66+
cli.set_follow_location(true);
67+
68+
return { std::move(cli), std::move(parts) };
69+
}
70+
71+
static std::string common_http_show_masked_url(const common_http_url & parts) {
72+
return parts.scheme + "://" + (parts.user.empty() ? "" : "****:****@") + parts.host + parts.path;
73+
}

convert_hf_to_gguf.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4255,7 +4255,8 @@ def set_gguf_parameters(self):
42554255
# This logic matches modeling_plamo.py's is_mamba function
42564256
mamba_step = hparams.get("mamba_step", 2)
42574257
mamba_enabled = hparams.get("mamba_enabled", True)
4258-
mamba_layers = []
4258+
num_key_value_heads = []
4259+
num_attention_heads = []
42594260

42604261
if mamba_enabled:
42614262
for i in range(block_count):
@@ -4265,17 +4266,21 @@ def set_gguf_parameters(self):
42654266
else:
42664267
is_mamba = (i % mamba_step) != (mamba_step // 2)
42674268
if is_mamba:
4268-
mamba_layers.append(0)
4269+
num_key_value_heads.append(0)
4270+
num_attention_heads.append(0)
42694271
else:
4270-
mamba_layers.append(hparams.get("num_key_value_heads", 4))
4272+
num_key_value_heads.append(hparams.get("num_key_value_heads", 4))
4273+
num_attention_heads.append(hparams.get("num_attention_heads", 32))
42714274

4272-
if mamba_layers:
4273-
self.gguf_writer.add_head_count_kv(mamba_layers)
4275+
if num_key_value_heads and num_attention_heads:
4276+
self.gguf_writer.add_head_count_kv(num_key_value_heads)
4277+
self.gguf_writer.add_head_count(num_attention_heads)
42744278

42754279
self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
42764280
self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
4281+
self.gguf_writer.add_key_length(hparams.get("hidden_size_per_head", 128))
4282+
self.gguf_writer.add_value_length(hparams.get("hidden_size_per_head", 128))
42774283
self.gguf_writer.add_block_count(block_count)
4278-
self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
42794284
self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
42804285
self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 10000))
42814286

ggml/include/ggml.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@
237237
#define GGML_EXIT_SUCCESS 0
238238
#define GGML_EXIT_ABORTED 1
239239

240+
// TODO: convert to enum https://github.com/ggml-org/llama.cpp/pull/16187#discussion_r2388538726
241+
#define GGML_ROPE_TYPE_NORMAL 0
240242
#define GGML_ROPE_TYPE_NEOX 2
241243
#define GGML_ROPE_TYPE_MROPE 8
242244
#define GGML_ROPE_TYPE_VISION 24

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,14 @@
2121
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
2222
// We use VULKAN_HPP_DEFAULT_DISPATCHER, but not VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
2323
// to avoid conflicts with applications or other libraries who might use it.
24+
#if VK_HEADER_VERSION >= 301
2425
namespace vk::detail { class DispatchLoaderDynamic; }
25-
vk::detail::DispatchLoaderDynamic & ggml_vk_default_dispatcher();
26+
using vk::detail::DispatchLoaderDynamic;
27+
#else
28+
namespace vk { class DispatchLoaderDynamic; }
29+
using vk::DispatchLoaderDynamic;
30+
#endif
31+
DispatchLoaderDynamic & ggml_vk_default_dispatcher();
2632
#define VULKAN_HPP_DEFAULT_DISPATCHER ggml_vk_default_dispatcher()
2733

2834
#include <vulkan/vulkan.hpp>
@@ -4568,9 +4574,8 @@ static bool ggml_vk_instance_portability_enumeration_ext_available(const std::ve
45684574
static bool ggml_vk_instance_debug_utils_ext_available(const std::vector<vk::ExtensionProperties> & instance_extensions);
45694575
static bool ggml_vk_device_is_supported(const vk::PhysicalDevice & vkdev);
45704576

4571-
static vk::detail::DispatchLoaderDynamic ggml_vk_default_dispatcher_instance;
4572-
4573-
vk::detail::DispatchLoaderDynamic & ggml_vk_default_dispatcher() {
4577+
static DispatchLoaderDynamic ggml_vk_default_dispatcher_instance;
4578+
DispatchLoaderDynamic & ggml_vk_default_dispatcher() {
45744579
return ggml_vk_default_dispatcher_instance;
45754580
}
45764581

ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)