Skip to content

Commit 5125c0b

Browse files
committed
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .devops/vulkan.Dockerfile # ggml/src/ggml-cpu/CMakeLists.txt # ggml/src/ggml-opencl/ggml-opencl.cpp # ggml/src/ggml-opencl/kernels/set_rows.cl # ggml/src/ggml-vulkan/ggml-vulkan.cpp # ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp # tests/test-backend-ops.cpp # tools/batched-bench/batched-bench.cpp
2 parents a5cc934 + ece0f5c commit 5125c0b

34 files changed

+1321
-371
lines changed

.editorconfig

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,11 @@ end_of_line = unset
6060
charset = unset
6161
trim_trailing_whitespace = unset
6262
insert_final_newline = unset
63+
64+
[benches/**]
65+
indent_style = unset
66+
indent_size = unset
67+
end_of_line = unset
68+
charset = unset
69+
trim_trailing_whitespace = unset
70+
insert_final_newline = unset

common/arg.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
742742
exit(0);
743743
}
744744
));
745+
add_opt(common_arg(
746+
{"-cl", "--cache-list"},
747+
"show list of models in cache",
748+
[](common_params &) {
749+
printf("model cache directory: %s\n", fs_get_cache_directory().c_str());
750+
auto models = common_list_cached_models();
751+
printf("number of models in cache: %zu\n", models.size());
752+
for (size_t i = 0; i < models.size(); i++) {
753+
auto & model = models[i];
754+
printf("%4d. %s\n", (int) i + 1, model.to_string().c_str());
755+
}
756+
exit(0);
757+
}
758+
));
745759
add_opt(common_arg(
746760
{"--completion-bash"},
747761
"print source-able bash completion script for llama.cpp",
@@ -2241,6 +2255,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
22412255
params.is_pp_shared = true;
22422256
}
22432257
).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
2258+
add_opt(common_arg(
2259+
{"-tgs"},
2260+
string_format("is the text generation separated across the different sequences (default: %s)", params.is_tg_separate ? "true" : "false"),
2261+
[](common_params & params) {
2262+
params.is_tg_separate = true;
2263+
}
2264+
).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
22442265
add_opt(common_arg(
22452266
{"-npp"}, "n0,n1,...",
22462267
"number of prompt tokens",

common/common.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,39 @@ std::string fs_get_cache_file(const std::string & filename) {
916916
return cache_directory + filename;
917917
}
918918

919+
std::vector<common_file_info> fs_list_files(const std::string & path) {
920+
std::vector<common_file_info> files;
921+
if (path.empty()) return files;
922+
923+
std::filesystem::path dir(path);
924+
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
925+
return files;
926+
}
927+
928+
for (const auto & entry : std::filesystem::directory_iterator(dir)) {
929+
try {
930+
// Only include regular files (skip directories)
931+
const auto & p = entry.path();
932+
if (std::filesystem::is_regular_file(p)) {
933+
common_file_info info;
934+
info.path = p.string();
935+
info.name = p.filename().string();
936+
try {
937+
info.size = static_cast<size_t>(std::filesystem::file_size(p));
938+
} catch (const std::filesystem::filesystem_error &) {
939+
info.size = 0;
940+
}
941+
files.push_back(std::move(info));
942+
}
943+
} catch (const std::filesystem::filesystem_error &) {
944+
// skip entries we cannot inspect
945+
continue;
946+
}
947+
}
948+
949+
return files;
950+
}
951+
919952

920953
//
921954
// Model utils

common/common.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,8 @@ struct common_params {
456456
float slot_prompt_similarity = 0.1f;
457457

458458
// batched-bench params
459-
bool is_pp_shared = false;
459+
bool is_pp_shared = false;
460+
bool is_tg_separate = false;
460461

461462
std::vector<int32_t> n_pp;
462463
std::vector<int32_t> n_tg;
@@ -607,6 +608,13 @@ bool fs_create_directory_with_parents(const std::string & path);
607608
std::string fs_get_cache_directory();
608609
std::string fs_get_cache_file(const std::string & filename);
609610

611+
// Description of a single file, as returned by fs_list_files().
struct common_file_info {
    std::string path;     // path of the file
    std::string name;     // file name component of `path`
    size_t      size = 0; // in bytes
};

// Returns one common_file_info per file found in the directory `path`.
std::vector<common_file_info> fs_list_files(const std::string & path);
617+
610618
//
611619
// Model utils
612620
//

common/download.cpp

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,22 @@ using json = nlohmann::ordered_json;
5050
// downloader
5151
//
5252

53+
// Check that `repo` has the form "owner/repo".
// Both components must be non-empty and consist only of the characters
// A-Z, a-z, 0-9, '_', '.' and '-'; exactly one '/' separates them.
static bool validate_repo_name(const std::string & repo) {
    const auto is_allowed = [](char c) {
        return (c >= 'A' && c <= 'Z') ||
               (c >= 'a' && c <= 'z') ||
               (c >= '0' && c <= '9') ||
               c == '_' || c == '.' || c == '-';
    };

    const size_t slash = repo.find('/');
    if (slash == std::string::npos) {
        return false; // no separator at all
    }
    if (slash == 0 || slash + 1 == repo.size()) {
        return false; // empty owner or empty repo component
    }

    for (size_t i = 0; i < repo.size(); i++) {
        // any further '/' is rejected here because it is not an allowed character
        if (i != slash && !is_allowed(repo[i])) {
            return false;
        }
    }
    return true;
}
58+
59+
static std::string get_manifest_path(const std::string & repo, const std::string & tag) {
60+
// we use "=" to avoid clashing with other component, while still being allowed on windows
61+
std::string fname = "manifest=" + repo + "=" + tag + ".json";
62+
if (!validate_repo_name(repo)) {
63+
throw std::runtime_error("error: repo name must be in the format 'owner/repo'");
64+
}
65+
string_replace_all(fname, "/", "=");
66+
return fs_get_cache_file(fname);
67+
}
68+
5369
static std::string read_file(const std::string & fname) {
5470
std::ifstream file(fname);
5571
if (!file) {
@@ -851,17 +867,13 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
851867
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
852868
// User-Agent header is already set in common_remote_get_content, no need to set it here
853869

854-
// we use "=" to avoid clashing with other component, while still being allowed on windows
855-
std::string cached_response_fname = "manifest=" + hf_repo + "=" + tag + ".json";
856-
string_replace_all(cached_response_fname, "/", "_");
857-
std::string cached_response_path = fs_get_cache_file(cached_response_fname);
858-
859870
// make the request
860871
common_remote_params params;
861872
params.headers = headers;
862873
long res_code = 0;
863874
std::string res_str;
864875
bool use_cache = false;
876+
std::string cached_response_path = get_manifest_path(hf_repo, tag);
865877
if (!offline) {
866878
try {
867879
auto res = common_remote_get_content(url, params);
@@ -917,6 +929,33 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
917929
return { hf_repo, ggufFile, mmprojFile };
918930
}
919931

932+
std::vector<common_cached_model_info> common_list_cached_models() {
933+
std::vector<common_cached_model_info> models;
934+
const std::string cache_dir = fs_get_cache_directory();
935+
const std::vector<common_file_info> files = fs_list_files(cache_dir);
936+
for (const auto & file : files) {
937+
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
938+
common_cached_model_info model_info;
939+
model_info.manifest_path = file.path;
940+
std::string fname = file.name;
941+
string_replace_all(fname, ".json", ""); // remove extension
942+
auto parts = string_split<std::string>(fname, '=');
943+
if (parts.size() == 4) {
944+
// expect format: manifest=<user>=<model>=<tag>=<other>
945+
model_info.user = parts[1];
946+
model_info.model = parts[2];
947+
model_info.tag = parts[3];
948+
} else {
949+
// invalid format
950+
continue;
951+
}
952+
model_info.size = 0; // TODO: get GGUF size, not manifest size
953+
models.push_back(model_info);
954+
}
955+
}
956+
return models;
957+
}
958+
920959
//
921960
// Docker registry functions
922961
//
@@ -981,6 +1020,7 @@ std::string common_docker_resolve_model(const std::string & docker) {
9811020
std::string token = common_docker_get_token(repo); // Get authentication token
9821021

9831022
// Get manifest
1023+
// TODO: cache the manifest response so that it appears in the model list
9841024
const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
9851025
std::string manifest_url = url_prefix + "/manifests/" + tag;
9861026
common_remote_params manifest_params;

common/download.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,23 @@ struct common_params_model;
88
// download functionalities
99
//
1010

11+
// One cached model, identified by the components of its manifest file name.
struct common_cached_model_info {
    std::string manifest_path; // path of the cached manifest file
    std::string user;          // "<user>" part of "<user>/<model>:<tag>"
    std::string model;         // "<model>" part
    std::string tag;           // "<tag>" part
    size_t      size = 0;      // GGUF size in bytes

    // Formats the entry as "<user>/<model>:<tag>".
    std::string to_string() const {
        std::string repr;
        repr += user;
        repr += "/";
        repr += model;
        repr += ":";
        repr += tag;
        return repr;
    }
};
21+
1122
// Result of resolving a Hugging Face repo (optionally with ":tag") to files.
// Member order matters: the struct is filled by aggregate initialization.
struct common_hf_file_res {
    std::string repo;       // repo name with ":tag" removed
    std::string ggufFile;   // presumably the GGUF file selected for the repo/tag - confirm against common_get_hf_file
    std::string mmprojFile; // presumably the matching mmproj file, if any - confirm against common_get_hf_file
};
1627

17-
// resolve and download model from Docker registry
18-
// return local path to downloaded model file
19-
std::string common_docker_resolve_model(const std::string & docker);
20-
2128
/**
2229
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
2330
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
@@ -39,3 +46,10 @@ bool common_download_model(
3946
const common_params_model & model,
4047
const std::string & bearer_token,
4148
bool offline);
49+
50+
// returns list of cached models
51+
std::vector<common_cached_model_info> common_list_cached_models();
52+
53+
// resolve and download model from Docker registry
54+
// return local path to downloaded model file
55+
std::string common_docker_resolve_model(const std::string & docker);

0 commit comments

Comments
 (0)