
Commit c47c41c

Merge branch 'ggerganov:master' into token
2 parents: ce32516 + 3e0ba0e

124 files changed: +2520 additions, −1568 deletions


.clang-tidy

Lines changed: 2 additions & 0 deletions
@@ -17,8 +17,10 @@ Checks: >
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
     portability-*,
+    -portability-simd-intrinsics,
     misc-*,
     -misc-const-correctness,
     -misc-non-private-member-variables-in-classes,
     -misc-no-recursion,
+    -misc-use-anonymous-namespace,
 FormatStyle: none
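In a .clang-tidy `Checks:` list, an entry prefixed with `-` disables that check, so this change turns off portability-simd-intrinsics and misc-use-anonymous-namespace. Purely as an illustration (not code from this repository, and x86-only because of the intrinsics), the following C++ sketch shows the kind of pattern each of those checks would otherwise flag:

```cpp
#include <immintrin.h>

// misc-use-anonymous-namespace would suggest wrapping file-local 'static'
// globals and functions in an anonymous namespace instead.
static int g_call_count = 0;

// portability-simd-intrinsics would flag direct vendor intrinsics such as
// _mm_add_ps and suggest a portable SIMD abstraction instead.
static __m128 add4(__m128 a, __m128 b) {
    g_call_count++;
    return _mm_add_ps(a, b);
}
```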

.github/workflows/build.yml

Lines changed: 9 additions & 0 deletions
@@ -904,6 +904,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

       - name: Install Cuda Toolkit 11.7
         if: ${{ matrix.cuda == '11.7' }}
@@ -1119,6 +1121,11 @@ jobs:
         run: |
           & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

+      - name: Install ccache
+        uses: hendrikmuhs/[email protected]
+        with:
+          key: ${{ github.job }}
+
       - name: Build
         id: cmake_build
         run: |
@@ -1139,6 +1146,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

       - name: Install
         id: depends

AUTHORS

Lines changed: 185 additions & 1 deletion
Large diffs are not rendered by default.

Makefile

Lines changed: 6 additions & 3 deletions
@@ -254,8 +254,8 @@ endif
 # keep standard at C11 and C++11
 MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU
 MK_CFLAGS = -std=c11 -fPIC
-MK_CXXFLAGS = -std=c++11 -fPIC
-MK_NVCCFLAGS = -std=c++11
+MK_CXXFLAGS = -std=c++17 -fPIC
+MK_NVCCFLAGS = -std=c++17

 ifdef LLAMA_NO_CCACHE
 GGML_NO_CCACHE := 1
@@ -575,9 +575,12 @@ endif

 ifndef GGML_NO_AMX
 MK_CPPFLAGS += -DGGML_USE_AMX
-OBJ_GGML_EXT += ggml/src/ggml-amx/ggml-amx.o ggml/src/ggml-amx/mmq.o
+OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o
 endif

+# only necessary for the CPU backend files
+MK_CPPFLAGS += -Iggml/src/ggml-cpu
+
 ifdef GGML_RPC
 MK_CPPFLAGS += -DGGML_USE_RPC
 OBJ_GGML_EXT += ggml/src/ggml-rpc.o

Package.swift

Lines changed: 3 additions & 1 deletion
@@ -28,13 +28,16 @@ var cSettings: [CSetting] = [
     .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
     .unsafeFlags(["-fno-objc-arc"]),
     .headerSearchPath("ggml/src"),
+    .headerSearchPath("ggml/src/ggml-cpu"),
     // NOTE: NEW_LAPACK will required iOS version 16.4+
     // We should consider add this in the future when we drop support for iOS 14
     // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
     // .define("ACCELERATE_NEW_LAPACK"),
     // .define("ACCELERATE_LAPACK_ILP64")
+    .define("GGML_USE_CPU"),
 ]

+
 #if canImport(Darwin)
 sources.append("ggml/src/ggml-common.h")
 sources.append("ggml/src/ggml-metal/ggml-metal.m")
@@ -44,7 +47,6 @@ cSettings.append(
     contentsOf: [
         .define("GGML_USE_ACCELERATE"),
         .define("GGML_USE_METAL"),
-        .define("GGML_USE_CPU")
     ]
 )
 #endif

README.md

Lines changed: 108 additions & 233 deletions
Large diffs are not rendered by default.

common/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -88,5 +88,5 @@ if (LLAMA_CURL)
 endif ()

 target_include_directories(${TARGET} PUBLIC .)
-target_compile_features (${TARGET} PUBLIC cxx_std_11)
+target_compile_features (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

common/arg.cpp

Lines changed: 11 additions & 5 deletions
@@ -128,7 +128,11 @@ static void common_params_handle_model_default(common_params & params) {
             }
             params.hf_file = params.model;
         } else if (params.model.empty()) {
-            params.model = fs_get_cache_file(string_split<std::string>(params.hf_file, '/').back());
+            // this is to avoid different repo having same file name, or same file name in different subdirs
+            std::string filename = params.hf_repo + "_" + params.hf_file;
+            // to make sure we don't have any slashes in the filename
+            string_replace_all(filename, "/", "_");
+            params.model = fs_get_cache_file(filename);
         }
     } else if (!params.model_url.empty()) {
         if (params.model.empty()) {
@@ -1366,8 +1370,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.n_gpu_layers = value;
             if (!llama_supports_gpu_offload()) {
-                fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers option will be ignored\n");
-                fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
+                fprintf(stderr, "warning: no usable GPU found, --gpu-layers option will be ignored\n");
+                fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n");
+                fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
             }
         }
     ).set_env("LLAMA_ARG_N_GPU_LAYERS"));
@@ -2100,8 +2105,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
            params.speculative.n_gpu_layers = value;
            if (!llama_supports_gpu_offload()) {
-                fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers-draft option will be ignored\n");
-                fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
+                fprintf(stderr, "warning: no usable GPU found, --gpu-layers-draft option will be ignored\n");
+                fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n");
+                fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
            }
        }
    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
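For reference, a minimal standalone sketch of the cache-file naming introduced above: the repo and file are joined with "_" and any "/" is replaced, so "bartowski/Llama-3-GGUF" plus "q4/model.gguf" (illustrative names only) becomes "bartowski_Llama-3-GGUF_q4_model.gguf". The real code uses the project helpers string_replace_all and fs_get_cache_file; replace_all and cache_file_name below are hypothetical stand-ins.

```cpp
#include <string>

// Stand-in for string_replace_all from the diff above.
static void replace_all(std::string & s, const std::string & from, const std::string & to) {
    for (size_t pos = 0; (pos = s.find(from, pos)) != std::string::npos; pos += to.size()) {
        s.replace(pos, from.size(), to);
    }
}

// Derive a flat cache file name from an HF repo + file pair.
static std::string cache_file_name(const std::string & hf_repo, const std::string & hf_file) {
    std::string filename = hf_repo + "_" + hf_file; // avoids collisions across repos and subdirs
    replace_all(filename, "/", "_");                // keep the name free of path separators
    return filename;
}
```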

common/common.cpp

Lines changed: 37 additions & 27 deletions
@@ -652,7 +652,17 @@ bool fs_validate_filename(const std::string & filename) {

     std::u32string filename_utf32;
     try {
+#if defined(__clang__)
+        // disable C++17 deprecation warning for std::codecvt_utf8
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#endif
         std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
+
+#if defined(__clang__)
+#    pragma clang diagnostic pop
+#endif
+
         filename_utf32 = converter.from_bytes(filename);

         // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used,
@@ -829,9 +839,9 @@ struct common_init_result common_init_from_params(common_params & params) {
     llama_model * model = nullptr;

     if (!params.hf_repo.empty() && !params.hf_file.empty()) {
-        model = common_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
     } else if (!params.model_url.empty()) {
-        model = common_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
     } else {
         model = llama_load_model_from_file(params.model.c_str(), mparams);
     }
@@ -1342,17 +1352,17 @@ static bool common_download_file(const std::string & url, const std::string & pa
 }

 struct llama_model * common_load_model_from_url(
-        const char * model_url,
-        const char * path_model,
-        const char * hf_token,
+        const std::string & model_url,
+        const std::string & local_path,
+        const std::string & hf_token,
         const struct llama_model_params & params) {
     // Basic validation of the model_url
-    if (!model_url || strlen(model_url) == 0) {
+    if (model_url.empty()) {
         LOG_ERR("%s: invalid model_url\n", __func__);
         return NULL;
     }

-    if (!common_download_file(model_url, path_model, hf_token)) {
+    if (!common_download_file(model_url, local_path, hf_token)) {
         return NULL;
     }

@@ -1363,9 +1373,9 @@ struct llama_model * common_load_model_from_url(
         /*.no_alloc = */ true,
         /*.ctx      = */ NULL,
     };
-    auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
+    auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
     if (!ctx_gguf) {
-        LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, path_model);
+        LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str());
         return NULL;
     }

@@ -1384,13 +1394,13 @@ struct llama_model * common_load_model_from_url(
     // Verify the first split file format
     // and extract split URL and PATH prefixes
     {
-        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
-            LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, path_model, n_split);
+        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) {
+            LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split);
             return NULL;
         }

-        if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
-            LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url, n_split);
+        if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) {
+            LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split);
             return NULL;
         }
     }
@@ -1417,14 +1427,14 @@ struct llama_model * common_load_model_from_url(
         }
     }

-    return llama_load_model_from_file(path_model, params);
+    return llama_load_model_from_file(local_path.c_str(), params);
 }

 struct llama_model * common_load_model_from_hf(
-        const char * repo,
-        const char * model,
-        const char * path_model,
-        const char * hf_token,
+        const std::string & repo,
+        const std::string & remote_path,
+        const std::string & local_path,
+        const std::string & hf_token,
         const struct llama_model_params & params) {
     // construct hugging face model url:
     //
@@ -1438,27 +1448,27 @@ struct llama_model * common_load_model_from_hf(
     std::string model_url = "https://huggingface.co/";
     model_url += repo;
     model_url += "/resolve/main/";
-    model_url += model;
+    model_url += remote_path;

-    return common_load_model_from_url(model_url.c_str(), path_model, hf_token, params);
+    return common_load_model_from_url(model_url, local_path, hf_token, params);
 }

 #else

 struct llama_model * common_load_model_from_url(
-        const char * /*model_url*/,
-        const char * /*path_model*/,
-        const char * /*hf_token*/,
+        const std::string & /*model_url*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
         const struct llama_model_params & /*params*/) {
     LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
     return nullptr;
 }

 struct llama_model * common_load_model_from_hf(
-        const char * /*repo*/,
-        const char * /*model*/,
-        const char * /*path_model*/,
-        const char * /*hf_token*/,
+        const std::string & /*repo*/,
+        const std::string & /*remote_path*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
         const struct llama_model_params & /*params*/) {
     LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
     return nullptr;
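The pragma block in fs_validate_filename above exists because std::codecvt_utf8 (and std::wstring_convert) are deprecated in C++17, which this commit adopts as the build standard. A standalone sketch of that suppression pattern, assuming clang; to_utf32 is an illustrative helper, not a project API:

```cpp
#include <codecvt>
#include <locale>
#include <string>

static std::u32string to_utf32(const std::string & s) {
#if defined(__clang__)
    // std::codecvt_utf8 / std::wstring_convert are deprecated in C++17;
    // silence -Wdeprecated-declarations around their use only.
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
#if defined(__clang__)
#    pragma clang diagnostic pop
#endif
    // throws std::range_error on invalid UTF-8 input
    return converter.from_bytes(s);
}
```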

common/common.h

Lines changed: 11 additions & 2 deletions
@@ -471,8 +471,17 @@ struct llama_model_params common_model_params_to_llama ( common_params
 struct llama_context_params common_context_params_to_llama(const common_params & params);
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);

-struct llama_model * common_load_model_from_url(const char * model_url, const char * path_model, const char * hf_token, const struct llama_model_params & params);
-struct llama_model * common_load_model_from_hf(const char * repo, const char * file, const char * path_model, const char * hf_token, const struct llama_model_params & params);
+struct llama_model * common_load_model_from_url(
+    const std::string & model_url,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);
+struct llama_model * common_load_model_from_hf(
+    const std::string & repo,
+    const std::string & remote_path,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);

 // clear LoRA adapters from context, then apply new list of adapters
 void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters);
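A hypothetical call site for the new std::string-based declarations above; the repo, file, and path strings are illustrative placeholders only, and the function returns nullptr when llama.cpp is built without libcurl.

```cpp
#include "common.h"
#include "llama.h"

llama_model * load_from_hf_example() {
    llama_model_params mparams = llama_model_default_params();
    // With std::string parameters there is no need for .c_str() at the call site:
    return common_load_model_from_hf(
        /*repo        =*/ "example-org/example-model-GGUF",
        /*remote_path =*/ "subdir/model-q4_0.gguf",
        /*local_path  =*/ "models/model-q4_0.gguf",
        /*hf_token    =*/ "",   // empty token -> anonymous download
        mparams);
}
```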
