
Commit 61607e8

Merge pull request #47 from l3utterfly/master
merge upstream
2 parents bca159c + 266b851 commit 61607e8


56 files changed: 1,640 additions, 486 deletions

.github/workflows/build.yml

Lines changed: 4 additions & 0 deletions
@@ -904,6 +904,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

       - name: Install Cuda Toolkit 11.7
         if: ${{ matrix.cuda == '11.7' }}

@@ -1139,6 +1141,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

       - name: Install
         id: depends

AUTHORS

Lines changed: 185 additions & 1 deletion
Large diffs are not rendered by default.

common/arg.cpp

Lines changed: 11 additions & 5 deletions
@@ -128,7 +128,11 @@ static void common_params_handle_model_default(common_params & params) {
             }
             params.hf_file = params.model;
         } else if (params.model.empty()) {
-            params.model = fs_get_cache_file(string_split<std::string>(params.hf_file, '/').back());
+            // this is to avoid different repo having same file name, or same file name in different subdirs
+            std::string filename = params.hf_repo + "_" + params.hf_file;
+            // to make sure we don't have any slashes in the filename
+            string_replace_all(filename, "/", "_");
+            params.model = fs_get_cache_file(filename);
         }
     } else if (!params.model_url.empty()) {
         if (params.model.empty()) {

@@ -1366,8 +1370,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.n_gpu_layers = value;
             if (!llama_supports_gpu_offload()) {
-                fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers option will be ignored\n");
-                fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
+                fprintf(stderr, "warning: no usable GPU found, --gpu-layers option will be ignored\n");
+                fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n");
+                fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
             }
         }
     ).set_env("LLAMA_ARG_N_GPU_LAYERS"));

@@ -2100,8 +2105,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.speculative.n_gpu_layers = value;
             if (!llama_supports_gpu_offload()) {
-                fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers-draft option will be ignored\n");
-                fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
+                fprintf(stderr, "warning: no usable GPU found, --gpu-layers-draft option will be ignored\n");
+                fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n");
+                fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
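For context, here is a minimal standalone sketch of the cache-naming rule the first hunk introduces. The `replace_all` helper stands in for llama.cpp's `string_replace_all`, and the repo/file values are made up for illustration:

```cpp
// Sketch: derive a collision-free cache file name from an HF repo and file path.
#include <iostream>
#include <string>

// Stand-in for string_replace_all() from common.
static void replace_all(std::string & s, const std::string & from, const std::string & to) {
    for (size_t pos = 0; (pos = s.find(from, pos)) != std::string::npos; pos += to.size()) {
        s.replace(pos, from.size(), to);
    }
}

int main() {
    std::string hf_repo = "some-org/some-model-GGUF";   // hypothetical repo
    std::string hf_file = "subdir/model-Q4_K_M.gguf";   // hypothetical file within the repo

    // join repo and file so two repos that ship the same file name cannot collide in the cache
    std::string filename = hf_repo + "_" + hf_file;
    replace_all(filename, "/", "_");                    // no slashes in the cached file name

    std::cout << filename << "\n";  // some-org_some-model-GGUF_subdir_model-Q4_K_M.gguf
    return 0;
}
```

Prefixing the repo name means the cache entry is keyed by repo and path together, not just the bare file name.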

common/common.cpp

Lines changed: 27 additions & 27 deletions
@@ -829,9 +829,9 @@ struct common_init_result common_init_from_params(common_params & params) {
     llama_model * model = nullptr;

     if (!params.hf_repo.empty() && !params.hf_file.empty()) {
-        model = common_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
     } else if (!params.model_url.empty()) {
-        model = common_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+        model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
     } else {
         model = llama_load_model_from_file(params.model.c_str(), mparams);
     }

@@ -1342,17 +1342,17 @@ static bool common_download_file(const std::string & url, const std::string & pa
 }

 struct llama_model * common_load_model_from_url(
-        const char * model_url,
-        const char * path_model,
-        const char * hf_token,
+        const std::string & model_url,
+        const std::string & local_path,
+        const std::string & hf_token,
         const struct llama_model_params & params) {
     // Basic validation of the model_url
-    if (!model_url || strlen(model_url) == 0) {
+    if (model_url.empty()) {
         LOG_ERR("%s: invalid model_url\n", __func__);
         return NULL;
     }

-    if (!common_download_file(model_url, path_model, hf_token)) {
+    if (!common_download_file(model_url, local_path, hf_token)) {
         return NULL;
     }

@@ -1363,9 +1363,9 @@ struct llama_model * common_load_model_from_url(
         /*.no_alloc = */ true,
         /*.ctx      = */ NULL,
     };
-    auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
+    auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
     if (!ctx_gguf) {
-        LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, path_model);
+        LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str());
         return NULL;
     }

@@ -1384,13 +1384,13 @@ struct llama_model * common_load_model_from_url(
     // Verify the first split file format
     // and extract split URL and PATH prefixes
     {
-        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
-            LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, path_model, n_split);
+        if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) {
+            LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split);
             return NULL;
         }

-        if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
-            LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url, n_split);
+        if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) {
+            LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split);
             return NULL;
         }
     }

@@ -1417,14 +1417,14 @@ struct llama_model * common_load_model_from_url(
         }
     }

-    return llama_load_model_from_file(path_model, params);
+    return llama_load_model_from_file(local_path.c_str(), params);
 }

 struct llama_model * common_load_model_from_hf(
-        const char * repo,
-        const char * model,
-        const char * path_model,
-        const char * hf_token,
+        const std::string & repo,
+        const std::string & remote_path,
+        const std::string & local_path,
+        const std::string & hf_token,
         const struct llama_model_params & params) {
     // construct hugging face model url:
     //

@@ -1438,27 +1438,27 @@ struct llama_model * common_load_model_from_hf(
     std::string model_url = "https://huggingface.co/";
     model_url += repo;
     model_url += "/resolve/main/";
-    model_url += model;
+    model_url += remote_path;

-    return common_load_model_from_url(model_url.c_str(), path_model, hf_token, params);
+    return common_load_model_from_url(model_url, local_path, hf_token, params);
 }

 #else

 struct llama_model * common_load_model_from_url(
-        const char * /*model_url*/,
-        const char * /*path_model*/,
-        const char * /*hf_token*/,
+        const std::string & /*model_url*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
         const struct llama_model_params & /*params*/) {
     LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
     return nullptr;
 }

 struct llama_model * common_load_model_from_hf(
-        const char * /*repo*/,
-        const char * /*model*/,
-        const char * /*path_model*/,
-        const char * /*hf_token*/,
+        const std::string & /*repo*/,
+        const std::string & /*remote_path*/,
+        const std::string & /*local_path*/,
+        const std::string & /*hf_token*/,
         const struct llama_model_params & /*params*/) {
     LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
     return nullptr;

common/common.h

Lines changed: 11 additions & 2 deletions
@@ -470,8 +470,17 @@ struct llama_model_params common_model_params_to_llama ( common_params
 struct llama_context_params common_context_params_to_llama(const common_params & params);
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);

-struct llama_model * common_load_model_from_url(const char * model_url, const char * path_model, const char * hf_token, const struct llama_model_params & params);
-struct llama_model * common_load_model_from_hf(const char * repo, const char * file, const char * path_model, const char * hf_token, const struct llama_model_params & params);
+struct llama_model * common_load_model_from_url(
+    const std::string & model_url,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);
+struct llama_model * common_load_model_from_hf(
+    const std::string & repo,
+    const std::string & remote_path,
+    const std::string & local_path,
+    const std::string & hf_token,
+    const struct llama_model_params & params);

 // clear LoRA adapters from context, then apply new list of adapters
 void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters);
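A rough usage sketch of the new `std::string`-based declarations above, assuming a program linked against llama.cpp's `common` library; the repo, file, and cache path values are hypothetical:

```cpp
// Sketch only: calling the new overloads declared in common.h.
#include "common.h"
#include "llama.h"

int main() {
    llama_model_params mparams = llama_model_default_params();

    // Downloads <repo>/resolve/main/<remote_path> (unless already cached at local_path), then loads it.
    llama_model * model = common_load_model_from_hf(
        "some-org/some-model-GGUF",   // repo (hypothetical)
        "some-model-Q4_K_M.gguf",     // remote_path inside the repo (hypothetical)
        "models/some-model.gguf",     // local_path: where the download is cached on disk
        "",                           // hf_token: empty for public repos
        mparams);

    if (model == nullptr) {
        return 1; // nullptr on download/load failure, or when built without libcurl
    }

    llama_free_model(model);
    return 0;
}
```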

docs/android.md

Lines changed: 2 additions & 2 deletions
@@ -23,10 +23,10 @@ $ curl -L {model-url} -o ~/{model}.gguf
 Then, if you are not already in the repo directory, `cd` into `llama.cpp` and:

 ```
-$ ./build/bin/llama-simple -m ~/{model}.gguf -c {context-size} -p "{your-prompt}"
+$ ./build/bin/llama-cli -m ~/{model}.gguf -c {context-size} -p "{your-prompt}"
 ```

-Here, we show `llama-simple`, but any of the executables under `examples` should work, in theory. Be sure to set `context-size` to a reasonable number (say, 4096) to start with; otherwise, memory could spike and kill your terminal.
+Here, we show `llama-cli`, but any of the executables under `examples` should work, in theory. Be sure to set `context-size` to a reasonable number (say, 4096) to start with; otherwise, memory could spike and kill your terminal.

 To see what it might look like visually, here's an old demo of an interactive session running on a Pixel 5 phone:

docs/backend/CANN.md

Lines changed: 4 additions & 0 deletions
@@ -23,6 +23,8 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the abi

 ## News

+- 2024.11
+  - Support F16 and F32 data type model for Ascend 310P NPU.
 - 2024.8
   - Support `Q4_0` and `Q8_0` data type for Ascend NPU.
 - 2024.7

@@ -40,9 +42,11 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilize the abi
 ### Ascend NPU

 **Verified devices**
+
 | Ascend NPU     | Status  |
 |:--------------:|:-------:|
 | Atlas 300T A2  | Support |
+| Atlas 300I Duo | Support |

 *Notes:*

examples/llava/clip.cpp

Lines changed: 11 additions & 4 deletions
@@ -40,10 +40,17 @@
 #include <cinttypes>
 #include <limits>

-#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
-#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
-#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
-#define LOG_DBG(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#if defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...)
+#   define LOG_WRN(...)
+#   define LOG_ERR(...)
+#   define LOG_DBG(...)
+#else // defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#   define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#endif // defined(LLAVA_LOG_OFF)

 //#define CLIP_DEBUG_FUNCTIONS
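The same `LLAVA_LOG_OFF` guard is added to `llava.cpp` below. As a standalone illustration of the pattern (independent of the llava sources), defining the macro at compile time turns every log call into a no-op:

```cpp
// Minimal sketch of the conditional logging macros above.
// Build normally to keep logging, or with -DLLAVA_LOG_OFF to compile the calls away.
#include <cstdio>

#if defined(LLAVA_LOG_OFF)
#   define LOG_INF(...)
#   define LOG_ERR(...)
#else
#   define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
#   define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#endif

int main() {
    LOG_INF("encoding image: %d x %d\n", 336, 336);            // no-op when LLAVA_LOG_OFF is defined
    LOG_ERR("example error message: %s\n", "could not open file");
    return 0;
}
```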

examples/llava/llava.cpp

Lines changed: 19 additions & 9 deletions
@@ -11,13 +11,17 @@
 #include <limits>
 #include <vector>

-#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0)
-#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)
-
-#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
-#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
-#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
-#define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#if defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...)
+#   define LOG_WRN(...)
+#   define LOG_ERR(...)
+#   define LOG_DBG(...)
+#else // defined(LLAVA_LOG_OFF)
+#   define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#   define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+#   define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+#endif // defined(LLAVA_LOG_OFF)

 // RGB uint8 image
 struct clip_image_u8 {

@@ -498,10 +502,16 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long
     errno = 0;
     size_t ret = fread(buffer, 1, fileSize, file); // Read the file into the buffer
     if (ferror(file)) {
-        die_fmt("read error: %s", strerror(errno));
+        LOG_ERR("read error: %s", strerror(errno));
+        free(buffer);
+        fclose(file);
+        return false;
     }
     if (ret != (size_t) fileSize) {
-        die("unexpectedly reached end of file");
+        LOG_ERR("unexpectedly reached end of file");
+        free(buffer);
+        fclose(file);
+        return false;
     }
     fclose(file); // Close the file

examples/server/tests/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -2,6 +2,6 @@ aiohttp~=3.9.3
 pytest~=8.3.3
 huggingface_hub~=0.23.2
 numpy~=1.26.4
-openai~=1.30.3
+openai~=1.55.3
 prometheus-client~=0.20.0
 requests~=2.32.3
