Commit 65a431d

Merge branch 'master' into xsn/mergekit_extract_lora_compat
2 parents: f564e02 + 80ccf5d

File tree

11 files changed: +837 -83 lines changed


.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion

@@ -665,7 +665,7 @@ jobs:
           - build: 'llvm-arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'msvc-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'llvm-arm64-opencl-adreno'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'

.github/workflows/docker.yml

Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ jobs:
       # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
       - name: Free Disk Space (Ubuntu)
         if: ${{ matrix.config.free_disk_space == true }}
-        uses: jlumbroso/free-disk-space@main
+        uses: jlumbroso/free-disk-space@v1.3.1
         with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB

.github/workflows/editorconfig.yml

Lines changed: 3 additions & 1 deletion

@@ -23,5 +23,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
-    - uses: editorconfig-checker/action-editorconfig-checker@main
+    - uses: editorconfig-checker/action-editorconfig-checker@v2
+      with:
+        version: v3.0.3
     - run: editorconfig-checker

common/arg.cpp

Lines changed: 13 additions & 4 deletions

@@ -22,6 +22,11 @@ common_arg & common_arg::set_examples(std::initializer_list<enum llama_example>
     return *this;
 }
 
+common_arg & common_arg::set_excludes(std::initializer_list<enum llama_example> excludes) {
+    this->excludes = std::move(excludes);
+    return *this;
+}
+
 common_arg & common_arg::set_env(const char * env) {
     help = help + "\n(env: " + env + ")";
     this->env = env;
@@ -37,6 +42,10 @@ bool common_arg::in_example(enum llama_example ex) {
     return examples.find(ex) != examples.end();
 }
 
+bool common_arg::is_exclude(enum llama_example ex) {
+    return excludes.find(ex) != excludes.end();
+}
+
 bool common_arg::get_value_from_env(std::string & output) {
     if (env == nullptr) return false;
     char * value = std::getenv(env);
@@ -420,7 +429,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
     auto add_opt = [&](common_arg arg) {
-        if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
+        if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
            ctx_arg.options.push_back(std::move(arg));
        }
    };
@@ -649,7 +658,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
        [](common_params & params, const std::string & value) {
            params.prompt = value;
        }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
    add_opt(common_arg(
        {"--no-perf"},
        string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),
@@ -673,7 +682,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                params.prompt.pop_back();
            }
        }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
    add_opt(common_arg(
        {"--in-file"}, "FNAME",
        "an input file (repeat to specify multiple files)",
@@ -700,7 +709,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.prompt = ss.str();
            fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str());
        }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
    add_opt(common_arg(
        {"-e", "--escape"},
        string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"),

common/arg.h

Lines changed: 3 additions & 0 deletions

@@ -12,6 +12,7 @@
 
 struct common_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
     std::vector<const char *> args;
     const char * value_hint = nullptr; // help text or example for arg value
     const char * value_hint_2 = nullptr; // for second arg value
@@ -53,9 +54,11 @@ struct common_arg {
     ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
 
     common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
     common_arg & set_env(const char * env);
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
     bool get_value_from_env(std::string & output);
     bool has_value_from_env();
     std::string to_string();
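Because set_excludes() returns a reference like the other setters, it chains directly onto a registration. A hypothetical registration in the style of this diff (the option name and help text are made up for illustration, and the no-value constructor form is assumed from the surrounding code):

    // Offered wherever COMMON options apply, except in the server example,
    // so it disappears from llama-server's --help and argument parser.
    add_opt(common_arg(
        {"--demo-flag"},
        "a hypothetical option hidden from the server",
        [](common_params & params) {
            // handler body
        }
    ).set_excludes({LLAMA_EXAMPLE_SERVER}));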

examples/server/README.md

Lines changed: 0 additions & 3 deletions

@@ -45,10 +45,7 @@ The project is under active development, and we are [looking for feedback and co
 | `-ub, --ubatch-size N` | physical maximum batch size (default: 512)<br/>(env: LLAMA_ARG_UBATCH) |
 | `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) |
 | `-fa, --flash-attn` | enable Flash Attention (default: disabled)<br/>(env: LLAMA_ARG_FLASH_ATTN) |
-| `-p, --prompt PROMPT` | prompt to start generation with |
 | `--no-perf` | disable internal libllama performance timings (default: false)<br/>(env: LLAMA_ARG_NO_PERF) |
-| `-f, --file FNAME` | a file containing the prompt (default: none) |
-| `-bf, --binary-file FNAME` | binary file containing the prompt (default: none) |
 | `-e, --escape` | process escapes sequences (\n, \r, \t, \', \", \\) (default: true) |
 | `--no-escape` | do not process escape sequences |
 | `--rope-scaling {none,linear,yarn}` | RoPE frequency scaling method, defaults to linear unless specified by the model<br/>(env: LLAMA_ARG_ROPE_SCALING_TYPE) |
