
Commit 4da48e4

Author: prima
Merge remote-tracking branch 'origin/concedo_experimental' into remoteManagement
Parents: bf467f2 + f142221


47 files changed: 95,917 additions and 544 deletions

.editorconfig

Lines changed: 4 additions & 0 deletions

@@ -48,3 +48,7 @@ end_of_line = unset
 charset = unset
 trim_trailing_whitespace = unset
 insert_final_newline = unset
+
+[tools/mtmd/miniaudio.h]
+trim_trailing_whitespace = unset
+insert_final_newline = unset

common/arg.cpp

Lines changed: 5 additions & 5 deletions

@@ -40,7 +40,7 @@
 using json = nlohmann::ordered_json;

 std::initializer_list<enum llama_example> mmproj_examples = {
-    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_SERVER,
 };


@@ -2234,12 +2234,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
     add_opt(common_arg(
-        {"--image"}, "FILE",
-        "path to an image file. use with multimodal models. Specify multiple times for batching",
+        {"--image", "--audio"}, "FILE",
+        "path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",
         [](common_params & params, const std::string & value) {
             params.image.emplace_back(value);
         }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA}));
+    ).set_examples({LLAMA_EXAMPLE_MTMD}));
     if (llama_supports_rpc()) {
         add_opt(common_arg(
             {"--rpc"}, "SERVERS",

@@ -2869,7 +2869,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.chat_template = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_LLAVA}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
     add_opt(common_arg(
         {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
         string_format(
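
The notable change here: `--audio` becomes an alias of `--image`, and both append to the same `params.image` vector, so image and audio paths share one list, can be mixed, and can be repeated. The option also moves from the old `LLAMA_EXAMPLE_LLAVA` tag to the renamed `LLAMA_EXAMPLE_MTMD`.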

common/common.h

Lines changed: 1 addition & 1 deletion

@@ -72,7 +72,7 @@ enum llama_example {
     LLAMA_EXAMPLE_SERVER,
     LLAMA_EXAMPLE_CVECTOR_GENERATOR,
     LLAMA_EXAMPLE_EXPORT_LORA,
-    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_LOOKUP,
     LLAMA_EXAMPLE_PARALLEL,
     LLAMA_EXAMPLE_TTS,

convert_hf_to_gguf.py

Lines changed: 118 additions & 42 deletions (large diff not rendered by default)

docs/multimodal.md

Lines changed: 13 additions & 1 deletion

@@ -4,7 +4,9 @@ llama.cpp supports multimodal input via `libmtmd`. Currently, there are 2 tools
 - [llama-mtmd-cli](../tools/mtmd/README.md)
 - [llama-server](../tools/server/README.md) via OpenAI-compatible `/chat/completions` API

-To enable it, can use use one of the 2 methods below:
+Currently, we support **image** and **audio** input. Audio is highly experimental and may have reduced quality.
+
+To enable it, you can use one of the 2 methods below:

 - Use `-hf` option with a supported model (see a list of pre-quantized model below)
   - To load a model using `-hf` while disabling multimodal, use `--no-mmproj`

@@ -37,6 +39,8 @@ Replaces the `(tool_name)` with the name of binary you want to use. For example,

 NOTE: some models may require large context window, for example: `-c 8192`

+**Vision models**:
+
 ```sh
 # Gemma 3
 (tool_name) -hf ggml-org/gemma-3-4b-it-GGUF

@@ -78,3 +82,11 @@ NOTE: some models may require large context window, for example: `-c 8192`
 # Llama 4 Scout
 (tool_name) -hf ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF
 ```
+
+**Audio models**:
+
+```sh
+# Ultravox 0.5
+(tool_name) -hf ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF
+(tool_name) -hf ggml-org/ultravox-v0_5-llama-3_1-8b-GGUF
+```

ggml/include/ggml.h

Lines changed: 2 additions & 2 deletions

@@ -534,15 +534,15 @@ extern "C" {
         GGML_UNARY_OP_STEP,
         GGML_UNARY_OP_TANH,
         GGML_UNARY_OP_ELU,
+        GGML_UNARY_OP_RELU,
         GGML_UNARY_OP_SIGMOID,
         GGML_UNARY_OP_GELU,
-        GGML_UNARY_OP_GELU_ERF,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
         GGML_UNARY_OP_HARDSWISH,
         GGML_UNARY_OP_HARDSIGMOID,
         GGML_UNARY_OP_EXP,
-        GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_GELU_ERF,

         GGML_UNARY_OP_COUNT,
     };
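
Both entries move rather than change: `GGML_UNARY_OP_RELU` joins the long-standing ops near the top, and the newly merged `GGML_UNARY_OP_GELU_ERF` goes last before `GGML_UNARY_OP_COUNT`, presumably matching upstream's insertion order after the merge. Since the enumerators are implicitly numbered, the reorder shifts the integer values of the entries in between; that is invisible within a single build but would matter to anything that stores raw op ids across versions.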

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 0 deletions

@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 1 addition & 0 deletions

@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP
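
The same one-line fix lands in both the f16 and f32 kernels. `skip` is data-dependent, so warps within one block can disagree about taking the early `continue`; a warp that left the iteration without synchronizing would later arrive at a different `__syncthreads()` than its peers, and mismatched barriers within a block are undefined behavior in CUDA (in practice, usually a hang). The toy kernel below illustrates the pattern; it is an invented sketch assuming a 128-thread block, not the actual flash-attention code:

```cuda
// Invented toy kernel (not the ggml code): every warp of a block must pass
// the same number of __syncthreads() barriers per loop iteration, even on
// iterations where it has no work, or the block deadlocks / hits UB.
__global__ void barrier_aligned_loop(const int * warp_skip, const float * in,
                                     float * out, int n_iter) {
    __shared__ float tile[2][128];        // double buffer: one barrier per pass
    const int warp_id = threadIdx.x / 32; // assumes blockDim.x == 128

    float acc = 0.0f;
    for (int i = 0; i < n_iter; ++i) {
        // data-dependent flag: warps of the same block may disagree
        const bool skip = warp_skip[i*(blockDim.x/32) + warp_id] != 0;
        if (__all_sync(0xFFFFFFFF, skip)) {
            __syncthreads();              // the fix: match the barrier below
            continue;                     // without it, the other warps hang
        }
        float * buf = tile[i & 1];
        buf[threadIdx.x] = in[i*blockDim.x + threadIdx.x];
        __syncthreads();                  // exactly one barrier on this path too
        acc += buf[threadIdx.x ^ 1];
    }
    out[blockIdx.x*blockDim.x + threadIdx.x] = acc;
}
```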

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 4 additions & 0 deletions

@@ -2197,6 +2197,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_UNARY_OP_SILU:
             ggml_cuda_op_silu(ctx, dst);
             break;
+        case GGML_UNARY_OP_GELU_ERF:
+            ggml_cuda_op_gelu_erf(ctx, dst);
+            break;
         case GGML_UNARY_OP_GELU_QUICK:
             ggml_cuda_op_gelu_quick(ctx, dst);
             break;

@@ -2982,6 +2985,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_UNARY_OP_SIGMOID:
         case GGML_UNARY_OP_HARDSIGMOID:
         case GGML_UNARY_OP_HARDSWISH:
+        case GGML_UNARY_OP_GELU_ERF:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_TANH:
         case GGML_UNARY_OP_EXP:
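
Together with the enum entry in ggml.h and the kernels in unary.cu below, these two hunks complete the CUDA wiring for the new op: a dispatch case in `ggml_cuda_compute_forward` to run it, and an entry in `ggml_backend_cuda_device_supports_op` so the backend advertises support (otherwise the scheduler would keep the op off the GPU).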

ggml/src/ggml-cuda/unary.cu

Lines changed: 10 additions & 0 deletions

@@ -23,6 +23,12 @@ static __device__ __forceinline__ float op_gelu(float x) {
     return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x)));
 }

+static __device__ __forceinline__ float op_gelu_erf(float x) {
+    const float SQRT_2_INV = 0.70710678118654752440084436210484f;
+
+    return 0.5f*x*(1.0f + erff(x*SQRT_2_INV));
+}
+
 static __device__ __forceinline__ float op_gelu_quick(float x) {
     const float GELU_QUICK_COEF = -1.702f;


@@ -134,6 +140,10 @@ void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_unary<op_gelu>(ctx, dst);
 }

+void ggml_cuda_op_gelu_erf(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_gelu_erf>(ctx, dst);
+}
+
 void ggml_cuda_op_gelu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_unary<op_gelu_quick>(ctx, dst);
 }
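
`op_gelu_erf` is the exact GELU, GELU(x) = 0.5·x·(1 + erf(x/√2)), whereas the existing `op_gelu` uses the faster tanh approximation. The standalone host-side check below contrasts the two; it is a sketch, with the approximation's coefficients assumed to be ggml's usual values (they are defined just above the visible hunk, outside this diff):

```cuda
// Standalone sanity check (host code; compiles with nvcc or any C compiler):
// exact erf-based GELU vs. the tanh approximation used by op_gelu.
#include <math.h>
#include <stdio.h>

static float gelu_erf(float x) {
    const float SQRT_2_INV = 0.70710678118654752440084436210484f;
    return 0.5f*x*(1.0f + erff(x*SQRT_2_INV));
}

static float gelu_tanh(float x) {
    // coefficients assumed to match ggml's definitions above the hunk
    const float GELU_COEF_A    = 0.044715f;
    const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
    return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x)));
}

int main() {
    float max_diff = 0.0f;
    for (float x = -6.0f; x <= 6.0f; x += 0.01f) {
        max_diff = fmaxf(max_diff, fabsf(gelu_erf(x) - gelu_tanh(x)));
    }
    // the two stay within roughly 1e-3 of each other; the dedicated erf
    // variant avoids that drift for models trained against exact GELU
    printf("max |erf - tanh| GELU gap on [-6, 6]: %g\n", max_diff);
    return 0;
}
```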
