
Commit 7758c83

Merge branch 'remoteManagement' into remoteman_stripped
2 parents: ba9eb36 + fa1a3e4


48 files changed: +95933 −559 lines

.editorconfig

Lines changed: 4 additions & 0 deletions
@@ -48,3 +48,7 @@ end_of_line = unset
 charset = unset
 trim_trailing_whitespace = unset
 insert_final_newline = unset
+
+[tools/mtmd/miniaudio.h]
+trim_trailing_whitespace = unset
+insert_final_newline = unset

README.md

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ It offers the following functionalities:
 ![418190669-f839ed18-0fb9-4319-82d8-03952c5d3f20](https://github.com/user-attachments/assets/2ee0c202-8f68-44d6-aa6f-18ae35f33857)
 
 - Agent thinking (based on prompts from this cool project [here](https://github.com/Wladastic/mini_autogpt))
-- Improvements to TextDB, such as VectorDB (embedding) support and document support (including upload of text documents, PDFs (SevenOf9 wrote the parser), OCR using the vision model loaded, and transcription from audio)
+- Improvements to TextDB, such as VectorDB (embedding) support and document support (including upload of text documents, PDFs (Vic49 / SevenOf9 wrote the parser), OCR using the vision model loaded, and transcription from audio)
 - Export / Import of WI groups from files
 
 ![8d3e3a9d-08e0-4caf-9ad8-c2d806fec7e0](https://github.com/user-attachments/assets/8d3e3a9d-08e0-4caf-9ad8-c2d806fec7e0)
@@ -87,7 +87,7 @@ Using this function requires the following conditions to be met:
 
 ![image](https://github.com/user-attachments/assets/41ec4f1c-5698-4ef3-ba7c-6998cbc1d8f3)
 
-- Upload document support (including upload of text documents, lorebooks, PDFs (SevenOf9 wrote the parser), OCR using the vision model loaded, and transcription from audio)
+- Upload document support (including upload of text documents, lorebooks, PDFs (Vic49 / SevenOf9 wrote the parser), OCR using the vision model loaded, and transcription from audio)
 - Export / Import of WI groups from files
 
 ## Running the fork

common/arg.cpp

Lines changed: 5 additions & 5 deletions
@@ -40,7 +40,7 @@
 using json = nlohmann::ordered_json;
 
 std::initializer_list<enum llama_example> mmproj_examples = {
-    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_SERVER,
 };
 
@@ -2234,12 +2234,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
     add_opt(common_arg(
-        {"--image"}, "FILE",
-        "path to an image file. use with multimodal models. Specify multiple times for batching",
+        {"--image", "--audio"}, "FILE",
+        "path to an image or audio file. use with multimodal models, can be repeated if you have multiple files\n",
         [](common_params & params, const std::string & value) {
             params.image.emplace_back(value);
         }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA}));
+    ).set_examples({LLAMA_EXAMPLE_MTMD}));
     if (llama_supports_rpc()) {
         add_opt(common_arg(
             {"--rpc"}, "SERVERS",
@@ -2869,7 +2869,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.chat_template = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_LLAVA}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
     add_opt(common_arg(
         {"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
         string_format(
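Since the option above now accepts either flag and can be repeated, here is a minimal invocation sketch; the file names are hypothetical and the Gemma 3 preset is just one of the `-hf` models listed in `docs/multimodal.md` below:

```sh
# Sketch: pass several media files to one multimodal run (photo_1.jpg / photo_2.jpg are placeholders).
llama-mtmd-cli -hf ggml-org/gemma-3-4b-it-GGUF \
    --image photo_1.jpg --image photo_2.jpg \
    -p "Compare the two images."
```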

common/common.h

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ enum llama_example {
     LLAMA_EXAMPLE_SERVER,
     LLAMA_EXAMPLE_CVECTOR_GENERATOR,
     LLAMA_EXAMPLE_EXPORT_LORA,
-    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_LOOKUP,
     LLAMA_EXAMPLE_PARALLEL,
     LLAMA_EXAMPLE_TTS,

convert_hf_to_gguf.py

Lines changed: 118 additions & 42 deletions
Large diffs are not rendered by default.

docs/multimodal.md

Lines changed: 13 additions & 1 deletion
@@ -4,7 +4,9 @@ llama.cpp supports multimodal input via `libmtmd`. Currently, there are 2 tools
 - [llama-mtmd-cli](../tools/mtmd/README.md)
 - [llama-server](../tools/server/README.md) via OpenAI-compatible `/chat/completions` API
 
-To enable it, can use use one of the 2 methods below:
+Currently, we support **image** and **audio** input. Audio is highly experimental and may have reduced quality.
+
+To enable it, you can use one of the 2 methods below:
 
 - Use `-hf` option with a supported model (see a list of pre-quantized model below)
 - To load a model using `-hf` while disabling multimodal, use `--no-mmproj`
@@ -37,6 +39,8 @@ Replaces the `(tool_name)` with the name of binary you want to use. For example,
 
 NOTE: some models may require large context window, for example: `-c 8192`
 
+**Vision models**:
+
 ```sh
 # Gemma 3
 (tool_name) -hf ggml-org/gemma-3-4b-it-GGUF
@@ -78,3 +82,11 @@ NOTE: some models may require large context window, for example: `-c 8192`
 # Llama 4 Scout
 (tool_name) -hf ggml-org/Llama-4-Scout-17B-16E-Instruct-GGUF
 ```
+
+**Audio models**:
+
+```sh
+# Ultravox 0.5
+(tool_name) -hf ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF
+(tool_name) -hf ggml-org/ultravox-v0_5-llama-3_1-8b-GGUF
+```
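Tying the new audio section to the `--audio` flag added in `common/arg.cpp`, a hedged usage sketch (the WAV path and prompt are hypothetical; `llama-server` with the OpenAI-compatible API is the other documented route):

```sh
# Sketch: run the Ultravox preset from the doc on a local file (speech.wav is a placeholder).
llama-mtmd-cli -hf ggml-org/ultravox-v0_5-llama-3_2-1b-GGUF \
    --audio speech.wav \
    -p "Transcribe the audio."
```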

ggml/include/ggml.h

Lines changed: 2 additions & 2 deletions
@@ -534,15 +534,15 @@ extern "C" {
         GGML_UNARY_OP_STEP,
         GGML_UNARY_OP_TANH,
         GGML_UNARY_OP_ELU,
+        GGML_UNARY_OP_RELU,
         GGML_UNARY_OP_SIGMOID,
         GGML_UNARY_OP_GELU,
-        GGML_UNARY_OP_GELU_ERF,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
         GGML_UNARY_OP_HARDSWISH,
         GGML_UNARY_OP_HARDSIGMOID,
         GGML_UNARY_OP_EXP,
-        GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_GELU_ERF,
 
         GGML_UNARY_OP_COUNT,
     };

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 0 deletions
@@ -212,6 +212,7 @@ static __global__ void flash_attn_vec_ext_f16(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 1 addition & 0 deletions
@@ -217,6 +217,7 @@ static __global__ void flash_attn_vec_ext_f32(
             }
         }
         if (__all_sync(0xFFFFFFFF, skip)) {
+            __syncthreads();
             continue;
         }
 #endif // GGML_USE_HIP

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 4 additions & 0 deletions
@@ -2182,6 +2182,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_UNARY_OP_SILU:
             ggml_cuda_op_silu(ctx, dst);
             break;
+        case GGML_UNARY_OP_GELU_ERF:
+            ggml_cuda_op_gelu_erf(ctx, dst);
+            break;
         case GGML_UNARY_OP_GELU_QUICK:
             ggml_cuda_op_gelu_quick(ctx, dst);
             break;
@@ -2967,6 +2970,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_UNARY_OP_SIGMOID:
         case GGML_UNARY_OP_HARDSIGMOID:
         case GGML_UNARY_OP_HARDSWISH:
+        case GGML_UNARY_OP_GELU_ERF:
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_TANH:
         case GGML_UNARY_OP_EXP:
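As a quick way to exercise the new CUDA path, llama.cpp's backend test harness can compare the GPU result against the CPU reference; the build path below and the assumption that the `-o` filter accepts the unary op name are mine, not part of this commit:

```sh
# Sketch: run only the GELU_ERF cases of the backend-ops test suite (binary location depends on your build).
./build/bin/test-backend-ops test -o GELU_ERF
```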
