Commit 5fea3e3

Merge branch 'heads/RMv1.14.2' into crokeso

2 parents e21331f + 6a03fe5

File tree: 129 files changed (+38044 / -12014 lines)


CMakeLists.txt

Lines changed: 7 additions & 1 deletion

@@ -117,6 +117,10 @@ add_compile_definitions(GGML_USE_CPU)
 add_compile_definitions(GGML_USE_CPU_AARCH64)
 add_compile_definitions(NOMINMAX)
 
+if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12)
+    add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
+endif()
+
 if (MSVC)
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
@@ -968,7 +972,9 @@ add_library(common2
     src/unicode.cpp
     src/unicode-data.cpp
     otherarch/utils.cpp
-    otherarch/utils.h)
+    otherarch/utils.h
+    tools/mtmd/mtmd-audio.cpp
+    tools/mtmd/mtmd-audio.h)
 target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
 target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
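For reference, a minimal sketch of how the new toggle would be enabled at configure time. This is an assumption based on the if() block above, not a build recipe documented in this commit; combine it with whatever HIP options the build already uses.

    # Setting the cache variable makes the new if() block true, which adds the
    # GGML_HIP_ROCWMMA_FATTN_GFX12 compile definition to the whole build.
    cmake -B build -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=ON   # plus your usual HIP flags
    cmake --build build -j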

Makefile

Lines changed: 119 additions & 173 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 1 addition & 1 deletion

@@ -308,7 +308,7 @@ KoboldCpp can now also be run on Novita AI, a newer alternative GPU cloud provid
 - Other models for Whisper (speech recognition), Image Generation, Text to Speech or Image Recognition [can be found on the Wiki](https://github.com/LostRuins/koboldcpp/wiki#what-models-does-koboldcpp-support-what-architectures-are-supported)
 
 ## Improving Performance
-- **GPU Acceleration**: If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecublas` flag (Nvidia Only), or `--usevulkan` (Any GPU), make sure you select the correct .exe with CUDA support.
+- **GPU Acceleration**: If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecuda` flag (Nvidia Only), or `--usevulkan` (Any GPU), make sure you select the correct .exe with CUDA support.
 - **GPU Layer Offloading**: Add `--gpulayers` to offload model layers to the GPU. The more layers you offload to VRAM, the faster generation speed will become. Experiment to determine number of layers to offload, and reduce by a few if you run out of memory.
 - **Increasing Context Size**: Use `--contextsize (number)` to increase context size, allowing the model to read more text. Note that you may also need to increase the max context in the KoboldAI Lite UI as well (click and edit the number text field).
 - **Old CPU Compatibility**: If you are having crashes or issues, you can try running in a non-avx2 compatibility mode by adding the `--noavx2` flag. You can also try reducing your `--blasbatchsize` (set -1 to avoid batching)
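To illustrate the renamed flag in context, a hypothetical Windows launch line under the new spelling; the model file and layer count are placeholders, not taken from this commit.

    koboldcpp.exe mymodel.gguf --usecuda --gpulayers 35 --contextsize 8192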

colab.ipynb

Lines changed: 1 addition & 1 deletion

@@ -175,7 +175,7 @@
     " print(f\"Please open the above link, and input the password '{ltpw}'\\nYour KoboldCpp will start shortly...\")\n",
     " print(\"=================\")\n",
     " !sleep 10\n",
-    "!./koboldcpp_linux $ModelCommand --usecublas 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $ECommand $SavGdriveCommand\n"
+    "!./koboldcpp_linux $ModelCommand --usecuda 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $ECommand $SavGdriveCommand\n"
     ]
 }
 ],

common/arg.cpp

Lines changed: 7 additions & 0 deletions

@@ -2737,6 +2737,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.public_path = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
+    add_opt(common_arg(
+        {"--api-prefix"}, "PREFIX",
+        string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
+        [](common_params & params, const std::string & value) {
+            params.api_prefix = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
     add_opt(common_arg(
         {"--no-webui"},
         string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),

common/common.h

Lines changed: 1 addition & 0 deletions

@@ -366,6 +366,7 @@ struct common_params {
 
     std::string hostname = "127.0.0.1";
     std::string public_path = ""; // NOLINT
+    std::string api_prefix = ""; // NOLINT
     std::string chat_template = ""; // NOLINT
     bool use_jinja = false; // NOLINT
     bool enable_chat_template = true;
