Commit 5fea3e3

Merge branch 'heads/RMv1.14.2' into crokeso

2 parents e21331f + 6a03fe5

File tree: 129 files changed (+38044 / -12014 lines)


CMakeLists.txt

Lines changed: 7 additions & 1 deletion

@@ -117,6 +117,10 @@ add_compile_definitions(GGML_USE_CPU)
 add_compile_definitions(GGML_USE_CPU_AARCH64)
 add_compile_definitions(NOMINMAX)
 
+if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12)
+    add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12)
+endif()
+
 if (MSVC)
     add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
     add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
@@ -968,7 +972,9 @@ add_library(common2
     src/unicode.cpp
     src/unicode-data.cpp
     otherarch/utils.cpp
-    otherarch/utils.h)
+    otherarch/utils.h
+    tools/mtmd/mtmd-audio.cpp
+    tools/mtmd/mtmd-audio.h)
 target_include_directories(common2 PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools ./vendor/stb ./vendor ./otherarch/sdcpp ./otherarch/sdcpp/thirdparty ./tools ./common)
 target_compile_features(common2 PUBLIC cxx_std_17) # don't bump
 target_link_libraries(common2 PRIVATE ggml ${LLAMA_EXTRA_LIBS})
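For reference, a minimal sketch of how the new toggle would be enabled at configure time. This is an assumption based on the if() block above, not a build recipe documented in this commit; combine it with whatever HIP options the build already uses.

    # Setting the cache variable makes the new if() block true, which adds the
    # GGML_HIP_ROCWMMA_FATTN_GFX12 compile definition to the whole build.
    cmake -B build -DGGML_HIP_FORCE_ROCWMMA_FATTN_GFX12=ON   # plus your usual HIP flags
    cmake --build build -j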

Makefile

Lines changed: 119 additions & 173 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 1 addition & 1 deletion

@@ -308,7 +308,7 @@ KoboldCpp can now also be run on Novita AI, a newer alternative GPU cloud provid
 - Other models for Whisper (speech recognition), Image Generation, Text to Speech or Image Recognition [can be found on the Wiki](https://github.com/LostRuins/koboldcpp/wiki#what-models-does-koboldcpp-support-what-architectures-are-supported)
 
 ## Improving Performance
-- **GPU Acceleration**: If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecublas` flag (Nvidia Only), or `--usevulkan` (Any GPU), make sure you select the correct .exe with CUDA support.
+- **GPU Acceleration**: If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecuda` flag (Nvidia Only), or `--usevulkan` (Any GPU), make sure you select the correct .exe with CUDA support.
 - **GPU Layer Offloading**: Add `--gpulayers` to offload model layers to the GPU. The more layers you offload to VRAM, the faster generation speed will become. Experiment to determine number of layers to offload, and reduce by a few if you run out of memory.
 - **Increasing Context Size**: Use `--contextsize (number)` to increase context size, allowing the model to read more text. Note that you may also need to increase the max context in the KoboldAI Lite UI as well (click and edit the number text field).
 - **Old CPU Compatibility**: If you are having crashes or issues, you can try running in a non-avx2 compatibility mode by adding the `--noavx2` flag. You can also try reducing your `--blasbatchsize` (set -1 to avoid batching)
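To illustrate the renamed flag in context, a hypothetical Windows launch line under the new spelling; the model file and layer count are placeholders, not taken from this commit.

    koboldcpp.exe mymodel.gguf --usecuda --gpulayers 35 --contextsize 8192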

colab.ipynb

Lines changed: 1 addition & 1 deletion

@@ -175,7 +175,7 @@
     " print(f\"Please open the above link, and input the password '{ltpw}'\\nYour KoboldCpp will start shortly...\")\n",
     " print(\"=================\")\n",
     " !sleep 10\n",
-    "!./koboldcpp_linux $ModelCommand --usecublas 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $ECommand $SavGdriveCommand\n"
+    "!./koboldcpp_linux $ModelCommand --usecuda 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $ECommand $SavGdriveCommand\n"
     ]
 }
 ],

common/arg.cpp

Lines changed: 7 additions & 0 deletions

@@ -2737,6 +2737,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.public_path = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
+    add_opt(common_arg(
+        {"--api-prefix"}, "PREFIX",
+        string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
+        [](common_params & params, const std::string & value) {
+            params.api_prefix = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
     add_opt(common_arg(
         {"--no-webui"},
         string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),

common/common.h

Lines changed: 1 addition & 0 deletions

@@ -366,6 +366,7 @@ struct common_params {
 
     std::string hostname = "127.0.0.1";
     std::string public_path = ""; // NOLINT
+    std::string api_prefix = ""; // NOLINT
     std::string chat_template = ""; // NOLINT
     bool use_jinja = false; // NOLINT
     bool enable_chat_template = true;
