Commit 98b50a1

Author: prima
Merge remote-tracking branch 'origin/concedo_experimental' into remoteManagement
2 parents d7c3049 + 5de7ed3

38 files changed: +2491 additions, -1929 deletions

.github/workflows/kcpp-build-release-linux-rocm.yaml

Lines changed: 14 additions & 0 deletions
@@ -7,6 +7,11 @@ on:
         description: 'Optional commit hash to build from'
         required: false
         default: ''
+      tag_name:
+        description: 'Optional version tag (e.g. v1.57.1) for stable release file'
+        required: false
+        default: ''
+

 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -77,3 +82,12 @@ jobs:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           gh release upload rocm-rolling dist/koboldcpp-linux-x64-rocm --clobber
+
+      - name: Upload version pinned tagged binary
+        if: ${{ inputs.tag_name != '' }}
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TAG_NAME: ${{ inputs.tag_name }}
+        run: |
+          cp dist/koboldcpp-linux-x64-rocm "dist/koboldcpp-linux-x64-rocm-${TAG_NAME}"
+          gh release upload rocm-rolling "dist/koboldcpp-linux-x64-rocm-${TAG_NAME}" --clobber
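Usage note: with the new tag_name input, a single workflow_dispatch run also uploads a version-pinned copy of the ROCm binary next to the rolling one. A minimal sketch with the GitHub CLI, assuming dispatch from the default branch (the tag value is the example from the input's own description):

    # Trigger the build and request a version-pinned upload
    gh workflow run kcpp-build-release-linux-rocm.yaml -f tag_name=v1.57.1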

android_install.sh

Lines changed: 37 additions & 9 deletions
@@ -19,19 +19,53 @@ elif [ -t 0 ]; then
     # Running interactively
     echo "[1] - Proceed to install and launch with default options - no model is loaded by default, but can be downloaded in the UI"
     echo "[2] - Proceed to install and not run."
-    echo "[3] - Exit script"
+    echo "[3] - Select existing model to load (requires an existing install)"
+    echo "[4] - Exit script"
     echo "--------------------------------------------"
-    read -p "Enter your choice [1-3]: " choice
+    read -p "Enter your choice [1-4]: " choice
 else
     # Non-interactive, default to choice 1
     echo "Defaulting to normal install and model download. Run script interactively for other options. Install will start in 3 seconds."
     choice="1"
     sleep 3
 fi

-if [ "$choice" = "3" ]; then
+# Determine script directory (works for both curl|sh and ./install.sh)
+if [ -f "$0" ]; then
+    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" # Normal execution (./install.sh)
+else
+    SCRIPT_DIR="$(pwd)" # Piped execution (curl | sh)
+fi
+
+# Handle user choice
+if [ "$choice" = "4" ]; then
     echo "Exiting script. Goodbye!"
     exit 0
+elif [ "$choice" = "3" ]; then
+    echo "[*] Searching for .gguf model files in $SCRIPT_DIR..."
+    MODEL_FILES=$(find "$SCRIPT_DIR" -maxdepth 1 -type f -name "*.gguf" 2>/dev/null)
+    if [ -z "$MODEL_FILES" ]; then
+        echo "No .gguf model files found in $SCRIPT_DIR"
+        exit 1
+    fi
+    echo "Available model files:"
+    i=1
+    for file in $MODEL_FILES; do
+        echo "[$i] $file"
+        eval "MODEL_$i=\"$file\""
+        i=$((i+1))
+    done
+    read -p "Enter the number of the model you want to load: " model_choice
+    # Validate input
+    if ! [[ "$model_choice" =~ ^[0-9]+$ ]] || [ "$model_choice" -lt 1 ] || [ "$model_choice" -ge "$i" ]; then
+        echo "Invalid selection."
+        exit 1
+    fi
+    eval "SELECTED_MODEL=\$MODEL_$model_choice"
+    echo "Now launching with model $SELECTED_MODEL"
+    python koboldcpp.py --model "$SELECTED_MODEL"
+    exit 0
+
 elif [ "$choice" = "2" ]; then
     echo "[*] Install without model download..."
     INSTALL_MODEL=false
@@ -57,12 +91,6 @@ else
     pkg upgrade -o Dpkg::Options::="--force-confold" -y
 fi

-# Determine script directory (works for both curl|sh and ./install.sh)
-if [ -f "$0" ]; then
-    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" # Normal execution (./install.sh)
-else
-    SCRIPT_DIR="$(pwd)" # Piped execution (curl | sh)
-fi
 # Check if koboldcpp.py already exists nearby
 if [ -f "$SCRIPT_DIR/koboldcpp.py" ]; then
     echo "[*] Detected existing koboldcpp.py in $SCRIPT_DIR"
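A note on the pattern above: POSIX sh has no arrays, so the new option 3 simulates one by eval-ing numbered MODEL_1, MODEL_2, ... variables and reading one back by index. Two caveats worth knowing: the [[ ... =~ ]] validation is a bashism and assumes bash as the interpreter, and the unquoted loop over $MODEL_FILES splits on whitespace, so filenames containing spaces would misparse. A hedged usage sketch, where the model path is hypothetical and the prompts are the script's own echo strings:

    sh android_install.sh
    # Enter your choice [1-4]: 3
    # Available model files:
    # [1] /data/data/com.termux/files/home/example-model-q4_0.gguf
    # Enter the number of the model you want to load: 1
    # Now launching with model /data/data/com.termux/files/home/example-model-q4_0.gguf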

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 8 additions & 0 deletions
@@ -2061,6 +2061,14 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
     bool use_batched_cublas_bf16 = src0->type == GGML_TYPE_BF16 && bf16_mma_hardware_available(cc);
     bool use_batched_cublas_f32 = src0->type == GGML_TYPE_F32;

+    if (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING)
+    {
+        // kcpp: https://github.com/ggml-org/llama.cpp/pull/14361 broke oldpc mode without this.
+        use_batched_cublas_bf16 = false;
+        use_batched_cublas_f32 = false;
+        use_batched_cublas_f16 = false;
+    }
+
     if (!split && use_mul_mat_vec_f) {
         // the custom F16 vector kernel can be used over batched cuBLAS GEMM
         // but this is only faster for GPUs without tensor cores or with a thin src0 matrix (particularly KQV in attention)

kcpp_adapters/AutoGuess.json

Lines changed: 9 additions & 6 deletions
@@ -22,7 +22,7 @@
         "assistant_end": "<|im_end|>\n"
     }
 }, {
-    "search": ["<|im_user|>user<|im_middle|>", "<|im_assistant|>assistant<|im_middle|>", "<|im_end|>"],
+    "search": ["<|im_user|>{{role_name}}<|im_middle|>", "<|im_assistant|>{{role_name}}<|im_middle|>", "<|im_end|>"],
     "name": "ChatML (Kimi)",
     "adapter": {
         "system_start": "<|im_system|>system<|im_middle|>",
@@ -106,7 +106,8 @@
         "system_end": "[/INST]",
         "user_start": "[INST] ",
         "user_end": "",
-        "assistant_start": "[/INST]",
+        "assistant_start": "[/INST] ",
+        "assistant_gen": "[/INST]",
         "assistant_end": "</s>"
     }
 }, {
@@ -168,11 +169,12 @@
     "search": ["<|bom|>","is_last_checked_defined"],
     "name": "Jamba",
     "adapter": {
-        "system_start": "<|bom|><|system|>",
+        "system_start": "<|bom|><|system|> ",
         "system_end": "<|eom|>",
-        "user_start": "<|bom|><|user|>",
+        "user_start": "<|bom|><|user|> ",
         "user_end": "<|eom|>",
-        "assistant_start": "<|bom|><|assistant|>",
+        "assistant_start": "<|bom|><|assistant|> ",
+        "assistant_gen": "<|bom|><|assistant|>",
         "assistant_end": "<|eom|>"
     }
 }, {
@@ -191,7 +193,7 @@
     "name": "OpenAI Harmony",
     "adapter": {
         "system_start": "<|start|>developer<|message|># Instructions\n\n",
-        "system_end": "<|end|>",
+        "system_end": "\n\n<|end|>",
         "user_start": "<|start|>user<|message|>",
         "user_end": "<|end|>",
         "assistant_start": "<|start|>assistant<|channel|>final<|message|>",
@@ -206,6 +208,7 @@
         "user_start": "User: ",
         "user_end": "\n\n",
         "assistant_start": "Assistant: ",
+        "assistant_gen": "Assistant:",
         "assistant_end": "\n\n"
     }
 }, {
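A plausible reading of the new assistant_gen field introduced throughout these adapters: assistant_start (now carrying a trailing space) wraps assistant turns already present in the chat history, while assistant_gen (without the space) terminates the prompt at the point where generation begins, so the model emits the leading space itself rather than being fed a dangling one. A minimal assembly sketch under that assumption, using the Mistral values from this diff (not KoboldCpp's actual prompt-building code):

    #!/bin/sh
    # Build a two-turn prompt; only the final, open turn ends with assistant_gen.
    USER_START="[INST] "
    ASSISTANT_START="[/INST] "   # wraps a completed assistant turn
    ASSISTANT_GEN="[/INST]"      # ends the prompt where generation starts
    ASSISTANT_END="</s>"
    history="${USER_START}Hello${ASSISTANT_START}Hi there.${ASSISTANT_END}"
    prompt="${history}${USER_START}How are you?${ASSISTANT_GEN}"
    printf '%s\n' "$prompt"
    # -> [INST] Hello[/INST] Hi there.</s>[INST] How are you?[/INST]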

kcpp_adapters/Jamba.json

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+{
+    "system_start": "<|bom|><|system|> ",
+    "system_end": "<|eom|>",
+    "user_start": "<|bom|><|user|> ",
+    "user_end": "<|eom|>",
+    "assistant_start": "<|bom|><|assistant|> ",
+    "assistant_gen": "<|bom|><|assistant|>",
+    "assistant_end": "<|eom|>"
+}
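This standalone Jamba adapter mirrors the AutoGuess entry above, so it can also be forced explicitly rather than auto-detected. A hedged usage sketch (the model filename is hypothetical; the flag is KoboldCpp's standard adapter option, assumed unchanged by this commit):

    python koboldcpp.py --model jamba-model.gguf --chatcompletionsadapter kcpp_adapters/Jamba.json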

kcpp_adapters/Mistral-NonTekken.json

Lines changed: 2 additions & 1 deletion
@@ -3,6 +3,7 @@
     "system_end": "",
     "user_start": "[INST] ",
     "user_end": "",
-    "assistant_start": "[/INST]",
+    "assistant_start": "[/INST] ",
+    "assistant_gen": "[/INST]",
     "assistant_end": "</s>"
 }

kcpp_adapters/RWKV-World.json

Lines changed: 2 additions & 1 deletion
@@ -3,6 +3,7 @@
     "system_end": "\n\n",
     "user_start": "User: ",
     "user_end": "\n\n",
-    "assistant_start": "Assistant:",
+    "assistant_start": "Assistant: ",
+    "assistant_gen": "Assistant:",
     "assistant_end": "\n\n"
 }
