Commit 98b50a1

Author: prima
Merge remote-tracking branch 'origin/concedo_experimental' into remoteManagement
2 parents d7c3049 + 5de7ed3

38 files changed: +2491 additions, -1929 deletions

.github/workflows/kcpp-build-release-linux-rocm.yaml

Lines changed: 14 additions & 0 deletions
@@ -7,6 +7,11 @@ on:
         description: 'Optional commit hash to build from'
         required: false
         default: ''
+      tag_name:
+        description: 'Optional version tag (e.g. v1.57.1) for stable release file'
+        required: false
+        default: ''
+

 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -77,3 +82,12 @@ jobs:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           gh release upload rocm-rolling dist/koboldcpp-linux-x64-rocm --clobber
+
+      - name: Upload version pinned tagged binary
+        if: ${{ inputs.tag_name != '' }}
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          TAG_NAME: ${{ inputs.tag_name }}
+        run: |
+          cp dist/koboldcpp-linux-x64-rocm "dist/koboldcpp-linux-x64-rocm-${TAG_NAME}"
+          gh release upload rocm-rolling "dist/koboldcpp-linux-x64-rocm-${TAG_NAME}" --clobber
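Usage note: with the new tag_name input, a single workflow_dispatch run also uploads a version-pinned copy of the ROCm binary next to the rolling one. A minimal sketch with the GitHub CLI, assuming dispatch from the default branch (the tag value is the example from the input's own description):

    # Trigger the build and request a version-pinned upload
    gh workflow run kcpp-build-release-linux-rocm.yaml -f tag_name=v1.57.1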

android_install.sh

Lines changed: 37 additions & 9 deletions
@@ -19,19 +19,53 @@ elif [ -t 0 ]; then
     # Running interactively
     echo "[1] - Proceed to install and launch with default options - no model is loaded by default, but can be downloaded in the UI"
     echo "[2] - Proceed to install and not run."
-    echo "[3] - Exit script"
+    echo "[3] - Select existing model to load (requires an existing install)"
+    echo "[4] - Exit script"
     echo "--------------------------------------------"
-    read -p "Enter your choice [1-3]: " choice
+    read -p "Enter your choice [1-4]: " choice
 else
     # Non-interactive, default to choice 1
     echo "Defaulting to normal install and model download. Run script interactively for other options. Install will start in 3 seconds."
     choice="1"
     sleep 3
 fi

-if [ "$choice" = "3" ]; then
+# Determine script directory (works for both curl|sh and ./install.sh)
+if [ -f "$0" ]; then
+    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" # Normal execution (./install.sh)
+else
+    SCRIPT_DIR="$(pwd)" # Piped execution (curl | sh)
+fi
+
+# Handle user choice
+if [ "$choice" = "4" ]; then
     echo "Exiting script. Goodbye!"
     exit 0
+elif [ "$choice" = "3" ]; then
+    echo "[*] Searching for .gguf model files in $SCRIPT_DIR..."
+    MODEL_FILES=$(find "$SCRIPT_DIR" -maxdepth 1 -type f -name "*.gguf" 2>/dev/null)
+    if [ -z "$MODEL_FILES" ]; then
+        echo "No .gguf model files found in $SCRIPT_DIR"
+        exit 1
+    fi
+    echo "Available model files:"
+    i=1
+    for file in $MODEL_FILES; do
+        echo "[$i] $file"
+        eval "MODEL_$i=\"$file\""
+        i=$((i+1))
+    done
+    read -p "Enter the number of the model you want to load: " model_choice
+    # Validate input
+    if ! [[ "$model_choice" =~ ^[0-9]+$ ]] || [ "$model_choice" -lt 1 ] || [ "$model_choice" -ge "$i" ]; then
+        echo "Invalid selection."
+        exit 1
+    fi
+    eval "SELECTED_MODEL=\$MODEL_$model_choice"
+    echo "Now launching with model $SELECTED_MODEL"
+    python koboldcpp.py --model "$SELECTED_MODEL"
+    exit 0
+
 elif [ "$choice" = "2" ]; then
     echo "[*] Install without model download..."
     INSTALL_MODEL=false
@@ -57,12 +91,6 @@ else
     pkg upgrade -o Dpkg::Options::="--force-confold" -y
 fi

-# Determine script directory (works for both curl|sh and ./install.sh)
-if [ -f "$0" ]; then
-    SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" # Normal execution (./install.sh)
-else
-    SCRIPT_DIR="$(pwd)" # Piped execution (curl | sh)
-fi
 # Check if koboldcpp.py already exists nearby
 if [ -f "$SCRIPT_DIR/koboldcpp.py" ]; then
     echo "[*] Detected existing koboldcpp.py in $SCRIPT_DIR"
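A note on the pattern above: POSIX sh has no arrays, so the new option 3 simulates one by eval-ing numbered MODEL_1, MODEL_2, ... variables and reading one back by index. Two caveats worth knowing: the [[ ... =~ ]] validation is a bashism and assumes bash as the interpreter, and the unquoted loop over $MODEL_FILES splits on whitespace, so filenames containing spaces would misparse. A hedged usage sketch, where the model path is hypothetical and the prompts are the script's own echo strings:

    sh android_install.sh
    # Enter your choice [1-4]: 3
    # Available model files:
    # [1] /data/data/com.termux/files/home/example-model-q4_0.gguf
    # Enter the number of the model you want to load: 1
    # Now launching with model /data/data/com.termux/files/home/example-model-q4_0.gguf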

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 8 additions & 0 deletions
@@ -2061,6 +2061,14 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
     bool use_batched_cublas_bf16 = src0->type == GGML_TYPE_BF16 && bf16_mma_hardware_available(cc);
     bool use_batched_cublas_f32 = src0->type == GGML_TYPE_F32;

+    if (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING)
+    {
+        // kcpp: https://github.com/ggml-org/llama.cpp/pull/14361 broke oldpc mode without this.
+        use_batched_cublas_bf16 = false;
+        use_batched_cublas_f32 = false;
+        use_batched_cublas_f16 = false;
+    }
+
     if (!split && use_mul_mat_vec_f) {
         // the custom F16 vector kernel can be used over batched cuBLAS GEMM
         // but this is only faster for GPUs without tensor cores or with a thin src0 matrix (particularly KQV in attention)

kcpp_adapters/AutoGuess.json

Lines changed: 9 additions & 6 deletions
@@ -22,7 +22,7 @@
         "assistant_end": "<|im_end|>\n"
     }
 }, {
-    "search": ["<|im_user|>user<|im_middle|>", "<|im_assistant|>assistant<|im_middle|>", "<|im_end|>"],
+    "search": ["<|im_user|>{{role_name}}<|im_middle|>", "<|im_assistant|>{{role_name}}<|im_middle|>", "<|im_end|>"],
     "name": "ChatML (Kimi)",
     "adapter": {
         "system_start": "<|im_system|>system<|im_middle|>",
@@ -106,7 +106,8 @@
         "system_end": "[/INST]",
         "user_start": "[INST] ",
         "user_end": "",
-        "assistant_start": "[/INST]",
+        "assistant_start": "[/INST] ",
+        "assistant_gen": "[/INST]",
         "assistant_end": "</s>"
     }
 }, {
@@ -168,11 +169,12 @@
     "search": ["<|bom|>","is_last_checked_defined"],
     "name": "Jamba",
     "adapter": {
-        "system_start": "<|bom|><|system|>",
+        "system_start": "<|bom|><|system|> ",
         "system_end": "<|eom|>",
-        "user_start": "<|bom|><|user|>",
+        "user_start": "<|bom|><|user|> ",
         "user_end": "<|eom|>",
-        "assistant_start": "<|bom|><|assistant|>",
+        "assistant_start": "<|bom|><|assistant|> ",
+        "assistant_gen": "<|bom|><|assistant|>",
         "assistant_end": "<|eom|>"
     }
 }, {
@@ -191,7 +193,7 @@
     "name": "OpenAI Harmony",
     "adapter": {
         "system_start": "<|start|>developer<|message|># Instructions\n\n",
-        "system_end": "<|end|>",
+        "system_end": "\n\n<|end|>",
         "user_start": "<|start|>user<|message|>",
         "user_end": "<|end|>",
         "assistant_start": "<|start|>assistant<|channel|>final<|message|>",
@@ -206,6 +208,7 @@
         "user_start": "User: ",
         "user_end": "\n\n",
         "assistant_start": "Assistant: ",
+        "assistant_gen": "Assistant:",
         "assistant_end": "\n\n"
     }
 }, {
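A plausible reading of the new assistant_gen field introduced throughout these adapters: assistant_start (now carrying a trailing space) wraps assistant turns already present in the chat history, while assistant_gen (without the space) terminates the prompt at the point where generation begins, so the model emits the leading space itself rather than being fed a dangling one. A minimal assembly sketch under that assumption, using the Mistral values from this diff (not KoboldCpp's actual prompt-building code):

    #!/bin/sh
    # Build a two-turn prompt; only the final, open turn ends with assistant_gen.
    USER_START="[INST] "
    ASSISTANT_START="[/INST] "   # wraps a completed assistant turn
    ASSISTANT_GEN="[/INST]"      # ends the prompt where generation starts
    ASSISTANT_END="</s>"
    history="${USER_START}Hello${ASSISTANT_START}Hi there.${ASSISTANT_END}"
    prompt="${history}${USER_START}How are you?${ASSISTANT_GEN}"
    printf '%s\n' "$prompt"
    # -> [INST] Hello[/INST] Hi there.</s>[INST] How are you?[/INST]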

kcpp_adapters/Jamba.json

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+{
+    "system_start": "<|bom|><|system|> ",
+    "system_end": "<|eom|>",
+    "user_start": "<|bom|><|user|> ",
+    "user_end": "<|eom|>",
+    "assistant_start": "<|bom|><|assistant|> ",
+    "assistant_gen": "<|bom|><|assistant|>",
+    "assistant_end": "<|eom|>"
+}
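This standalone Jamba adapter mirrors the AutoGuess entry above, so it can also be forced explicitly rather than auto-detected. A hedged usage sketch (the model filename is hypothetical; the flag is KoboldCpp's standard adapter option, assumed unchanged by this commit):

    python koboldcpp.py --model jamba-model.gguf --chatcompletionsadapter kcpp_adapters/Jamba.json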

kcpp_adapters/Mistral-NonTekken.json

Lines changed: 2 additions & 1 deletion
@@ -3,6 +3,7 @@
     "system_end": "",
     "user_start": "[INST] ",
     "user_end": "",
-    "assistant_start": "[/INST]",
+    "assistant_start": "[/INST] ",
+    "assistant_gen": "[/INST]",
     "assistant_end": "</s>"
 }

kcpp_adapters/RWKV-World.json

Lines changed: 2 additions & 1 deletion
@@ -3,6 +3,7 @@
     "system_end": "\n\n",
     "user_start": "User: ",
     "user_end": "\n\n",
-    "assistant_start": "Assistant:",
+    "assistant_start": "Assistant: ",
+    "assistant_gen": "Assistant:",
     "assistant_end": "\n\n"
 }
