Skip to content

Commit f97a844

Browse files
author
prima
committed
Merge remote-tracking branch 'origin/concedo_experimental' into remoteManagement
2 parents 19a6f48 + 5c4ad39 commit f97a844

File tree

3 files changed

+59
-7
lines changed

3 files changed

+59
-7
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,12 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
139139
- LLM text generation (Supports all GGML and GGUF models, backwards compatibility with ALL past models)
140140
- Image Generation (Stable Diffusion 1.5, SDXL, SD3, Flux)
141141
- Speech-To-Text (Voice Recognition) via Whisper
142-
- Text-To-Speech (Voice Generation) via OuteTTS
142+
- Text-To-Speech (Voice Generation) via OuteTTS, Kokoro, Parler and Dia
143143
- Provides many compatible API endpoints for many popular webservices (KoboldCppApi OpenAiApi OllamaApi A1111ForgeApi ComfyUiApi WhisperTranscribeApi XttsApi OpenAiSpeechApi)
144144
- Bundled KoboldAI Lite UI with editing tools, save formats, memory, world info, author's note, characters, scenarios.
145145
- Includes multiple modes (chat, adventure, instruct, storywriter) and UI Themes (aesthetic roleplay, classic writer, corporate assistant, messenger)
146146
- Supports loading Tavern Character Cards, importing many different data formats from various sites, reading or exporting JSON savefiles and persistent stories.
147-
- Many other features including new samplers, regex support, websearch, RAG via TextDB and more.
147+
- Many other features including new samplers, regex support, websearch, RAG via TextDB, image recognition/vision and more.
148148
- Ready-to-use binaries for Windows, MacOS, Linux. Runs directly with Colab, Docker, also supports other platforms if self-compiled (like Android (via Termux) and Raspberry PI).
149149
- [Need help finding a model? Read this!](https://github.com/LostRuins/koboldcpp/wiki#getting-an-ai-model-file)
150150

klite.embd

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3466,6 +3466,7 @@ Current version indicated by LITEVER below.
34663466
var custom_claude_model = "";
34673467
var uses_cors_proxy = false; //we start off attempting a direct connection. switch to proxy if that fails
34683468
var synchro_polled_response = null;
3469+
var synchro_polled_respimg = null; //sometimes a LLM response can also include an image.
34693470
var last_stop_reason = ""; //update stop reason if known
34703471
var synchro_pending_stream = ""; //used for storing incomplete streaming text
34713472
var streaming_was_thinking = false; //used as a switch to determine when thinking ends, to wrap output in tags
@@ -5912,10 +5913,13 @@ Current version indicated by LITEVER below.
59125913
if(x.ok)
59135914
{
59145915
return x;
5915-
}else{
5916-
throw new Error('Error occurred while SSE streaming: ' + (x.statusText));
5917-
return null;
59185916
}
5917+
return x.text().then(errorBody => {
5918+
throw new Error(`Error occurred while SSE streaming: ${x.statusText} - ${errorBody}`);
5919+
}).catch(bodyReadError => {
5920+
throw new Error(`${bodyReadError}`);
5921+
});
5922+
59195923
})
59205924
.then(resp => {
59215925
resp.body
@@ -6056,8 +6060,13 @@ Current version indicated by LITEVER below.
60566060
synchro_polled_response = data.candidates[0].output;
60576061
}else if (custom_gemini_key != "" && data.candidates != null && data.candidates.length>0 && data.candidates[0].content && data.candidates[0].content.parts != null && data.candidates[0].content.parts.length>0) {
60586062
synchro_polled_response = "";
6063+
synchro_polled_respimg = null;
60596064
for(let x=0;x<data.candidates[0].content.parts.length;++x)
60606065
{
6066+
if(!synchro_polled_respimg && data.candidates[0].content.parts[x].inlineData && data.candidates[0].content.parts[x].inlineData.data)
6067+
{
6068+
synchro_polled_respimg = data.candidates[0].content.parts[x].inlineData.data;
6069+
}
60616070
if(!data.candidates[0].content.parts[x].text)
60626071
{
60636072
continue;
@@ -6221,6 +6230,10 @@ Current version indicated by LITEVER below.
62216230
{
62226231
for(let x=0;x<event.data.candidates[0].content.parts.length;++x)
62236232
{
6233+
if(!synchro_polled_respimg && event.data.candidates[0].content.parts[x].inlineData && event.data.candidates[0].content.parts[x].inlineData.data)
6234+
{
6235+
synchro_polled_respimg = event.data.candidates[0].content.parts[x].inlineData.data;
6236+
}
62246237
if(event.data.candidates[0].content.parts[x].thought)
62256238
{
62266239
streaming_was_thinking = true;
@@ -15027,6 +15040,7 @@ Current version indicated by LITEVER below.
1502715040
pending_response_id = "";
1502815041
poll_in_progress = false;
1502915042
synchro_polled_response = null;
15043+
synchro_polled_respimg = null;
1503015044
last_stop_reason = "";
1503115045
synchro_pending_stream = "";
1503215046
streaming_was_thinking = false;
@@ -15052,6 +15066,7 @@ Current version indicated by LITEVER below.
1505215066
nextgeneratedimagemilestone = generateimagesinterval;
1505315067
pending_response_id = "";
1505415068
synchro_polled_response = null;
15069+
synchro_polled_respimg = null;
1505515070
last_stop_reason = "";
1505615071
synchro_pending_stream = "";
1505715072
streaming_was_thinking = false;
@@ -16778,7 +16793,12 @@ Current version indicated by LITEVER below.
1677816793
retry_in_progress = false;
1677916794

1678016795
//match the request for creating images in instruct modes
16781-
if(newgen!="" && localsettings.img_gen_from_instruct && localsettings.opmode == 4 && localsettings.generate_images_mode!=0 && localsettings.img_autogen_type!=2 && !newgen.includes("\n"))
16796+
let model_allow_imggen = localsettings.img_gen_from_instruct;
16797+
if(custom_gemini_key!="" && document.getElementById("custom_gemini_model").value=="gemini-2.5-flash-image-preview")
16798+
{
16799+
model_allow_imggen = false; //nano banana interferes with imggen instructions
16800+
}
16801+
if(newgen!="" && model_allow_imggen && localsettings.opmode == 4 && localsettings.generate_images_mode!=0 && localsettings.img_autogen_type!=2 && !newgen.includes("\n"))
1678216802
{
1678316803
let newgenlc = newgen.toLowerCase().trim();
1678416804
if (newgenlc.startsWith("draw ") ||
@@ -17746,6 +17766,7 @@ Current version indicated by LITEVER below.
1774617766
poll_ticks_passed = 0;
1774717767
poll_in_progress = false;
1774817768
synchro_polled_response = null;
17769+
synchro_polled_respimg = null;
1774917770
last_stop_reason = "";
1775017771
synchro_pending_stream = "";
1775117772
streaming_was_thinking = false;
@@ -18290,7 +18311,7 @@ Current version indicated by LITEVER below.
1829018311
payload["tools"] = [{"google_search": {}}];
1829118312
}
1829218313

18293-
if(mdlname.includes("gemini-2.5"))
18314+
if(mdlname.includes("gemini-2.5") && !mdlname.includes("image"))
1829418315
{
1829518316
if(!document.getElementById("usegeminithink").checked)
1829618317
{
@@ -20708,6 +20729,15 @@ Current version indicated by LITEVER below.
2070820729
render_gametext();
2070920730
sync_multiplayer(false);
2071020731
}
20732+
if(synchro_polled_respimg)
20733+
{
20734+
if(!synchro_polled_respimg.startsWith("data:image"))
20735+
{
20736+
synchro_polled_respimg = "data:image/png;base64," + synchro_polled_respimg;
20737+
}
20738+
self_upload_img(synchro_polled_respimg,"output_image");
20739+
synchro_polled_respimg = null;
20740+
}
2071120741
}
2071220742
else {
2071320743
//horde api needs to constantly poll to see if response is done

koboldcpp.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
maxhordelen = 1024
9292
modelbusy = threading.Lock()
9393
requestsinqueue = 0
94+
ratelimitlookup = {}
9495
defaultport = 5001
9596
showsamplerwarning = True
9697
showmaxctxwarning = True
@@ -4904,6 +4905,21 @@ def do_POST(self):
49044905
return
49054906

49064907
reqblocking = False
4908+
#handle rate limiting
4909+
ratelimiter = int(args.ratelimit)
4910+
if ratelimiter > 0:
4911+
client_ip = self.client_address[0]
4912+
lastdone = ratelimitlookup.get(client_ip, datetime.min)
4913+
diff = (datetime.now() - lastdone).total_seconds()
4914+
if diff < ratelimiter:
4915+
self.send_response(503)
4916+
self.end_headers(content_type='application/json')
4917+
self.wfile.write(json.dumps({"detail": {
4918+
"msg": f"You are sending requests too quickly. Please try again in {int(ratelimiter-diff)} seconds.",
4919+
"type": "service_unavailable",
4920+
}}).encode())
4921+
return
4922+
ratelimitlookup[client_ip] = datetime.now()
49074923
muint = int(args.multiuser)
49084924
if muint<=0 and ((args.whispermodel and args.whispermodel!="") or (args.sdmodel and args.sdmodel!="") or (args.ttsmodel and args.ttsmodel!="") or (args.embeddingsmodel and args.embeddingsmodel!="")):
49094925
muint = 2 # this prevents errors when using voice/img together with text
@@ -5667,6 +5683,7 @@ def hide_tooltip(event):
56675683
ssl_key_var = ctk.StringVar()
56685684
password_var = ctk.StringVar()
56695685
maxrequestsize_var = ctk.StringVar(value=str(32))
5686+
ratelimit_var = ctk.StringVar(value=str(0))
56705687

56715688
sd_model_var = ctk.StringVar()
56725689
sd_lora_var = ctk.StringVar()
@@ -6395,6 +6412,7 @@ def pickpremadetemplate():
63956412
makelabelentry(network_tab, "Password: ", password_var, 10, 200,tooltip="Enter a password required to use this instance.\nThis key will be required for all text endpoints.\nImage endpoints are not secured.")
63966413

63976414
makelabelentry(network_tab, "Max Req. Size (MB):", maxrequestsize_var, row=20, width=50, tooltip="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.")
6415+
makelabelentry(network_tab, "IP Rate Limiter (s):", ratelimit_var, row=22, width=50, tooltip="Rate limits each IP to allow a new request once per X seconds. Do not change if unsure.")
63986416

63996417

64006418
# Horde Tab
@@ -6686,6 +6704,7 @@ def export_vars():
66866704
args.multiplayer = (multiplayer_var.get()==1)
66876705
args.websearch = (websearch_var.get()==1)
66886706
args.maxrequestsize = int(maxrequestsize_var.get()) if maxrequestsize_var.get()!="" else 32
6707+
args.ratelimit = int(ratelimit_var.get()) if ratelimit_var.get()!="" else 0
66896708

66906709
if usehorde_var.get() != 0:
66916710
args.hordemodelname = horde_name_var.get()
@@ -6933,6 +6952,8 @@ def import_vars(dict):
69336952
usehorde_var.set(1 if ("hordekey" in dict and dict["hordekey"]) else 0)
69346953
if "maxrequestsize" in dict and dict["maxrequestsize"]:
69356954
maxrequestsize_var.set(dict["maxrequestsize"])
6955+
if "ratelimit" in dict and dict["ratelimit"]:
6956+
ratelimit_var.set(dict["ratelimit"])
69366957

69376958
sd_model_var.set(dict["sdmodel"] if ("sdmodel" in dict and dict["sdmodel"]) else "")
69386959
sd_clamped_var.set(int(dict["sdclamped"]) if ("sdclamped" in dict and dict["sdclamped"]) else 0)
@@ -8744,6 +8765,7 @@ def range_checker(arg: str):
87448765
advparser.add_argument("--draftgpulayers","--gpu-layers-draft","--n-gpu-layers-draft","-ngld", metavar=('[layers]'), help="How many layers to offload to GPU for the draft model (default=full offload)", type=int, default=999)
87458766
advparser.add_argument("--draftgpusplit", help="GPU layer distribution ratio for draft model (default=same as main). Only works if multi-GPUs selected for MAIN model and tensor_split is set!", metavar=('[Ratios]'), type=float, nargs='+')
87468767
advparser.add_argument("--password", metavar=('[API key]'), help="Enter a password required to use this instance. This key will be required for all text endpoints. Image endpoints are not secured.", default=None)
8768+
advparser.add_argument("--ratelimit", metavar=('[seconds]'), help="If enabled, rate limit generative request by IP address. Each IP can only send a new request once per X seconds.", type=int, default=0)
87478769
advparser.add_argument("--ignoremissing", help="Ignores all missing non-essential files, just skipping them instead.", action='store_true')
87488770
advparser.add_argument("--chatcompletionsadapter", metavar=('[filename]'), help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="AutoGuess")
87498771
advparser.add_argument("--flashattention","--flash-attn","-fa", help="Enables flash attention.", action='store_true')

0 commit comments

Comments
 (0)