Skip to content

Commit 5c4ad39

Browse files
committed
added a new parameter --ratelimit that will apply per-IP based rate limiting (to help prevent abuse of public instances).
1 parent 7e35954 commit 5c4ad39

File tree

3 files changed

+59
-7
lines changed

3 files changed

+59
-7
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
1515
- LLM text generation (Supports all GGML and GGUF models, backwards compatibility with ALL past models)
1616
- Image Generation (Stable Diffusion 1.5, SDXL, SD3, Flux)
1717
- Speech-To-Text (Voice Recognition) via Whisper
18-
- Text-To-Speech (Voice Generation) via OuteTTS
18+
- Text-To-Speech (Voice Generation) via OuteTTS, Kokoro, Parler and Dia
1919
- Provides many compatible API endpoints for many popular webservices (KoboldCppApi OpenAiApi OllamaApi A1111ForgeApi ComfyUiApi WhisperTranscribeApi XttsApi OpenAiSpeechApi)
2020
- Bundled KoboldAI Lite UI with editing tools, save formats, memory, world info, author's note, characters, scenarios.
2121
- Includes multiple modes (chat, adventure, instruct, storywriter) and UI Themes (aesthetic roleplay, classic writer, corporate assistant, messenger)
2222
- Supports loading Tavern Character Cards, importing many different data formats from various sites, reading or exporting JSON savefiles and persistent stories.
23-
- Many other features including new samplers, regex support, websearch, RAG via TextDB and more.
23+
- Many other features including new samplers, regex support, websearch, RAG via TextDB, image recognition/vision and more.
2424
- Ready-to-use binaries for Windows, macOS, and Linux. Runs directly with Colab and Docker, and also supports other platforms if self-compiled (like Android (via Termux) and Raspberry Pi).
2525
- [Need help finding a model? Read this!](https://github.com/LostRuins/koboldcpp/wiki#getting-an-ai-model-file)
2626

klite.embd

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3238,6 +3238,7 @@ Current version indicated by LITEVER below.
32383238
var custom_claude_model = "";
32393239
var uses_cors_proxy = false; //we start off attempting a direct connection. switch to proxy if that fails
32403240
var synchro_polled_response = null;
3241+
var synchro_polled_respimg = null; //sometimes a LLM response can also include an image.
32413242
var last_stop_reason = ""; //update stop reason if known
32423243
var synchro_pending_stream = ""; //used for storing incomplete streaming text
32433244
var streaming_was_thinking = false; //used as a switch to determine when thinking ends, to wrap output in tags
@@ -5560,10 +5561,13 @@ Current version indicated by LITEVER below.
55605561
if(x.ok)
55615562
{
55625563
return x;
5563-
}else{
5564-
throw new Error('Error occurred while SSE streaming: ' + (x.statusText));
5565-
return null;
55665564
}
5565+
return x.text().then(errorBody => {
5566+
throw new Error(`Error occurred while SSE streaming: ${x.statusText} - ${errorBody}`);
5567+
}).catch(bodyReadError => {
5568+
throw new Error(`${bodyReadError}`);
5569+
});
5570+
55675571
})
55685572
.then(resp => {
55695573
resp.body
@@ -5704,8 +5708,13 @@ Current version indicated by LITEVER below.
57045708
synchro_polled_response = data.candidates[0].output;
57055709
}else if (custom_gemini_key != "" && data.candidates != null && data.candidates.length>0 && data.candidates[0].content && data.candidates[0].content.parts != null && data.candidates[0].content.parts.length>0) {
57065710
synchro_polled_response = "";
5711+
synchro_polled_respimg = null;
57075712
for(let x=0;x<data.candidates[0].content.parts.length;++x)
57085713
{
5714+
if(!synchro_polled_respimg && data.candidates[0].content.parts[x].inlineData && data.candidates[0].content.parts[x].inlineData.data)
5715+
{
5716+
synchro_polled_respimg = data.candidates[0].content.parts[x].inlineData.data;
5717+
}
57095718
if(!data.candidates[0].content.parts[x].text)
57105719
{
57115720
continue;
@@ -5869,6 +5878,10 @@ Current version indicated by LITEVER below.
58695878
{
58705879
for(let x=0;x<event.data.candidates[0].content.parts.length;++x)
58715880
{
5881+
if(!synchro_polled_respimg && event.data.candidates[0].content.parts[x].inlineData && event.data.candidates[0].content.parts[x].inlineData.data)
5882+
{
5883+
synchro_polled_respimg = event.data.candidates[0].content.parts[x].inlineData.data;
5884+
}
58725885
if(event.data.candidates[0].content.parts[x].thought)
58735886
{
58745887
streaming_was_thinking = true;
@@ -14526,6 +14539,7 @@ Current version indicated by LITEVER below.
1452614539
pending_response_id = "";
1452714540
poll_in_progress = false;
1452814541
synchro_polled_response = null;
14542+
synchro_polled_respimg = null;
1452914543
last_stop_reason = "";
1453014544
synchro_pending_stream = "";
1453114545
streaming_was_thinking = false;
@@ -14550,6 +14564,7 @@ Current version indicated by LITEVER below.
1455014564
nextgeneratedimagemilestone = generateimagesinterval;
1455114565
pending_response_id = "";
1455214566
synchro_polled_response = null;
14567+
synchro_polled_respimg = null;
1455314568
last_stop_reason = "";
1455414569
synchro_pending_stream = "";
1455514570
streaming_was_thinking = false;
@@ -16256,7 +16271,12 @@ Current version indicated by LITEVER below.
1625616271
retry_in_progress = false;
1625716272

1625816273
//match the request for creating images in instruct modes
16259-
if(newgen!="" && localsettings.img_gen_from_instruct && localsettings.opmode == 4 && localsettings.generate_images_mode!=0 && localsettings.img_autogen_type!=2 && !newgen.includes("\n"))
16274+
let model_allow_imggen = localsettings.img_gen_from_instruct;
16275+
if(custom_gemini_key!="" && document.getElementById("custom_gemini_model").value=="gemini-2.5-flash-image-preview")
16276+
{
16277+
model_allow_imggen = false; //nano banana interferes with imggen instructions
16278+
}
16279+
if(newgen!="" && model_allow_imggen && localsettings.opmode == 4 && localsettings.generate_images_mode!=0 && localsettings.img_autogen_type!=2 && !newgen.includes("\n"))
1626016280
{
1626116281
let newgenlc = newgen.toLowerCase().trim();
1626216282
if (newgenlc.startsWith("draw ") ||
@@ -17236,6 +17256,7 @@ Current version indicated by LITEVER below.
1723617256
poll_ticks_passed = 0;
1723717257
poll_in_progress = false;
1723817258
synchro_polled_response = null;
17259+
synchro_polled_respimg = null;
1723917260
last_stop_reason = "";
1724017261
synchro_pending_stream = "";
1724117262
streaming_was_thinking = false;
@@ -17751,7 +17772,7 @@ Current version indicated by LITEVER below.
1775117772
payload["tools"] = [{"google_search": {}}];
1775217773
}
1775317774

17754-
if(mdlname.includes("gemini-2.5"))
17775+
if(mdlname.includes("gemini-2.5") && !mdlname.includes("image"))
1775517776
{
1775617777
if(!document.getElementById("usegeminithink").checked)
1775717778
{
@@ -20182,6 +20203,15 @@ Current version indicated by LITEVER below.
2018220203
render_gametext();
2018320204
sync_multiplayer(false);
2018420205
}
20206+
if(synchro_polled_respimg)
20207+
{
20208+
if(!synchro_polled_respimg.startsWith("data:image"))
20209+
{
20210+
synchro_polled_respimg = "data:image/png;base64," + synchro_polled_respimg;
20211+
}
20212+
self_upload_img(synchro_polled_respimg,"output_image");
20213+
synchro_polled_respimg = null;
20214+
}
2018520215
}
2018620216
else {
2018720217
//horde api needs to constantly poll to see if response is done

koboldcpp.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
maxhordelen = 1024
8787
modelbusy = threading.Lock()
8888
requestsinqueue = 0
89+
ratelimitlookup = {}
8990
defaultport = 5001
9091
showsamplerwarning = True
9192
showmaxctxwarning = True
@@ -3873,6 +3874,21 @@ def do_POST(self):
38733874
return
38743875

38753876
reqblocking = False
3877+
#handle rate limiting
3878+
ratelimiter = int(args.ratelimit)
3879+
if ratelimiter > 0:
3880+
client_ip = self.client_address[0]
3881+
lastdone = ratelimitlookup.get(client_ip, datetime.min)
3882+
diff = (datetime.now() - lastdone).total_seconds()
3883+
if diff < ratelimiter:
3884+
self.send_response(503)
3885+
self.end_headers(content_type='application/json')
3886+
self.wfile.write(json.dumps({"detail": {
3887+
"msg": f"You are sending requests too quickly. Please try again in {int(ratelimiter-diff)} seconds.",
3888+
"type": "service_unavailable",
3889+
}}).encode())
3890+
return
3891+
ratelimitlookup[client_ip] = datetime.now()
38763892
muint = int(args.multiuser)
38773893
if muint<=0 and ((args.whispermodel and args.whispermodel!="") or (args.sdmodel and args.sdmodel!="") or (args.ttsmodel and args.ttsmodel!="") or (args.embeddingsmodel and args.embeddingsmodel!="")):
38783894
muint = 2 # this prevents errors when using voice/img together with text
@@ -4619,6 +4635,7 @@ def hide_tooltip(event):
46194635
ssl_key_var = ctk.StringVar()
46204636
password_var = ctk.StringVar()
46214637
maxrequestsize_var = ctk.StringVar(value=str(32))
4638+
ratelimit_var = ctk.StringVar(value=str(0))
46224639

46234640
sd_model_var = ctk.StringVar()
46244641
sd_lora_var = ctk.StringVar()
@@ -5344,6 +5361,7 @@ def pickpremadetemplate():
53445361
makelabelentry(network_tab, "Password: ", password_var, 10, 200,tooltip="Enter a password required to use this instance.\nThis key will be required for all text endpoints.\nImage endpoints are not secured.")
53455362

53465363
makelabelentry(network_tab, "Max Req. Size (MB):", maxrequestsize_var, row=20, width=50, tooltip="Specify a max request payload size. Any requests to the server larger than this size will be dropped. Do not change if unsure.")
5364+
makelabelentry(network_tab, "IP Rate Limiter (s):", ratelimit_var, row=22, width=50, tooltip="Rate limits each IP to allow a new request once per X seconds. Do not change if unsure.")
53475365

53485366

53495367
# Horde Tab
@@ -5632,6 +5650,7 @@ def export_vars():
56325650
args.multiplayer = (multiplayer_var.get()==1)
56335651
args.websearch = (websearch_var.get()==1)
56345652
args.maxrequestsize = int(maxrequestsize_var.get()) if maxrequestsize_var.get()!="" else 32
5653+
args.ratelimit = int(ratelimit_var.get()) if ratelimit_var.get()!="" else 0
56355654

56365655
if usehorde_var.get() != 0:
56375656
args.hordemodelname = horde_name_var.get()
@@ -5876,6 +5895,8 @@ def import_vars(dict):
58765895
usehorde_var.set(1 if ("hordekey" in dict and dict["hordekey"]) else 0)
58775896
if "maxrequestsize" in dict and dict["maxrequestsize"]:
58785897
maxrequestsize_var.set(dict["maxrequestsize"])
5898+
if "ratelimit" in dict and dict["ratelimit"]:
5899+
ratelimit_var.set(dict["ratelimit"])
58795900

58805901
sd_model_var.set(dict["sdmodel"] if ("sdmodel" in dict and dict["sdmodel"]) else "")
58815902
sd_clamped_var.set(int(dict["sdclamped"]) if ("sdclamped" in dict and dict["sdclamped"]) else 0)
@@ -7650,6 +7671,7 @@ def range_checker(arg: str):
76507671
advparser.add_argument("--draftgpulayers","--gpu-layers-draft","--n-gpu-layers-draft","-ngld", metavar=('[layers]'), help="How many layers to offload to GPU for the draft model (default=full offload)", type=int, default=999)
76517672
advparser.add_argument("--draftgpusplit", help="GPU layer distribution ratio for draft model (default=same as main). Only works if multi-GPUs selected for MAIN model and tensor_split is set!", metavar=('[Ratios]'), type=float, nargs='+')
76527673
advparser.add_argument("--password", metavar=('[API key]'), help="Enter a password required to use this instance. This key will be required for all text endpoints. Image endpoints are not secured.", default=None)
7674+
advparser.add_argument("--ratelimit", metavar=('[seconds]'), help="If enabled, rate limit generative request by IP address. Each IP can only send a new request once per X seconds.", type=int, default=0)
76537675
advparser.add_argument("--ignoremissing", help="Ignores all missing non-essential files, just skipping them instead.", action='store_true')
76547676
advparser.add_argument("--chatcompletionsadapter", metavar=('[filename]'), help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="AutoGuess")
76557677
advparser.add_argument("--flashattention","--flash-attn","-fa", help="Enables flash attention.", action='store_true')

0 commit comments

Comments
 (0)