@@ -1127,7 +1127,7 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to dete
11271127 if fsize > (10 * 1024 * 1024 ): #dont bother with models < 10mb
11281128 cs = ctxsize
11291129 mem = gpumem
1130- if "-00001-of-0000 " in fname :
1130+ if "-00001-of-00 " in fname :
11311131 match = re .search (r'-(\d{5})-of-(\d{5})\.' , fname )
11321132 if match :
11331133 total_parts = int (match .group (2 ))
@@ -2916,6 +2916,15 @@ def transform_genparams(genparams, api_format):
29162916 # In case of any issues, just do normal gen
29172917 print ("Structured Output not valid - discarded" )
29182918 pass
2919+ elif 'json_schema' in genparams :
2920+ try :
2921+ schema = genparams .get ('json_schema' )
2922+ decoded = convert_json_to_gbnf (schema )
2923+ if decoded :
2924+ genparams ["grammar" ] = decoded
2925+ except Exception :
2926+ print ("Structured Output (old format) not valid - discarded" )
2927+ pass
29192928
29202929 message_index = 0
29212930 for message in messages_array :
@@ -3639,6 +3648,18 @@ async def handle_sse_stream(self, genparams, api_format):
36393648 tokenStr = tokenStr [:sindex ]
36403649
36413650 if tokenStr != "" or streamDone :
3651+ need_split_final_msg = True if (currfinishreason is not None and streamDone and tokenStr != "" ) else False
3652+ if need_split_final_msg : #we need to send one message without the finish reason, then send a finish reason with no msg to follow standards
3653+ if api_format == 4 : # if oai chat, set format to expected openai streaming response
3654+ event_str = json .dumps ({"id" :"koboldcpp" ,"object" :"chat.completion.chunk" ,"created" :int (time .time ()),"model" :friendlymodelname ,"choices" :[{"index" :0 ,"finish_reason" :None ,"delta" :{'role' :'assistant' ,'content' :tokenStr }}]})
3655+ await self .send_oai_sse_event (event_str )
3656+ elif api_format == 3 : # non chat completions
3657+ event_str = json .dumps ({"id" :"koboldcpp" ,"object" :"text_completion" ,"created" :int (time .time ()),"model" :friendlymodelname ,"choices" :[{"index" :0 ,"finish_reason" :None ,"text" :tokenStr }]})
3658+ await self .send_oai_sse_event (event_str )
3659+ else :
3660+ event_str = json .dumps ({"token" : tokenStr , "finish_reason" :None })
3661+ await self .send_kai_sse_event (event_str )
3662+ tokenStr = "" # now the final finish reason can be sent alone
36423663 if api_format == 4 : # if oai chat, set format to expected openai streaming response
36433664 event_str = json .dumps ({"id" :"koboldcpp" ,"object" :"chat.completion.chunk" ,"created" :int (time .time ()),"model" :friendlymodelname ,"choices" :[{"index" :0 ,"finish_reason" :currfinishreason ,"delta" :{'role' :'assistant' ,'content' :tokenStr }}]})
36443665 await self .send_oai_sse_event (event_str )
@@ -6766,7 +6787,7 @@ def save_config_gui():
67666787 def load_config_gui (): #this is used to populate the GUI with a config file, whereas load_config_cli simply overwrites cli args
67676788 file_type = [("KoboldCpp Settings" , "*.kcpps *.kcppt" )]
67686789 global runmode_untouched , zenity_permitted
6769- filename = zentk_askopenfilename (filetypes = file_type , defaultextension = ".kcppt" , initialdir = None )
6790+ filename = zentk_askopenfilename (filetypes = file_type , defaultextension = ".kcppt" , initialdir = None , title = "Select kcpps or kcppt settings config file" )
67706791 if not filename or filename == "" :
67716792 return
67726793 if not os .path .exists (filename ) or os .path .getsize (filename )< 4 or os .path .getsize (filename )> 50000000 : #for sanity, check invalid kcpps
@@ -7177,6 +7198,7 @@ def tunnel_reader():
71777198def reload_from_new_args (newargs ):
71787199 try :
71797200 args .istemplate = False
7201+ newargs = convert_invalid_args (newargs )
71807202 for key , value in newargs .items (): #do not overwrite certain values
71817203 if key not in ["remotetunnel" ,"showgui" ,"port" ,"host" ,"port_param" ,"admin" ,"adminpassword" ,"admindir" ,"admintextmodelsdir" ,"admindatadir" ,"adminallowhf" ,"ssl" ,"nocertify" ,"benchmark" ,"prompt" ,"config" ]:
71827204 setattr (args , key , value )
@@ -7202,6 +7224,7 @@ def load_config_cli(filename):
72027224 print ("Loading .kcpps configuration file..." )
72037225 with open (filename , 'r' , encoding = 'utf-8' , errors = 'ignore' ) as f :
72047226 config = json .load (f )
7227+ config = convert_invalid_args (config )
72057228 if "onready" in config :
72067229 config ["onready" ] = "" #do not allow onready commands from config
72077230 args .istemplate = False
@@ -7358,7 +7381,7 @@ def download_model_from_url(url, permitted_types=[".gguf",".safetensors", ".ggml
73587381 break
73597382 if ((url .startswith ("http://" ) or url .startswith ("https://" )) and end_ext_ok ):
73607383 dlfile = downloader_internal (url , "auto" , False , min_file_size )
7361- if handle_multipart and "-00001-of-0000 " in url : #handle multipart files up to 9 parts
7384+ if handle_multipart and "-00001-of-00 " in url : #handle multipart files up to 999 parts
73627385 match = re .search (r'-(\d{5})-of-(\d{5})\.' , url )
73637386 if match :
73647387 total_parts = int (match .group (2 ))
0 commit comments