7676lastgeneratedcomfyimg = b''
7777lastuploadedcomfyimg = b''
7878fullsdmodelpath = "" #if empty, it's not initialized
79- mmprojpath = "" #if empty, it's not initialized
8079password = "" #if empty, no auth key required
8180fullwhispermodelpath = "" #if empty, it's not initialized
8281ttsmodelpath = "" #if empty, not initialized
106105importvars_in_progress = False
107106has_multiplayer = False
108107has_audio_support = False
108+ has_vision_support = False
109109savedata_obj = None
110110multiplayer_story_data_compressed = None #stores the full compressed story of the current multiplayer session
111111multiplayer_turn_major = 1 # to keep track of when a client needs to sync their stories
@@ -539,6 +539,7 @@ def init_library():
539539 handle .get_stream_count .restype = ctypes .c_int
540540 handle .has_finished .restype = ctypes .c_bool
541541 handle .has_audio_support .restype = ctypes .c_bool
542+ handle .has_vision_support .restype = ctypes .c_bool
542543 handle .get_last_eval_time .restype = ctypes .c_float
543544 handle .get_last_process_time .restype = ctypes .c_float
544545 handle .get_last_token_count .restype = ctypes .c_int
@@ -891,18 +892,17 @@ def convert_json_to_gbnf(json_obj):
891892 return ""
892893
893894def get_capabilities ():
894- global savedata_obj , has_multiplayer , KcppVersion , friendlymodelname , friendlysdmodelname , fullsdmodelpath , mmprojpath , password , fullwhispermodelpath , ttsmodelpath , embeddingsmodelpath , has_audio_support
895+ global savedata_obj , has_multiplayer , KcppVersion , friendlymodelname , friendlysdmodelname , fullsdmodelpath , password , fullwhispermodelpath , ttsmodelpath , embeddingsmodelpath , has_audio_support , has_vision_support
895896 has_llm = not (friendlymodelname == "inactive" )
896897 has_txt2img = not (friendlysdmodelname == "inactive" or fullsdmodelpath == "" )
897- has_vision = (mmprojpath != "" )
898898 has_password = (password != "" )
899899 has_whisper = (fullwhispermodelpath != "" )
900900 has_search = True if args .websearch else False
901901 has_tts = (ttsmodelpath != "" )
902902 has_embeddings = (embeddingsmodelpath != "" )
903903 has_guidance = True if args .enableguidance else False
904904 admin_type = (2 if args .admin and args .admindir and args .adminpassword else (1 if args .admin and args .admindir else 0 ))
905- return {"result" :"KoboldCpp" , "version" :KcppVersion , "protected" :has_password , "llm" :has_llm , "txt2img" :has_txt2img ,"vision" :has_vision ,"audio" :has_audio_support ,"transcribe" :has_whisper ,"multiplayer" :has_multiplayer ,"websearch" :has_search ,"tts" :has_tts , "embeddings" :has_embeddings , "savedata" :(savedata_obj is not None ), "admin" : admin_type , "guidance" : has_guidance }
905+ return {"result" :"KoboldCpp" , "version" :KcppVersion , "protected" :has_password , "llm" :has_llm , "txt2img" :has_txt2img ,"vision" :has_vision_support ,"audio" :has_audio_support ,"transcribe" :has_whisper ,"multiplayer" :has_multiplayer ,"websearch" :has_search ,"tts" :has_tts , "embeddings" :has_embeddings , "savedata" :(savedata_obj is not None ), "admin" : admin_type , "guidance" : has_guidance }
906906
907907def dump_gguf_metadata (file_path ): #if you're gonna copy this into your own project at least credit concedo
908908 chunk_size = 1024 * 1024 * 12 # read first 12mb of file
@@ -3120,7 +3120,7 @@ def noscript_webui(self):
31203120 def do_GET (self ):
31213121 global embedded_kailite , embedded_kcpp_docs , embedded_kcpp_sdui
31223122 global last_req_time , start_time
3123- global savedata_obj , has_multiplayer , multiplayer_turn_major , multiplayer_turn_minor , multiplayer_story_data_compressed , multiplayer_dataformat , multiplayer_lastactive , maxctx , maxhordelen , friendlymodelname , lastuploadedcomfyimg , lastgeneratedcomfyimg , KcppVersion , totalgens , preloaded_story , exitcounter , currentusergenkey , friendlysdmodelname , fullsdmodelpath , mmprojpath , password , friendlyembeddingsmodelname
3123+ global savedata_obj , has_multiplayer , multiplayer_turn_major , multiplayer_turn_minor , multiplayer_story_data_compressed , multiplayer_dataformat , multiplayer_lastactive , maxctx , maxhordelen , friendlymodelname , lastuploadedcomfyimg , lastgeneratedcomfyimg , KcppVersion , totalgens , preloaded_story , exitcounter , currentusergenkey , friendlysdmodelname , fullsdmodelpath , password , friendlyembeddingsmodelname
31243124 self .path = self .path .rstrip ('/' )
31253125 response_body = None
31263126 content_type = 'application/json'
@@ -3370,7 +3370,7 @@ def do_GET(self):
33703370 return
33713371
33723372 def do_POST (self ):
3373- global modelbusy , requestsinqueue , currentusergenkey , totalgens , pendingabortkey , lastuploadedcomfyimg , lastgeneratedcomfyimg , multiplayer_turn_major , multiplayer_turn_minor , multiplayer_story_data_compressed , multiplayer_dataformat , multiplayer_lastactive , net_save_slots
3373+ global modelbusy , requestsinqueue , currentusergenkey , totalgens , pendingabortkey , lastuploadedcomfyimg , lastgeneratedcomfyimg , multiplayer_turn_major , multiplayer_turn_minor , multiplayer_story_data_compressed , multiplayer_dataformat , multiplayer_lastactive , net_save_slots , has_vision_support
33743374 contlenstr = self .headers ['content-length' ]
33753375 content_length = 0
33763376 body = None
@@ -3846,8 +3846,7 @@ def do_POST(self):
38463846 elif self .path .endswith ('/v1/chat/completions' ):
38473847 api_format = 4
38483848 elif self .path .endswith ('/sdapi/v1/interrogate' ):
3849- has_vision = (mmprojpath != "" )
3850- if not has_vision :
3849+ if not has_vision_support :
38513850 self .send_response (503 )
38523851 self .end_headers (content_type = 'application/json' )
38533852 self .wfile .write (json .dumps ({"detail" : {
@@ -6641,7 +6640,7 @@ def main(launch_args, default_args):
66416640
66426641def kcpp_main_process (launch_args , g_memory = None , gui_launcher = False ):
66436642 global embedded_kailite , embedded_kcpp_docs , embedded_kcpp_sdui , start_time , exitcounter , global_memory , using_gui_launcher
6644- global libname , args , friendlymodelname , friendlysdmodelname , fullsdmodelpath , mmprojpath , password , fullwhispermodelpath , ttsmodelpath , embeddingsmodelpath , friendlyembeddingsmodelname , has_audio_support
6643+ global libname , args , friendlymodelname , friendlysdmodelname , fullsdmodelpath , password , fullwhispermodelpath , ttsmodelpath , embeddingsmodelpath , friendlyembeddingsmodelname , has_audio_support , has_vision_support
66456644
66466645 start_server = True
66476646
@@ -6982,9 +6981,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
69826981 exitcounter = 999
69836982 exit_with_error (2 ,f"Cannot find mmproj file: { args .mmproj } " )
69846983 else :
6985- global mmprojpath
69866984 args .mmproj = os .path .abspath (args .mmproj )
6987- mmprojpath = args .mmproj
69886985
69896986 if not args .blasthreads or args .blasthreads <= 0 :
69906987 args .blasthreads = args .threads
@@ -6998,7 +6995,13 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
69986995 print ("WARNING: Selected Text Model does not seem to be a GGUF file! Are you sure you picked the right file?" )
69996996 loadok = load_model (modelname )
70006997 print ("Load Text Model OK: " + str (loadok ))
7001- has_audio_support = handle .has_audio_support () # multimodal audio support is only known at runtime
6998+ if args .mmproj and args .mmproj != "" : # multimodal vision and audio support is only known at runtime
6999+ has_audio_support = handle .has_audio_support ()
7000+ has_vision_support = handle .has_vision_support ()
7001+ else :
7002+ has_audio_support = False
7003+ has_vision_support = False
7004+
70027005 if not loadok :
70037006 exitcounter = 999
70047007 exit_with_error (3 ,"Could not load text model: " + modelname )
@@ -7193,6 +7196,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
71937196 enabledmlist .append ("ImageGeneration" ) if "txt2img" in caps and caps ["txt2img" ] else disabledmlist .append ("ImageGeneration" )
71947197 enabledmlist .append ("VoiceRecognition" ) if "transcribe" in caps and caps ["transcribe" ] else disabledmlist .append ("VoiceRecognition" )
71957198 enabledmlist .append ("MultimodalVision" ) if "vision" in caps and caps ["vision" ] else disabledmlist .append ("MultimodalVision" )
7199+ enabledmlist .append ("MultimodalAudio" ) if "audio" in caps and caps ["audio" ] else disabledmlist .append ("MultimodalAudio" )
71967200 enabledmlist .append ("NetworkMultiplayer" ) if "multiplayer" in caps and caps ["multiplayer" ] else disabledmlist .append ("NetworkMultiplayer" )
71977201 enabledmlist .append ("ApiKeyPassword" ) if "protected" in caps and caps ["protected" ] else disabledmlist .append ("ApiKeyPassword" )
71987202 enabledmlist .append ("WebSearchProxy" ) if "websearch" in caps and caps ["websearch" ] else disabledmlist .append ("WebSearchProxy" )
0 commit comments