@@ -80,6 +80,7 @@ def decode(self, *args, **kwargs):
# Chat-completion endpoints for the individual model providers.
yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
grok_model_endpoint = "https://api.x.ai/v1/chat/completions"
volcengine_endpoint = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"

# Ensure the Azure base URL ends with exactly one trailing slash, then build
# the full deployment URL for the configured engine.
if not AZURE_ENDPOINT.endswith('/'):
    AZURE_ENDPOINT = AZURE_ENDPOINT + '/'
azure_endpoint = f'{AZURE_ENDPOINT}openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -102,6 +103,7 @@ def decode(self, *args, **kwargs):
# Apply user-configured URL redirections to each provider endpoint; an
# endpoint absent from API_URL_REDIRECT is kept unchanged.
yimodel_endpoint = API_URL_REDIRECT.get(yimodel_endpoint, yimodel_endpoint)
deepseekapi_endpoint = API_URL_REDIRECT.get(deepseekapi_endpoint, deepseekapi_endpoint)
grok_model_endpoint = API_URL_REDIRECT.get(grok_model_endpoint, grok_model_endpoint)
volcengine_endpoint = API_URL_REDIRECT.get(volcengine_endpoint, volcengine_endpoint)

# Obtain the tokenizer (lazily loaded)
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -954,7 +956,7 @@ def decode(self, *args, **kwargs):
954956 try :
955957 grok_beta_128k_noui , grok_beta_128k_ui = get_predict_function (
956958 api_key_conf_name = "GROK_API_KEY" , max_output_token = 8192 , disable_proxy = False
957- )
959+ )
958960
959961 model_info .update ({
960962 "grok-beta" : {
@@ -1089,8 +1091,10 @@ def decode(self, *args, **kwargs):
10891091 })
10901092 except :
10911093 logger .error (trimmed_format_exc ())
1094+
10921095# -=-=-=-=-=-=- 幻方-深度求索大模型在线API -=-=-=-=-=-=-
1093- if "deepseek-chat" in AVAIL_LLM_MODELS or "deepseek-coder" in AVAIL_LLM_MODELS or "deepseek-reasoner" in AVAIL_LLM_MODELS :
1096+ claude_models = ["deepseek-chat" , "deepseek-coder" , "deepseek-reasoner" ]
1097+ if any (item in claude_models for item in AVAIL_LLM_MODELS ):
10941098 try :
10951099 deepseekapi_noui , deepseekapi_ui = get_predict_function (
10961100 api_key_conf_name = "DEEPSEEK_API_KEY" , max_output_token = 4096 , disable_proxy = False
@@ -1127,6 +1131,60 @@ def decode(self, *args, **kwargs):
11271131 })
11281132 except :
11291133 logger .error (trimmed_format_exc ())

# -=-=-=-=-=-=- 火山引擎 (Volcengine) aligned support -=-=-=-=-=-=-
# Flexible hook for Volcengine's multi-model gateway, e.g.:
#   AVAIL_LLM_MODELS = ["volcengine-deepseek-r1-250120(max_token=6666)"]
# where
#   "volcengine-"        is the mandatory prefix,
#   "deepseek-r1-250120" is the mandatory model name,
#   "(max_token=6666)"   is an optional configuration suffix.
#
# Metadata for Volcengine models already known to us. Kept as a standalone
# table — the previous code aliased the global `model_info`
# (model_info_extend = model_info) and then called .update() on it, which
# injected incomplete bare-name entries ("deepseek-r1-250120", ...) into the
# global registry on every loop iteration. Using a separate dict fixes that.
known_volcengine_model_info = {
    "deepseek-r1-250120": {
        "max_token": 16384,
        "enable_reasoning": True,
        "can_multi_thread": True,
        "endpoint": volcengine_endpoint,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "deepseek-v3-241226": {
        "max_token": 16384,
        "enable_reasoning": False,
        "can_multi_thread": True,
        "endpoint": volcengine_endpoint,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
}
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("volcengine-")]:
    try:
        # Parse "name(max_token=...)" into (name, max_token).
        origin_model_name, max_token_tmp = read_one_api_model_name(model)
        # If the model is known, fetch its extra metadata (reasoning flags etc.).
        original_model_info = known_volcengine_model_info.get(
            origin_model_name.replace("volcengine-", "", 1), None)
    except:
        logger.error(f"volcengine模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
        continue

    # Fresh predict-function pair per registered model (mirrors the original
    # per-iteration call; each model gets its own closures).
    volcengine_noui, volcengine_ui = get_predict_function(
        api_key_conf_name="ARK_API_KEY", max_output_token=8192,
        disable_proxy=True, model_remove_prefix=["volcengine-"])

    this_model_info = {
        "fn_with_ui": volcengine_ui,
        "fn_without_ui": volcengine_noui,
        "endpoint": volcengine_endpoint,
        "can_multi_thread": True,
        # NOTE(review): max_token_tmp parsed from "(max_token=...)" is ignored
        # here and 64000 is always used — confirm whether the configured value
        # should take precedence.
        "max_token": 64000,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    }

    # Propagate selected attributes from the known-model table when present.
    for attribute in ("has_multimodal_capacity", "enable_reasoning"):
        if original_model_info is not None and original_model_info.get(attribute, None) is not None:
            this_model_info[attribute] = original_model_info.get(attribute)
    model_info.update({model: this_model_info})

11301188# -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
11311189for model in [m for m in AVAIL_LLM_MODELS if m .startswith ("one-api-" )]:
11321190 # 为了更灵活地接入one-api多模型管理界面,设计了此接口,例子:AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
0 commit comments