@@ -1,132 +1,13 @@
 import requests
-import os
-
-def model_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> dict:
-    url = f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
-    return {'fn': fn, 'url': url}
-
-all_models = {
-    'qwen2': {
-        'default': '1.5b',
-        'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
-        'variants': {
-            '7b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-7b.bin')
-                }
-            },
-            '1.5b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-1.5b.bin')
-                }
-            },
-            '0.5b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-0.5b.bin')
-                }
-            },
-        }
-    },
-    'gemma': {
-        'default': '2b',
-        'brief': 'Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1.',
-        'variants': {
-            '2b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'gemma-1.1-2b.bin')
-                }
-            },
-        }
-    },
-    'llama3': {
-        'default': '8b',
-        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
-        'variants': {
-            '8b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'llama3-8b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'minicpm': {
-        'default': '2b-sft',
-        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
-        'variants': {
-            '2b-sft': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'minicpm_sft_q8.bin')
-                }
-            },
-            '2b-dpo': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'minicpm-dpo-q4_1.bin')
-                }
-            },
-        }
-    },
-    'qwen1.5': {
-        'default': 'moe',
-        'brief': 'Qwen1.5 is the beta version of Qwen2 from Alibaba group.',
-        'variants': {
-            '1.8b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-1.8b.bin')
-                }
-            },
-            'moe': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-moe-q4_1.bin')
-                }
-            },
-        }
-    },
-    'qanything': {
-        'default': '7b',
-        'brief': 'QAnything is a local knowledge base question-answering system based on QwenLM.',
-        'variants': {
-            '7b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen-qany-7b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'starling-lm': {
-        'default': '7b',
-        'brief': 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.',
-        'variants': {
-            '7b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'starling-7b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'yi-1': {
-        'default': '34b',
-        'brief': 'Yi (v1) is a high-performing, bilingual language model.',
-        'variants': {
-            '34b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'yi-34b-q4.bin')
-                }
-            },
-        }
-    },
-}
+import os, json
+
+import binding
+
+def get_model_url_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> str:
+    return f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
+
+with open(os.path.join(binding.PATH_SCRIPTS, 'models.json')) as f:
+    all_models = json.load(f)
 
 def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=60, fill='█', printEnd="\r", auto_nl=True):
     percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
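
The hard-coded all_models table moves out of the script into models.json, loaded
from binding.PATH_SCRIPTS at import time. The JSON file itself is not part of this
diff, so the entry below is a best-guess sketch of its shape, inferred from how the
new code reads it: show_model() prints info['license'], get_model() compares the
file against info['size'], and parse_model_id() splits the stored url on '/' and
passes both parts to get_model_url_on_modelscope(), so url must hold
'<project>/<filename>'. All concrete values are placeholders:

    # Hypothetical models.json entry, shown as the dict json.load() would return:
    all_models = {
        'qwen2': {
            'default': '1.5b',
            'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
            'license': '...',                   # printed by show_model()
            'variants': {
                '1.5b': {
                    'default': 'q8',
                    'quantized': {
                        'q8': {
                            # '<project>/<filename>'; parse_model_id() splits this on '/'
                            'url': 'chatllm_quantized_qwen2/qwen2-1.5b.bin',
                            'size': 1234567890, # expected size in bytes, checked by get_model()
                        }
                    }
                }
            }
        }
    }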
@@ -166,6 +47,7 @@ def show_variants(info, default):
 def show_model(m):
     info = all_models[m]
     print(f"**{m}**: {info['brief']}")
+    print(f"License : {info['license']}")
     show_variants(info['variants'], info['default'])
     print()
 
@@ -176,8 +58,12 @@ def parse_model_id(model_id: str):
     parts = model_id.split(':')
     model = all_models[parts[0]]
     variants = model['variants']
-    var = variants[parts[1]] if len(parts) >= 2 else variants['default']
-    return var['quantized'][var['default']]
+    var = variants[parts[1] if len(parts) >= 2 else model['default']]
+    r = var['quantized'][var['default']]
+    url = r['url'].split('/')
+    r['url'] = get_model_url_on_modelscope(*url)
+    r['fn'] = url[1]
+    return r
 
 def get_model(model_id, storage_dir):
     if not os.path.isdir(storage_dir):
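
Two things change in parse_model_id(). First, the old fallback was buggy: with no
explicit variant it evaluated variants['default'], looking up a variant literally
named 'default' instead of using the model's default key; the new code indexes
variants with model['default']. Second, the returned record no longer carries a
ready-made URL; the stored '<project>/<filename>' pair is expanded on demand. A
hypothetical walk-through, assuming the models.json sketch above:

    info = parse_model_id('qwen2')  # no ':variant' suffix -> model['default'] == '1.5b'
    # variant '1.5b' -> default quantization 'q8';
    # r['url'] was 'chatllm_quantized_qwen2/qwen2-1.5b.bin' and is rewritten, so:
    print(info['fn'])   # qwen2-1.5b.bin
    print(info['url'])  # https://modelscope.cn/api/v1/models/judd2024/chatllm_quantized_qwen2/repo?Revision=master&FilePath=qwen2-1.5b.bin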
@@ -187,9 +73,13 @@ def get_model(model_id, storage_dir):
     info = parse_model_id(model_id)
     fn = os.path.join(storage_dir, info['fn'])
     if os.path.isfile(fn):
-        return fn
+        if os.path.getsize(fn) == info['size']:
+            return fn
+        else:
+            print(f"{fn} is incomplete, download again")
 
     assert download_file(info['url'], fn, model_id), f"failed to download {model_id}"
+    assert os.path.getsize(fn) == info['size'], f"downloaded file size mismatch!"
 
     return fn
 
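
get_model() now checks os.path.getsize(fn) against info['size'] twice: a cached
file with the wrong size is re-downloaded instead of reused, and a fresh download
with the wrong size trips the assert instead of being handed to the loader.
download_file() itself is outside this diff; the sketch below only illustrates its
assumed contract (stream the URL to fn, report progress, return success), built on
the requests import and print_progress_bar() from this file. The name and chunk
size are illustrative, not the project's actual implementation:

    def download_file_sketch(url: str, fn: str, prefix: str) -> bool:
        # Stream the response to disk so multi-GB model files never sit in memory.
        with requests.get(url, stream=True) as r:
            if r.status_code != 200:
                return False
            total = int(r.headers.get('content-length', 0))
            done = 0
            with open(fn, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
                    done += len(chunk)
                    if total > 0:
                        print_progress_bar(done, total, prefix=prefix)
        return True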
@@ -200,13 +90,13 @@ def find_index(l: list, x) -> int:
     return -1
 
 def preprocess_args(args: list[str], storage_dir) -> list[str]:
-    i = find_index(args, '-m')
-    if i < 0:
-        i = find_index(args, '--model')
-    if i < 0:
-        return args
-    if args[i + 1].startswith(':'):
-        args[i + 1] = get_model(args[i + 1][1:], storage_dir)
+    candidates = ['-m', '--model', '--embedding_model', '--reranker_model']
+    for param in candidates:
+        i = find_index(args, param)
+        if i < 0: continue
+
+        if args[i + 1].startswith(':'):
+            args[i + 1] = get_model(args[i + 1][1:], storage_dir)
 
     return args
 
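
preprocess_args() previously resolved model ids only for -m/--model; it now scans
-m, --model, --embedding_model and --reranker_model, and only values starting with
':' are treated as ids to fetch, so plain file paths pass through untouched. A
hypothetical call, assuming a qwen2:7b entry exists in models.json:

    args = preprocess_args(['-i', '-m', ':qwen2:7b', '--embedding_model', 'local/embed.bin'],
                           storage_dir='quantized')
    # -> ['-i', '-m', 'quantized/qwen2-7b.bin', '--embedding_model', 'local/embed.bin']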