1+ # coding=utf-8
2+ """
3+ @project: maxkb
4+ @Author:虎
5+ @file: install_model.py
6+ @date:2023/12/18 14:02
@desc: Downloads the GPT-2 family tokenizers and the text2vec-base-chinese embedding model into ./model
8+ """
9+ import json
10+ import os .path
11+ from pycrawlers import huggingface
12+ from transformers import GPT2TokenizerFast
# Hugging Face crawler client from pycrawlers; its get_batch_data method is
# the download function for the embedding model entry below.
hg = huggingface()

# Root directory for everything this script downloads (relative path, so it
# resolves against the current working directory).
prefix_dir = "./model"

# Each entry pairs a callable ('download_function') with the keyword
# arguments ('download_params') that install() unpacks into it.
model_config = [
    # GPT-2 family tokenizers: same loader and cache_dir, only the model
    # name differs from entry to entry.
    {
        'download_params': {
            'cache_dir': os.path.join(prefix_dir, 'base/hub'),
            'pretrained_model_name_or_path': tokenizer_name,
        },
        'download_function': GPT2TokenizerFast.from_pretrained,
    }
    for tokenizer_name in (
        'gpt2',
        'gpt2-medium',
        'gpt2-large',
        'gpt2-xl',
        'distilgpt2',
    )
] + [
    # Embedding model fetched through the pycrawlers client.
    # NOTE(review): presumably urls and file_save_paths are matched by
    # position -- confirm against the pycrawlers documentation.
    {
        'download_params': {
            'urls': ["https://huggingface.co/shibing624/text2vec-base-chinese/tree/main"],
            'file_save_paths': [
                os.path.join(prefix_dir, 'embedding', "shibing624_text2vec-base-chinese"),
            ],
        },
        'download_function': hg.get_batch_data,
    },
]
60+
61+
def install():
    """Run every download described in ``model_config``, in list order.

    For each entry the keyword arguments are printed as JSON, then the
    entry's download function is called with them. Nothing is caught here,
    so an exception raised by one download stops the remaining ones.
    """
    for entry in model_config:
        params = entry.get('download_params')
        download = entry.get('download_function')
        # Show what is about to be fetched before the download starts.
        print(json.dumps(params))
        download(**params)
66+
67+
if __name__ == '__main__':
    # Start the downloads only when run as a script, not when imported.
    install()
0 commit comments