Skip to content

Commit db2eaf4

Browse files
build: update dockerfiles.
1 parent 31c92fb commit db2eaf4

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

.github/workflows/build-and-push-vector-model.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,5 @@ jobs:
6565
password: ${{ secrets.GH_TOKEN }}
6666
- name: Docker Buildx (build-and-push)
6767
run: |
68+
rm -f .dockerignore
6869
docker buildx build --output "type=image,push=true" ${{ steps.prepare.outputs.buildx_args }} -f installer/Dockerfile-vector-model

installer/install_model.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# coding=utf-8
2+
"""
3+
@project: maxkb
4+
@Author:虎
5+
@file: install_model.py
6+
@date:2023/12/18 14:02
7+
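@desc: Downloads the GPT-2 family tokenizers (gpt2, gpt2-medium, gpt2-large, gpt2-xl, distilgpt2) and the shibing624/text2vec-base-chinese files into ./model.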
8+
"""
9+
import json
10+
import os.path
11+
from pycrawlers import huggingface
12+
from transformers import GPT2TokenizerFast
13+
# pycrawlers client; its get_batch_data method is the download function of
# the last entry in model_config.
hg = huggingface()
# Root directory that every download path below is built from.
prefix_dir = "./model"

# Download jobs, in execution order. Each entry pairs a callable
# ('download_function') with the keyword arguments it is called with
# ('download_params').
model_config = [
    # One tokenizer job per GPT-2 checkpoint, all sharing the cache directory
    # <prefix_dir>/base/hub.
    *(
        {
            'download_params': {
                'cache_dir': os.path.join(prefix_dir, 'base/hub'),
                'pretrained_model_name_or_path': checkpoint,
            },
            'download_function': GPT2TokenizerFast.from_pretrained,
        }
        for checkpoint in (
            'gpt2',
            'gpt2-medium',
            'gpt2-large',
            'gpt2-xl',
            'distilgpt2',
        )
    ),
    # Batch download from the text2vec-base-chinese tree URL, saved under
    # <prefix_dir>/embedding.
    {
        'download_params': {
            'urls': ["https://huggingface.co/shibing624/text2vec-base-chinese/tree/main"],
            'file_save_paths': [
                os.path.join(prefix_dir, 'embedding', "shibing624_text2vec-base-chinese"),
            ],
        },
        'download_function': hg.get_batch_data,
    },
]
60+
61+
62+
def install():
    """Run every download job listed in ``model_config``, in order.

    For each entry, the ``download_params`` mapping is printed as JSON and
    then passed as keyword arguments to the entry's ``download_function``.
    """
    for entry in model_config:
        params = entry.get('download_params')
        # Echo the parameters first so the output shows which job is running.
        print(json.dumps(params))
        download = entry.get('download_function')
        download(**params)
66+
67+
68+
# Script entry point: run all downloads only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    install()

0 commit comments

Comments
 (0)