build: update dockerfiles.

liqiang-fit2cloud · liqiang-fit2cloud · commit db2eaf4681a8 · 2025-06-24T18:11:33.000+08:00
diff --git a/.github/workflows/build-and-push-vector-model.yml b/.github/workflows/build-and-push-vector-model.yml
@@ -65,4 +65,5 @@ jobs:
           password: ${{ secrets.GH_TOKEN }}
       - name: Docker Buildx (build-and-push)
         run: |
+          rm -f .dockerignore
           docker buildx build --output "type=image,push=true" ${{ steps.prepare.outputs.buildx_args }} -f installer/Dockerfile-vector-model
diff --git a/installer/install_model.py b/installer/install_model.py
@@ -0,0 +1,69 @@
+# coding=utf-8
+"""
+    @project: maxkb
+    @Author：虎
+    @file： install_model.py
+    @date：2023/12/18 14:02
+    @desc: Fetch the GPT-2 family tokenizers and the text2vec-base-chinese embedding model into ./model.
+"""
+import json
+import os.path
+from pycrawlers import huggingface
+from transformers import GPT2TokenizerFast
+hg = huggingface()  # built at import time; hg.get_batch_data is the download_function of the last entry
+prefix_dir = "./model"  # relative root under which every cache_dir / save path below is built
+model_config = [  # one dict per download; install() calls 'download_function' with **'download_params'
+    {  # tokenizer entry for 'gpt2'
+        'download_params': {
+            'cache_dir': os.path.join(prefix_dir, 'base/hub'),  # same cache_dir for all five tokenizer entries
+            'pretrained_model_name_or_path': 'gpt2'
+        },
+        'download_function': GPT2TokenizerFast.from_pretrained
+    },
+    {  # tokenizer entry for 'gpt2-medium'
+        'download_params': {
+            'cache_dir': os.path.join(prefix_dir, 'base/hub'),
+            'pretrained_model_name_or_path': 'gpt2-medium'
+        },
+        'download_function': GPT2TokenizerFast.from_pretrained
+    },
+    {  # tokenizer entry for 'gpt2-large'
+        'download_params': {
+            'cache_dir': os.path.join(prefix_dir, 'base/hub'),
+            'pretrained_model_name_or_path': 'gpt2-large'
+        },
+        'download_function': GPT2TokenizerFast.from_pretrained
+    },
+    {  # tokenizer entry for 'gpt2-xl'
+        'download_params': {
+            'cache_dir': os.path.join(prefix_dir, 'base/hub'),
+            'pretrained_model_name_or_path': 'gpt2-xl'
+        },
+        'download_function': GPT2TokenizerFast.from_pretrained
+    },
+    {  # tokenizer entry for 'distilgpt2'
+        'download_params': {
+            'cache_dir': os.path.join(prefix_dir, 'base/hub'),
+            'pretrained_model_name_or_path': 'distilgpt2'
+        },
+        'download_function': GPT2TokenizerFast.from_pretrained
+    },
+    {  # embedding model entry; uses parallel lists: urls[i] is paired with file_save_paths[i] (presumably; confirm against pycrawlers)
+        'download_params': {
+            'urls': ["https://huggingface.co/shibing624/text2vec-base-chinese/tree/main"],
+            'file_save_paths': [os.path.join(prefix_dir, 'embedding',"shibing624_text2vec-base-chinese")]
+        },
+        'download_function': hg.get_batch_data
+    }
+
+]
+
+
+def install():  # Run every download listed in model_config, in list order; takes no arguments and returns None.
+    for model in model_config:  # no error handling here: an exception from one entry stops the remaining ones
+        print(json.dumps(model.get('download_params')))  # echo the kwargs as JSON before the call
+        model.get('download_function')(**model.get('download_params'))  # params are expanded as keyword arguments
+
+
+if __name__ == '__main__':  # run the downloads only when executed as a script, not when imported
+    install()