@@ -1,132 +1,13 @@
 import requests
-import os
-
-def model_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> dict:
-    url = f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
-    return {'fn': fn, 'url': url}
-
-all_models = {
-    'qwen2': {
-        'default': '1.5b',
-        'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
-        'variants': {
-            '7b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-7b.bin')
-                }
-            },
-            '1.5b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-1.5b.bin')
-                }
-            },
-            '0.5b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_qwen2', 'qwen2-0.5b.bin')
-                }
-            },
-        }
-    },
-    'gemma': {
-        'default': '2b',
-        'brief': 'Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1.',
-        'variants': {
-            '2b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'gemma-1.1-2b.bin')
-                }
-            },
-        }
-    },
-    'llama3': {
-        'default': '8b',
-        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
-        'variants': {
-            '8b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'llama3-8b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'minicpm': {
-        'default': '2b-sft',
-        'brief': 'Meta Llama 3: The most capable openly available LLM to date.',
-        'variants': {
-            '2b-sft': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'minicpm_sft_q8.bin')
-                }
-            },
-            '2b-dpo': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'minicpm-dpo-q4_1.bin')
-                }
-            },
-        }
-    },
-    'qwen1.5': {
-        'default': 'moe',
-        'brief': 'Qwen1.5 is the beta version of Qwen2 from Alibaba group.',
-        'variants': {
-            '1.8b': {
-                'default': 'q8',
-                'quantized': {
-                    'q8': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-1.8b.bin')
-                }
-            },
-            'moe': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen1.5-moe-q4_1.bin')
-                }
-            },
-        }
-    },
-    'qanything': {
-        'default': '7b',
-        'brief': 'QAnything is a local knowledge base question-answering system based on QwenLM.',
-        'variants': {
-            '7b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'qwen-qany-7b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'starling-lm': {
-        'default': '7b',
-        'brief': 'Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.',
-        'variants': {
-            '7b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'starling-7b-q4_1.bin')
-                }
-            },
-        }
-    },
-    'yi-1': {
-        'default': '34b',
-        'brief': 'Yi (v1) is a high-performing, bilingual language model.',
-        'variants': {
-            '34b': {
-                'default': 'q4_1',
-                'quantized': {
-                    'q4_1': model_on_modelscope('chatllm_quantized_models', 'yi-34b-q4.bin')
-                }
-            },
-        }
-    },
-}
+import os, json
+
+import binding
+
+def get_model_url_on_modelscope(proj: str, fn: str, user: str = 'judd2024') -> str:
+    return f"https://modelscope.cn/api/v1/models/{user}/{proj}/repo?Revision=master&FilePath={fn}"
+
+with open(os.path.join(binding.PATH_SCRIPTS, 'models.json')) as f:
+    all_models = json.load(f)
 
 def print_progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=60, fill='█', printEnd="\r", auto_nl=True):
     percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
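
The hard-coded all_models table moves out of the script into models.json, loaded
from binding.PATH_SCRIPTS at import time. The JSON file itself is not part of this
diff, so the entry below is a best-guess sketch of its shape, inferred from how the
new code reads it: show_model() prints info['license'], get_model() compares the
file against info['size'], and parse_model_id() splits the stored url on '/' and
passes both parts to get_model_url_on_modelscope(), so url must hold
'<project>/<filename>'. All concrete values are placeholders:

    # Hypothetical models.json entry, shown as the dict json.load() would return:
    all_models = {
        'qwen2': {
            'default': '1.5b',
            'brief': 'Qwen2 is a new series of large language models from Alibaba group.',
            'license': '...',                   # printed by show_model()
            'variants': {
                '1.5b': {
                    'default': 'q8',
                    'quantized': {
                        'q8': {
                            # '<project>/<filename>'; parse_model_id() splits this on '/'
                            'url': 'chatllm_quantized_qwen2/qwen2-1.5b.bin',
                            'size': 1234567890, # expected size in bytes, checked by get_model()
                        }
                    }
                }
            }
        }
    }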
@@ -166,6 +47,7 @@ def show_variants(info, default):
 def show_model(m):
     info = all_models[m]
     print(f"**{m}**: {info['brief']}")
+    print(f"License : {info['license']}")
     show_variants(info['variants'], info['default'])
     print()
 
@@ -176,8 +58,12 @@ def parse_model_id(model_id: str):
     parts = model_id.split(':')
     model = all_models[parts[0]]
     variants = model['variants']
-    var = variants[parts[1]] if len(parts) >= 2 else variants['default']
-    return var['quantized'][var['default']]
+    var = variants[parts[1] if len(parts) >= 2 else model['default']]
+    r = var['quantized'][var['default']]
+    url = r['url'].split('/')
+    r['url'] = get_model_url_on_modelscope(*url)
+    r['fn'] = url[1]
+    return r
 
 def get_model(model_id, storage_dir):
     if not os.path.isdir(storage_dir):
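
Two things change in parse_model_id(). First, the old fallback was buggy: with no
explicit variant it evaluated variants['default'], looking up a variant literally
named 'default' instead of using the model's default key; the new code indexes
variants with model['default']. Second, the returned record no longer carries a
ready-made URL; the stored '<project>/<filename>' pair is expanded on demand. A
hypothetical walk-through, assuming the models.json sketch above:

    info = parse_model_id('qwen2')  # no ':variant' suffix -> model['default'] == '1.5b'
    # variant '1.5b' -> default quantization 'q8';
    # r['url'] was 'chatllm_quantized_qwen2/qwen2-1.5b.bin' and is rewritten, so:
    print(info['fn'])   # qwen2-1.5b.bin
    print(info['url'])  # https://modelscope.cn/api/v1/models/judd2024/chatllm_quantized_qwen2/repo?Revision=master&FilePath=qwen2-1.5b.bin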
@@ -187,9 +73,13 @@ def get_model(model_id, storage_dir):
     info = parse_model_id(model_id)
     fn = os.path.join(storage_dir, info['fn'])
     if os.path.isfile(fn):
-        return fn
+        if os.path.getsize(fn) == info['size']:
+            return fn
+        else:
+            print(f"{fn} is incomplete, download again")
 
     assert download_file(info['url'], fn, model_id), f"failed to download {model_id}"
+    assert os.path.getsize(fn) == info['size'], f"downloaded file size mismatch!"
 
     return fn
 
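
get_model() now checks os.path.getsize(fn) against info['size'] twice: a cached
file with the wrong size is re-downloaded instead of reused, and a fresh download
with the wrong size trips the assert instead of being handed to the loader.
download_file() itself is outside this diff; the sketch below only illustrates its
assumed contract (stream the URL to fn, report progress, return success), built on
the requests import and print_progress_bar() from this file. The name and chunk
size are illustrative, not the project's actual implementation:

    def download_file_sketch(url: str, fn: str, prefix: str) -> bool:
        # Stream the response to disk so multi-GB model files never sit in memory.
        with requests.get(url, stream=True) as r:
            if r.status_code != 200:
                return False
            total = int(r.headers.get('content-length', 0))
            done = 0
            with open(fn, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
                    done += len(chunk)
                    if total > 0:
                        print_progress_bar(done, total, prefix=prefix)
        return True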
@@ -200,13 +90,13 @@ def find_index(l: list, x) -> int:
     return -1
 
 def preprocess_args(args: list[str], storage_dir) -> list[str]:
-    i = find_index(args, '-m')
-    if i < 0:
-        i = find_index(args, '--model')
-    if i < 0:
-        return args
-    if args[i + 1].startswith(':'):
-        args[i + 1] = get_model(args[i + 1][1:], storage_dir)
+    candidates = ['-m', '--model', '--embedding_model', '--reranker_model']
+    for param in candidates:
+        i = find_index(args, param)
+        if i < 0: continue
+
+        if args[i + 1].startswith(':'):
+            args[i + 1] = get_model(args[i + 1][1:], storage_dir)
 
     return args
 
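
preprocess_args() previously resolved model ids only for -m/--model; it now scans
-m, --model, --embedding_model and --reranker_model, and only values starting with
':' are treated as ids to fetch, so plain file paths pass through untouched. A
hypothetical call, assuming a qwen2:7b entry exists in models.json:

    args = preprocess_args(['-i', '-m', ':qwen2:7b', '--embedding_model', 'local/embed.bin'],
                           storage_dir='quantized')
    # -> ['-i', '-m', 'quantized/qwen2-7b.bin', '--embedding_model', 'local/embed.bin']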