
Commit 0c05963

[Model] Deliver Qwen2.5-Coder 0.5B and 3B (#627)
This PR delivers Qwen2.5-Coder 0.5B and 3B, since 1.5B and 7B are already supported. We add the following models to the model list:

- `Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC`
- `Qwen2.5-Coder-3B-Instruct-q4f32_1-MLC`
- `Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC`
- `Qwen2.5-Coder-0.5B-Instruct-q4f32_1-MLC`
- `Qwen2.5-Coder-0.5B-Instruct-q0f16-MLC`
- `Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC`

Note that the official `config.json` of the coder models is identical to that of the chat models, so we do not need to compile new WASMs: the new entries point at the existing Qwen2 0.5B and Qwen2.5 3B model libraries.
1 parent 5fa741f commit 0c05963
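For context, the sketch below shows how one of the newly listed model IDs could be loaded through web-llm's chat completion API. It is a minimal illustration, assuming the `@mlc-ai/web-llm` package with its `CreateMLCEngine` helper; the prompt text and progress logging are illustrative only.

```ts
import { CreateMLCEngine } from "@mlc-ai/web-llm";

async function demo() {
  // Any of the six model IDs added by this commit can be substituted here;
  // the engine resolves weights and the WASM library from prebuiltAppConfig.
  const engine = await CreateMLCEngine("Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC", {
    initProgressCallback: (report) => console.log(report.text),
  });

  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "Write a debounce helper in TypeScript." }],
  });
  console.log(reply.choices[0].message.content);
}

void demo();
```

The six variants differ only in quantization and therefore in download size and the VRAM estimates recorded in the config below.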

File tree

1 file changed: +84, -0 lines changed


src/config.ts

Lines changed: 84 additions & 0 deletions
```diff
@@ -1069,6 +1069,62 @@ export const prebuiltAppConfig: AppConfig = {
       },
     },
     // Qwen2.5-Coder
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC",
+      model_id: "Qwen2.5-Coder-0.5B-Instruct-q4f16_1-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+      low_resource_required: true,
+      vram_required_MB: 944.62,
+      overrides: {
+        context_window_size: 4096,
+      },
+    },
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Qwen2.5-Coder-0.5B-Instruct-q4f32_1-MLC",
+      model_id: "Qwen2.5-Coder-0.5B-Instruct-q4f32_1-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Qwen2-0.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
+      low_resource_required: true,
+      vram_required_MB: 1060.2,
+      overrides: {
+        context_window_size: 4096,
+      },
+    },
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Qwen2.5-Coder-0.5B-Instruct-q0f16-MLC",
+      model_id: "Qwen2.5-Coder-0.5B-Instruct-q0f16-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Qwen2-0.5B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm",
+      low_resource_required: true,
+      vram_required_MB: 1624.12,
+      overrides: {
+        context_window_size: 4096,
+      },
+    },
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC",
+      model_id: "Qwen2.5-Coder-0.5B-Instruct-q0f32-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Qwen2-0.5B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm",
+      low_resource_required: true,
+      vram_required_MB: 2654.75,
+      overrides: {
+        context_window_size: 4096,
+      },
+    },
     {
       model:
         "https://huggingface.co/mlc-ai/Qwen2.5-Coder-1.5B-Instruct-q4f16_1-MLC",
@@ -1097,6 +1153,34 @@ export const prebuiltAppConfig: AppConfig = {
         context_window_size: 4096,
       },
     },
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC",
+      model_id: "Qwen2.5-Coder-3B-Instruct-q4f16_1-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Qwen2.5-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+      low_resource_required: true,
+      vram_required_MB: 2504.76,
+      overrides: {
+        context_window_size: 4096,
+      },
+    },
+    {
+      model:
+        "https://huggingface.co/mlc-ai/Qwen2.5-Coder-3B-Instruct-q4f32_1-MLC",
+      model_id: "Qwen2.5-Coder-3B-Instruct-q4f32_1-MLC",
+      model_lib:
+        modelLibURLPrefix +
+        modelVersion +
+        "/Qwen2.5-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
+      low_resource_required: true,
+      vram_required_MB: 2893.64,
+      overrides: {
+        context_window_size: 4096,
+      },
+    },
     {
       model:
         "https://huggingface.co/mlc-ai/Qwen2.5-Coder-7B-Instruct-q4f16_1-MLC",
```
