Add DeepSeek-Coder-V2-Lite-Instruct (16B) without any quantization

mitya52 · mitya52 · commit 4e3b931b8f89 · 2024-07-05T11:34:55.000+02:00
diff --git a/refact_known_models/huggingface.py b/refact_known_models/huggingface.py
@@ -233,4 +233,14 @@
         "T": 8192,
         "filter_caps": ["chat"],
     },
+    "deepseek-coder-v2/16b/instruct": {
+        "backend": "transformers",
+        "model_path": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
+        "model_class_kwargs": {
+            "torch_dtype": "bf16",
+        },
+        "required_memory_mb": 80000,
+        "T": 16384,  # in fact this model can handle 128K context
+        "filter_caps": ["completion", "chat"],
+    },
 }