
Commit a995869

Properly add mixtral 8x22b (#493)
* Properly add mixtral 8x22b
* unit test
1 parent d6088b4 commit a995869

4 files changed, +18 -1 lines changed

docs/model_zoo.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -22,6 +22,7 @@ Scale hosts the following models in the LLM Engine Model Zoo:
 | `mistral-7b-instruct` ||| vllm | 8000 |
 | `mixtral-8x7b` || | vllm | 32768 |
 | `mixtral-8x7b-instruct` || | vllm | 32768 |
+| `mixtral-8x22b` || | vllm | 65536 |
 | `codellama-7b` ||| text-generation-inference, vllm | 16384 |
 | `codellama-7b-instruct` ||| text-generation-inference, vllm | 16384 |
 | `codellama-13b` ||| text-generation-inference, vllm | 16384 |
```
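For reference, a model listed in the zoo is reachable through the llmengine Python client. A minimal sketch, assuming the hosted endpoint is named exactly `mixtral-8x22b` as in the table row above and that `SCALE_API_KEY` is set in the environment:

```python
# Minimal sketch: calling the newly listed model through the llmengine client.
# The endpoint name is assumed to match the model zoo entry above.
from llmengine import Completion

response = Completion.create(
    model="mixtral-8x22b",
    prompt="Summarize mixture-of-experts routing in one sentence.",
    max_new_tokens=128,  # prompt + output must stay within the 65536-token limit above
    temperature=0.2,
)
print(response.output.text)
```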

model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py

Lines changed: 9 additions & 1 deletion

```diff
@@ -180,6 +180,7 @@
     "mistral-7b-instruct",
     "mixtral-8x7b",
     "mixtral-8x7b-instruct",
+    "mixtral-8x22b",
     "mammoth-coder-llama-2-7b",
     "mammoth-coder-llama-2-13b",
     "mammoth-coder-llama-2-34b",
```
```diff
@@ -230,7 +231,8 @@
     "gemma": {"max_model_len": 8192, "max_num_batched_tokens": 8192},
     "llama-2": {"max_model_len": None, "max_num_batched_tokens": 4096},
     "mistral": {"max_model_len": 8000, "max_num_batched_tokens": 8000},
-    "mixtral": {"max_model_len": 32768, "max_num_batched_tokens": 32768},
+    "mixtral-8x7b": {"max_model_len": 32768, "max_num_batched_tokens": 32768},
+    "mixtral-8x22b": {"max_model_len": 65536, "max_num_batched_tokens": 65536},
     "zephyr": {"max_model_len": 32768, "max_num_batched_tokens": 32768},
 }
```
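Splitting the single `mixtral` override into `mixtral-8x7b` and `mixtral-8x22b` lets the two families advertise different context windows. A minimal sketch of how such per-family overrides could be resolved from a model name; the helper and dict name below are hypothetical, not the repo's actual lookup code:

```python
# Hypothetical helper (not taken from the repo): resolve per-family overrides
# by longest-substring match, so "mixtral-8x22b" is not shadowed by a shorter
# family key and "mixtral-8x7b-instruct" still finds the 8x7b entry.
from typing import Dict, Optional

VLLM_MODEL_LENGTH_OVERRIDES: Dict[str, Dict[str, Optional[int]]] = {
    "mistral": {"max_model_len": 8000, "max_num_batched_tokens": 8000},
    "mixtral-8x7b": {"max_model_len": 32768, "max_num_batched_tokens": 32768},
    "mixtral-8x22b": {"max_model_len": 65536, "max_num_batched_tokens": 65536},
}

def resolve_overrides(model_name: str) -> Optional[Dict[str, Optional[int]]]:
    # Collect every family key that appears in the model name, then prefer
    # the most specific (longest) one.
    matches = [key for key in VLLM_MODEL_LENGTH_OVERRIDES if key in model_name]
    return VLLM_MODEL_LENGTH_OVERRIDES[max(matches, key=len)] if matches else None

assert resolve_overrides("mixtral-8x22b")["max_model_len"] == 65536
assert resolve_overrides("mixtral-8x7b-instruct")["max_model_len"] == 32768
```

The next hunk in the same file extends `infer_hardware_from_model_name` with the new family's hardware profile.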

```diff
@@ -2200,6 +2202,12 @@ def infer_hardware_from_model_name(model_name: str) -> CreateDockerImageBatchJob
         memory = "160Gi"
         storage = "160Gi"
         gpu_type = GpuType.NVIDIA_AMPERE_A100E
+    elif "mixtral-8x22b" in model_name:
+        cpus = "80"
+        gpus = 8
+        memory = "800Gi"
+        storage = "460Gi"
+        gpu_type = GpuType.NVIDIA_AMPERE_A100E
     else:
         numbers = re.findall(r"\d+", model_name)
         if len(numbers) == 0:
```
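The sizing is plausible at a back-of-envelope level. Assuming Mixtral 8x22B's roughly 141B total parameters (Mistral's published figure, not something stated in this diff) stored in fp16:

```python
# Back-of-envelope check for the 8-GPU request above. Assumption not taken
# from this diff: ~141e9 total parameters at 2 bytes each (fp16).
params = 141e9
weights_gib = params * 2 / 2**30
print(f"fp16 weights: ~{weights_gib:.0f} GiB")                # ~263 GiB
print(f"per GPU across 8 cards: ~{weights_gib / 8:.0f} GiB")  # ~33 GiB each
```

That leaves most of each 80 GB A100e free for KV cache and activations, consistent with the 65536-token context configured above.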

model-engine/model_engine_server/infra/repositories/live_tokenizer_repository.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -62,6 +62,7 @@ def get_default_supported_models_info() -> Dict[str, ModelInfo]:
     "mistral-7b-instruct": ModelInfo("mistralai/Mistral-7B-Instruct-v0.1", None),
     "mixtral-8x7b": ModelInfo("mistralai/Mixtral-8x7B-v0.1", None),
     "mixtral-8x7b-instruct": ModelInfo("mistralai/Mixtral-8x7B-Instruct-v0.1", None),
+    "mixtral-8x22b": ModelInfo("mistral-community/Mixtral-8x22B-v0.1", None),
     "mammoth-coder-llama-2-7b": ModelInfo("TIGER-Lab/MAmmoTH-Coder-7B", None),
     "mammoth-coder-llama-2-13b": ModelInfo("TIGER-Lab/MAmmoTH-Coder-13B", None),
     "mammoth-coder-llama-2-34b": ModelInfo("TIGER-Lab/MAmmoTH-Coder-34B", None),
```

model-engine/tests/unit/domain/test_llm_use_cases.py

Lines changed: 7 additions & 0 deletions

```diff
@@ -1705,6 +1705,13 @@ def test_infer_hardware_from_model_name():
     assert hardware.storage == "160Gi"
     assert hardware.gpu_type == GpuType.NVIDIA_AMPERE_A100E
 
+    hardware = infer_hardware_from_model_name("mixtral-8x22b")
+    assert hardware.cpus == "80"
+    assert hardware.gpus == 8
+    assert hardware.memory == "800Gi"
+    assert hardware.storage == "460Gi"
+    assert hardware.gpu_type == GpuType.NVIDIA_AMPERE_A100E
+
     hardware = infer_hardware_from_model_name("llama-2-7b")
     assert hardware.cpus == "10"
     assert hardware.gpus == 1
```
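The new assertions mirror the `elif "mixtral-8x22b"` branch added to `infer_hardware_from_model_name`. Something like `pytest model-engine/tests/unit/domain/test_llm_use_cases.py -k test_infer_hardware_from_model_name` should exercise them, though the exact invocation depends on the repo's test setup.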
