Skip to content

Commit 55d538b

Browse files
Add Llama 3 models (#501)
* Add Llama 3 models
* fix
1 parent 4304856 commit 55d538b

File tree

3 files changed

+13
-0
lines changed

3 files changed

+13
-0
lines changed

docs/model_zoo.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ Scale hosts the following models in the LLM Engine Model Zoo:
1111
| `llama-2-13b-chat` | ✅ | | text-generation-inference, vllm | 4096 |
1212
| `llama-2-70b` | ✅ | ✅ | text-generation-inference, vllm | 4096 |
1313
| `llama-2-70b-chat` | ✅ | | text-generation-inference, vllm | 4096 |
14+
| `llama-3-8b` | ✅ | | vllm | 8192 |
15+
| `llama-3-8b-instruct` | ✅ | | vllm | 8192 |
16+
| `llama-3-70b` | ✅ | | vllm | 8192 |
17+
| `llama-3-70b-instruct` | ✅ | | vllm | 8192 |
1418
| `falcon-7b` | ✅ | | text-generation-inference, vllm | 2048 |
1519
| `falcon-7b-instruct` | ✅ | | text-generation-inference, vllm | 2048 |
1620
| `falcon-40b` | ✅ | | text-generation-inference, vllm | 2048 |

model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@
162162
"llama-2-13b-chat",
163163
"llama-2-70b",
164164
"llama-2-70b-chat",
165+
"llama-3-8b",
166+
"llama-3-8b-instruct",
167+
"llama-3-70b",
168+
"llama-3-70b-instruct",
165169
"falcon-7b",
166170
"falcon-7b-instruct",
167171
"falcon-40b",
@@ -231,6 +235,7 @@
231235
# Can also see 13B, 34B there too
232236
"gemma": {"max_model_len": 8192, "max_num_batched_tokens": 8192},
233237
"llama-2": {"max_model_len": None, "max_num_batched_tokens": 4096},
238+
"llama-3": {"max_model_len": None, "max_num_batched_tokens": 8192},
234239
"mistral": {"max_model_len": 8000, "max_num_batched_tokens": 8000},
235240
"mixtral-8x7b": {"max_model_len": 32768, "max_num_batched_tokens": 32768},
236241
"mixtral-8x22b": {"max_model_len": 65536, "max_num_batched_tokens": 65536},

model-engine/model_engine_server/infra/repositories/live_tokenizer_repository.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ def get_default_supported_models_info() -> Dict[str, ModelInfo]:
4040
"llama-2-13b-chat": ModelInfo("meta-llama/Llama-2-13b-chat-hf", None),
4141
"llama-2-70b": ModelInfo("meta-llama/Llama-2-70b-hf", None),
4242
"llama-2-70b-chat": ModelInfo("meta-llama/Llama-2-70b-chat-hf", None),
43+
"llama-3-8b": ModelInfo("meta-llama/Meta-Llama-3-8B", None),
44+
"llama-3-8b-instruct": ModelInfo("meta-llama/Meta-Llama-3-8B-Instruct", None),
45+
"llama-3-70b": ModelInfo("meta-llama/Meta-Llama-3-70B", None),
46+
"llama-3-70b-instruct": ModelInfo("meta-llama/Meta-Llama-3-70B-Instruct", None),
4347
"falcon-7b": ModelInfo("tiiuae/falcon-7b", None),
4448
"falcon-7b-instruct": ModelInfo("tiiuae/falcon-7b-instruct", None),
4549
"falcon-40b": ModelInfo("tiiuae/falcon-40b", None),

0 commit comments

Comments
 (0)