
Commit ffb499d

Necessary Changes for long context llama-3-8b (#516)

* all necessary changes
* tests

1 parent 2e9341e

3 files changed: 16 additions, 0 deletions

model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py

Lines changed: 8 additions & 0 deletions

@@ -169,6 +169,7 @@
     "llama-2-70b-chat",
     "llama-3-8b",
     "llama-3-8b-instruct",
+    "llama-3-8b-instruct-262k",
     "llama-3-70b",
     "llama-3-70b-instruct",
     "falcon-7b",
@@ -240,6 +241,7 @@
     # Can also see 13B, 34B there too
     "gemma": {"max_model_len": 8192, "max_num_batched_tokens": 8192},
     "llama-2": {"max_model_len": None, "max_num_batched_tokens": 4096},
+    "llama-3-8b-instruct-262k": {"max_model_len": None, "max_num_batched_tokens": 262144},
     "llama-3": {"max_model_len": None, "max_num_batched_tokens": 8192},
     "mistral": {"max_model_len": 8000, "max_num_batched_tokens": 8000},
     "mixtral-8x7b": {"max_model_len": 32768, "max_num_batched_tokens": 32768},
@@ -2211,6 +2213,12 @@ def infer_hardware_from_model_name(model_name: str) -> CreateDockerImageBatchJob
         memory = "800Gi"
         storage = "460Gi"
         gpu_type = GpuType.NVIDIA_AMPERE_A100E
+    elif "llama-3-8b-instruct-262k" in model_name:
+        cpus = "20"
+        gpus = 2
+        memory = "40Gi"
+        storage = "40Gi"
+        gpu_type = GpuType.NVIDIA_AMPERE_A100E
     else:
         numbers = re.findall(r"\d+", model_name)
         if len(numbers) == 0:
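Taken together, these three hunks register the new model name, give the 262k variant its own vLLM batching budget (262,144 tokens, matching the extended context window), and route it to a dedicated two-GPU hardware branch. Below is a minimal, self-contained sketch of that dispatch logic, not the repo's actual code: HardwareSpec is a hypothetical stand-in for the real resource-request object, the GPU type is reduced to a string, and only the branches visible in this diff are included. The substring-matching lookup order is an assumption based on how the entries are arranged.

from dataclasses import dataclass

@dataclass
class HardwareSpec:
    # Hypothetical stand-in for the CreateDockerImageBatchJob... return
    # type in the diff; field names mirror the attributes the test asserts.
    cpus: str
    gpus: int
    memory: str
    storage: str
    gpu_type: str

# Per-family vLLM limits from the second hunk. The 262k key is inserted
# *above* the generic "llama-3" key, which matters if entries are scanned
# in order and matched on substring (an assumption in this sketch).
VLLM_LIMITS = {
    "llama-3-8b-instruct-262k": {"max_model_len": None, "max_num_batched_tokens": 262144},
    "llama-3": {"max_model_len": None, "max_num_batched_tokens": 8192},
}

def get_vllm_limits(model_name: str) -> dict:
    for family, limits in VLLM_LIMITS.items():
        if family in model_name:
            return limits
    raise ValueError(f"no vLLM limits configured for {model_name}")

def infer_hardware_from_model_name(model_name: str) -> HardwareSpec:
    # Substring dispatch, as in the third hunk: the long-context variant
    # gets its own branch before any generic fallback sees the name.
    if "llama-3-8b-instruct-262k" in model_name:
        return HardwareSpec("20", 2, "40Gi", "40Gi", "NVIDIA_AMPERE_A100E")
    # The real function has many more elif branches and a regex-based
    # fallback; everything else is elided here.
    raise ValueError(f"unsupported model: {model_name}")

The commit does not state why 262,144 batched tokens pairs with two A100E GPUs, but a back-of-the-envelope KV-cache estimate is consistent with it, assuming Llama-3-8B's published shape (32 layers, 8 KV heads via GQA, head dim 128) and a 2-byte fp16 cache:

LAYERS, KV_HEADS, HEAD_DIM, BYTES_FP16 = 32, 8, 128, 2
# K and V caches per token, summed across all layers.
per_token = 2 * LAYERS * KV_HEADS * HEAD_DIM * BYTES_FP16   # 131,072 B
full_context_gib = 262_144 * per_token / 2**30
print(f"{per_token} B/token -> {full_context_gib:.0f} GiB KV cache at 262k tokens")
# -> 32 GiB of KV cache on top of roughly 16 GiB of fp16 weights, which
# sits comfortably across two A100s (this rationale is an inference,
# not something the diff states).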

model-engine/model_engine_server/infra/repositories/live_tokenizer_repository.py

Lines changed: 1 addition & 0 deletions

@@ -43,6 +43,7 @@ def get_default_supported_models_info() -> Dict[str, ModelInfo]:
         "llama-2-70b-chat": ModelInfo("meta-llama/Llama-2-70b-chat-hf", None),
         "llama-3-8b": ModelInfo("meta-llama/Meta-Llama-3-8B", None),
         "llama-3-8b-instruct": ModelInfo("meta-llama/Meta-Llama-3-8B-Instruct", None),
+        "llama-3-8b-instruct-262k": ModelInfo("gradientai/Llama-3-8B-Instruct-262k", None),
         "llama-3-70b": ModelInfo("meta-llama/Meta-Llama-3-70B", None),
         "llama-3-70b-instruct": ModelInfo("meta-llama/Meta-Llama-3-70B-Instruct", None),
         "falcon-7b": ModelInfo("tiiuae/falcon-7b", None),

model-engine/tests/unit/domain/test_llm_use_cases.py

Lines changed: 7 additions & 0 deletions

@@ -1809,6 +1809,13 @@ def test_infer_hardware_from_model_name():
     assert hardware.storage == "160Gi"
     assert hardware.gpu_type == GpuType.NVIDIA_AMPERE_A100E

+    hardware = infer_hardware_from_model_name("llama-3-8b-instruct-262k")
+    assert hardware.cpus == "20"
+    assert hardware.gpus == 2
+    assert hardware.memory == "40Gi"
+    assert hardware.storage == "40Gi"
+    assert hardware.gpu_type == GpuType.NVIDIA_AMPERE_A100E
+
     with pytest.raises(ObjectHasInvalidValueException):
         infer_hardware_from_model_name("unsupported_model")
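The added assertions pin the exact hardware tuple chosen by the new branch. For completeness, they can be mirrored against the simplified sketch above, with ValueError standing in for the repo's ObjectHasInvalidValueException:

import pytest

def test_infer_hardware_262k_sketch():
    # Mirrors the commit's added assertions against the sketch version.
    hardware = infer_hardware_from_model_name("llama-3-8b-instruct-262k")
    assert hardware.cpus == "20"
    assert hardware.gpus == 2
    assert hardware.memory == "40Gi"
    assert hardware.storage == "40Gi"
    assert hardware.gpu_type == "NVIDIA_AMPERE_A100E"

    with pytest.raises(ValueError):
        infer_hardware_from_model_name("unsupported_model")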
