Skip to content

Commit da66e95

Browse files
added deployment json files
1 parent 14f45a0 commit da66e95

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
{
  "recipe_id": "offline_inference_sglang",
  "recipe_mode": "job",
  "deployment_name": "Offline Inference Benchmark",
  "recipe_image_uri": "iad.ocir.io/iduyx1qnmway/corrino-devops-repository:llm-benchmark-0409-v4",
  "recipe_node_shape": "VM.GPU.A10.2",
  "input_object_storage": [
    {
      "par": "https://objectstorage.ap-melbourne-1.oraclecloud.com/p/0T99iRADcM08aVpumM6smqMIcnIJTFtV2D8ZIIWidUP9eL8GSRyDMxOb9Va9rmRc/n/iduyx1qnmway/b/mymodels/o/",
      "mount_location": "/models",
      "volume_size_in_gbs": 500,
      "include": [
        "new_example_sglang.yaml",
        "NousResearch/Meta-Llama-3.1-8B"
      ]
    }
  ],
  "output_object_storage": [
    {
      "bucket_name": "inference_output",
      "mount_location": "/mlcommons_output",
      "volume_size_in_gbs": 200
    }
  ],
  "recipe_container_command_args": [
    "/models/new_example_sglang.yaml"
  ],
  "recipe_replica_count": 1,
  "recipe_container_port": "8000",
  "recipe_nvidia_gpu_count": 2,
  "recipe_node_pool_size": 1,
  "recipe_node_boot_volume_size_in_gbs": 200,
  "recipe_ephemeral_storage_size": 100,
  "recipe_shared_memory_volume_size_limit_in_mb": 200
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
{
  "recipe_id": "offline_inference_vllm",
  "recipe_mode": "job",
  "deployment_name": "Offline Inference Benchmark vllm",
  "recipe_image_uri": "iad.ocir.io/iduyx1qnmway/corrino-devops-repository:llm-benchmark-0409-v4",
  "recipe_node_shape": "VM.GPU.A10.2",
  "input_object_storage": [
    {
      "par": "https://objectstorage.ap-melbourne-1.oraclecloud.com/p/0T99iRADcM08aVpumM6smqMIcnIJTFtV2D8ZIIWidUP9eL8GSRyDMxOb9Va9rmRc/n/iduyx1qnmway/b/mymodels/o/",
      "mount_location": "/models",
      "volume_size_in_gbs": 500,
      "include": [
        "offline_vllm_example.yaml",
        "NousResearch/Meta-Llama-3.1-8B"
      ]
    }
  ],
  "output_object_storage": [
    {
      "bucket_name": "inference_output",
      "mount_location": "/mlcommons_output",
      "volume_size_in_gbs": 200
    }
  ],
  "recipe_container_command_args": [
    "/models/offline_vllm_example.yaml"
  ],
  "recipe_replica_count": 1,
  "recipe_container_port": "8000",
  "recipe_nvidia_gpu_count": 2,
  "recipe_node_pool_size": 1,
  "recipe_node_boot_volume_size_in_gbs": 200,
  "recipe_ephemeral_storage_size": 100,
  "recipe_shared_memory_volume_size_limit_in_mb": 200
}

0 commit comments

Comments
 (0)