Skip to content

Commit da66e95

Browse files
added deployment json files
1 parent 14f45a0 commit da66e95

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
{
  "recipe_id": "offline_inference_sglang",
  "recipe_mode": "job",
  "deployment_name": "Offline Inference Benchmark",
  "recipe_image_uri": "iad.ocir.io/iduyx1qnmway/corrino-devops-repository:llm-benchmark-0409-v4",
  "recipe_node_shape": "VM.GPU.A10.2",
  "input_object_storage": [
    {
      "par": "https://objectstorage.ap-melbourne-1.oraclecloud.com/p/0T99iRADcM08aVpumM6smqMIcnIJTFtV2D8ZIIWidUP9eL8GSRyDMxOb9Va9rmRc/n/iduyx1qnmway/b/mymodels/o/",
      "mount_location": "/models",
      "volume_size_in_gbs": 500,
      "include": [
        "new_example_sglang.yaml",
        "NousResearch/Meta-Llama-3.1-8B"
      ]
    }
  ],
  "output_object_storage": [
    {
      "bucket_name": "inference_output",
      "mount_location": "/mlcommons_output",
      "volume_size_in_gbs": 200
    }
  ],
  "recipe_container_command_args": [
    "/models/new_example_sglang.yaml"
  ],
  "recipe_replica_count": 1,
  "recipe_container_port": "8000",
  "recipe_nvidia_gpu_count": 2,
  "recipe_node_pool_size": 1,
  "recipe_node_boot_volume_size_in_gbs": 200,
  "recipe_ephemeral_storage_size": 100,
  "recipe_shared_memory_volume_size_limit_in_mb": 200
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
{
  "recipe_id": "offline_inference_vllm",
  "recipe_mode": "job",
  "deployment_name": "Offline Inference Benchmark vllm",
  "recipe_image_uri": "iad.ocir.io/iduyx1qnmway/corrino-devops-repository:llm-benchmark-0409-v4",
  "recipe_node_shape": "VM.GPU.A10.2",
  "input_object_storage": [
    {
      "par": "https://objectstorage.ap-melbourne-1.oraclecloud.com/p/0T99iRADcM08aVpumM6smqMIcnIJTFtV2D8ZIIWidUP9eL8GSRyDMxOb9Va9rmRc/n/iduyx1qnmway/b/mymodels/o/",
      "mount_location": "/models",
      "volume_size_in_gbs": 500,
      "include": [
        "offline_vllm_example.yaml",
        "NousResearch/Meta-Llama-3.1-8B"
      ]
    }
  ],
  "output_object_storage": [
    {
      "bucket_name": "inference_output",
      "mount_location": "/mlcommons_output",
      "volume_size_in_gbs": 200
    }
  ],
  "recipe_container_command_args": [
    "/models/offline_vllm_example.yaml"
  ],
  "recipe_replica_count": 1,
  "recipe_container_port": "8000",
  "recipe_nvidia_gpu_count": 2,
  "recipe_node_pool_size": 1,
  "recipe_node_boot_volume_size_in_gbs": 200,
  "recipe_ephemeral_storage_size": 100,
  "recipe_shared_memory_volume_size_limit_in_mb": 200
}

0 commit comments

Comments
 (0)