Commit 14f45a0

added sample json files
1 parent 8ec78b4 commit 14f45a0

4 files changed (+70, -0 lines)

docs/sample_blueprints/offline-inference-infra/README.md

Lines changed: 1 addition & 0 deletions
@@ -41,6 +41,7 @@ Offline inference is ideal for:

This blueprint supports benchmark execution via a job-mode recipe using a YAML config file. The recipe mounts a model and config file from Object Storage, runs offline inference, and logs metrics.

+ Note: Make sure your output Object Storage bucket is in the same tenancy as your stack.

---

### Sample Recipe (Job Mode for Offline SGLang Inference)
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
```yaml
benchmark_type: offline
offline_backend: sglang

# Model and tokenizer (mounted from Object Storage)
model_path: /models/NousResearch/Meta-Llama-3.1-8B
tokenizer_path: /models/NousResearch/Meta-Llama-3.1-8B
trust_remote_code: true
conv_template: llama-2

# Benchmark shape and sampling
input_len: 128
output_len: 128
num_prompts: 64
max_seq_len: 4096
max_batch_size: 8
dtype: auto
temperature: 0.7
top_p: 0.9

# MLflow logging
mlflow_uri: http://mlflow-benchmarking.corrino-oci.com:5000
experiment_name: "sglang-bench-doc-test-new"
run_name: "llama3-8b-sglang-test"

# Output
save_metrics_path: /mlcommons_output/benchmark_output_llama3_sglang.json
```
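For illustration, here is a minimal sketch (not code from this commit) of how a harness might load and sanity-check this recipe config with PyYAML before launching the run; the filename and the required-key list are assumptions based on the sample above.

```python
# Sketch: load the job-mode recipe config and verify the keys the
# benchmark needs. "offline_sglang.yaml" is a hypothetical filename.
import yaml

REQUIRED = ["benchmark_type", "offline_backend", "model_path",
            "tokenizer_path", "input_len", "output_len", "num_prompts"]

with open("offline_sglang.yaml") as f:
    cfg = yaml.safe_load(f)

missing = [k for k in REQUIRED if k not in cfg]
if missing:
    raise ValueError(f"config is missing keys: {missing}")

assert cfg["benchmark_type"] == "offline"
assert cfg["offline_backend"] == "sglang"
print(f"{cfg['num_prompts']} prompts, {cfg['input_len']} -> {cfg['output_len']} "
      f"tokens, model {cfg['model_path']}")
```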
Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
```yaml
benchmark_type: offline
model: /models/NousResearch/Meta-Llama-3.1-8B
tokenizer: /models/NousResearch/Meta-Llama-3.1-8B

input_len: 12
output_len: 12
num_prompts: 2
seed: 42
tensor_parallel_size: 8

# vLLM-specific
#quantization: awq
dtype: half
gpu_memory_utilization: 0.99
num_scheduler_steps: 10
device: cuda
enforce_eager: true
kv_cache_dtype: auto
enable_prefix_caching: true
distributed_executor_backend: mp

# Output
#output_json: ./128_128.json

# MLflow
mlflow_uri: http://mlflow-benchmarking.corrino-oci.com:5000
experiment_name: test-bm-suite-doc
run_name: llama3-vllm-test
save_metrics_path: /mlcommons_output/benchmark_output_llama3_vllm.json
```
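Most fields above correspond directly to vLLM's offline `LLM` engine arguments. A minimal sketch of that mapping, assuming the harness drives vLLM's Python API (the config filename and placeholder prompts are stand-ins, not part of this commit):

```python
# Sketch: map the sample config onto vLLM's offline API.
# The constructor and sampling kwargs below are standard vLLM arguments;
# "offline_vllm.yaml" is a hypothetical filename.
import yaml
from vllm import LLM, SamplingParams

with open("offline_vllm.yaml") as f:
    cfg = yaml.safe_load(f)

llm = LLM(
    model=cfg["model"],
    tokenizer=cfg["tokenizer"],
    seed=cfg["seed"],
    tensor_parallel_size=cfg["tensor_parallel_size"],
    dtype=cfg["dtype"],
    gpu_memory_utilization=cfg["gpu_memory_utilization"],
    enforce_eager=cfg["enforce_eager"],
    kv_cache_dtype=cfg["kv_cache_dtype"],
    enable_prefix_caching=cfg["enable_prefix_caching"],
    distributed_executor_backend=cfg["distributed_executor_backend"],
)

# Fixed-length generation for benchmarking: ignore EOS so every request
# produces exactly output_len tokens.
params = SamplingParams(max_tokens=cfg["output_len"], ignore_eos=True)
outputs = llm.generate(["benchmark prompt"] * cfg["num_prompts"], params)
```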
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
```yaml
benchmark_type: online
model: meta/llama3-8b-instruct
input_len: 64
output_len: 32
max_requests: 5
timeout: 300
num_concurrent: 1
results_dir: /workspace/results_on
llm_api: openai
llm_api_key: dummy-key
llm_api_base: http://localhost:8001/v1
experiment_name: local-bench
run_name: llama3-test
mlflow_uri: http://mlflow-benchmarking.corrino-oci.com:5000
llmperf_path: /opt/llmperf-src
metadata: test=localhost
```
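The `llmperf_path` and `llm_api*` fields suggest this online config drives ray-project/llmperf. A minimal sketch of the invocation that mapping implies; the flag and environment-variable names are llmperf's, while the wrapper itself is hypothetical and not part of this commit:

```python
# Sketch: shell out to llmperf's token_benchmark_ray.py using the
# fields from the online config. "online_llmperf.yaml" is a
# hypothetical filename.
import os
import subprocess

import yaml

with open("online_llmperf.yaml") as f:
    cfg = yaml.safe_load(f)

# llmperf's openai client reads the endpoint and key from these env vars.
env = dict(
    os.environ,
    OPENAI_API_BASE=cfg["llm_api_base"],
    OPENAI_API_KEY=cfg["llm_api_key"],
)

subprocess.run(
    [
        "python", os.path.join(cfg["llmperf_path"], "token_benchmark_ray.py"),
        "--model", cfg["model"],
        "--llm-api", cfg["llm_api"],
        "--mean-input-tokens", str(cfg["input_len"]),
        "--mean-output-tokens", str(cfg["output_len"]),
        "--max-num-completed-requests", str(cfg["max_requests"]),
        "--num-concurrent-requests", str(cfg["num_concurrent"]),
        "--timeout", str(cfg["timeout"]),
        "--results-dir", cfg["results_dir"],
        "--metadata", cfg["metadata"],
    ],
    env=env,
    check=True,
)
```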
