# LLM Benchmarking with Docker Compose

This repository demonstrates how to benchmark LLMs with
[vLLM](https://vllm.ai) and
[genai-perf](https://docs.nvidia.com/nim/benchmarking/llm/latest/step-by-step.html#using-genai-perf-to-benchmark)
using [Docker Compose](https://docs.docker.com/compose/).

Reviewed: 20.05.2025

# When should this asset be used?

* If you want to evaluate the performance of various LLMs or GPU shapes on OCI.

# How is this asset used?

## Prerequisites

* You have access to an Oracle Cloud tenancy.
* You have access to shapes with NVIDIA GPUs, such as the A10.
* You have a HuggingFace account and access to `meta-llama/Llama-3.1-8B-Instruct`.

## Infrastructure Setup

1. Create a new instance using a GPU shape.

   * Use Ubuntu as the system image for simplicity.

     <img src="files/image.png" alt="Selecting Ubuntu as the OS image" width="75%" />

   * Create a large enough boot volume, e.g., with 200 GB of space.

2. Log into the machine and install the NVIDIA drivers:
   ```sh
   sudo apt-get update
   sudo apt-get install -y ubuntu-drivers-common
   sudo ubuntu-drivers install --gpgpu nvidia:570-server
   ```
   If your shape has an NVLink fabric, also install the matching fabric manager
   from NVIDIA:
   ```sh
   sudo apt-get install -y nvidia-fabricmanager-570
   ```
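
   After the reboot in step 5, you can optionally check that the fabric
   manager service came up. This is a quick sanity check and assumes the
   package ships the standard `nvidia-fabricmanager` systemd unit:
   ```sh
   # Assumption: nvidia-fabricmanager-570 installs nvidia-fabricmanager.service.
   sudo systemctl enable --now nvidia-fabricmanager
   systemctl status nvidia-fabricmanager
   ```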

3. Install Docker Compose:
   ```sh
   sudo apt-get install -y docker-compose
   ```
   and add yourself to the `docker` group (the new group membership takes
   effect at your next login; the reboot in step 5 takes care of that):
   ```sh
   sudo usermod -aG docker ubuntu
   ```

4. Then install and configure the NVIDIA container toolkit.
   In-depth instructions for the container toolkit can be found
   [on NVIDIA's website](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#with-apt-ubuntu-debian).
   ```sh
   curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
     && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
       sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
       sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
   sudo apt-get update
   sudo apt-get install -y nvidia-container-toolkit
   sudo nvidia-ctk runtime configure --runtime=docker
   ```

5. Reboot the machine.
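
   After the reboot, verify that the driver and the container runtime work
   end to end. The CUDA image tag below is an assumption; any recent CUDA
   base image will do:
   ```sh
   # Driver check on the host.
   nvidia-smi
   # GPU passthrough check inside a container (image tag is an assumption).
   docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
   ```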

## Environment Configuration

This section is only needed if you wish to run models from HuggingFace that
are gated and require an access token.

1. Install `uv`:
   ```sh
   sudo snap install --classic astral-uv
   ```

2. Install the HuggingFace package:
   ```sh
   uv venv
   uv pip install huggingface_hub
   ```

3. Log into HuggingFace with your access token:
   ```sh
   uv run huggingface-cli login
   ```
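
   Alternatively, you can log in non-interactively; `huggingface-cli login`
   accepts a `--token` flag (the `HF_TOKEN` variable here is just a
   placeholder for wherever you store your token):
   ```sh
   # --token skips the interactive prompt; HF_TOKEN is a placeholder.
   uv run huggingface-cli login --token "$HF_TOKEN"
   ```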

## Executing the Benchmarks

1. Download the contents of the folder ["files"](./files).
   Then build the necessary containers with `docker-compose`:
   ```sh
   docker-compose --profile benchmark build
   ```

2. Edit the configuration file, `config.json`. It specifies all the settings
   with which the LLM is served. For example:
   ```json
   {
       "model": "meta-llama/Llama-3.1-8B-Instruct",
       "gpu_memory_utilization": 0.98,
       "tensor_parallel_size": 1,
       "max_model_len": 8192,
       "max_num_batched_tokens": 8192
   }
   ```
   will serve Llama 3.1 8B Instruct on a single GPU. Modify this file to the
   settings you desire.
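
   The fields correspond to vLLM engine arguments of the same name. As a
   sketch, a configuration for a shape with two GPUs might look as follows
   (assuming the rest of the setup stays the same):
   ```json
   {
       "model": "meta-llama/Llama-3.1-8B-Instruct",
       "gpu_memory_utilization": 0.95,
       "tensor_parallel_size": 2,
       "max_model_len": 16384,
       "max_num_batched_tokens": 16384
   }
   ```
   Here `tensor_parallel_size: 2` shards the model across both GPUs.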

3. Launch the LLM in the background:
   ```sh
   docker-compose up -d llm
   ```
   You can follow the start-up of the vLLM service with:
   ```sh
   docker-compose logs -f llm
   ```
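
   Once the logs report that the server is ready, you can confirm that the
   endpoint responds. vLLM exposes an OpenAI-compatible API; this assumes the
   service's port 8000 is published on the host:
   ```sh
   # Lists the served model(s); assumes port 8000 is reachable from the host.
   curl http://localhost:8000/v1/models
   ```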

4. Start the benchmarking container:
   ```sh
   docker-compose run perf
   ```
   This will execute multiple runs of NVIDIA's `genai-perf` and store the
   results in the directory `./results`, together with information about the
   vLLM parameters and the shape used.

   To run only certain scenarios and concurrent request settings, modify
   [`compose.yaml`](files/compose.yaml) and have the `command` for the `perf`
   container read, e.g.:
   ```yaml
   command:
     - "wait-for-it.sh"
     - "--timeout=300"
     - "llm:8000"
     - "--"
     - "/appli/scripts/benchmark.py"
     - "--scenario"
     - "chatbot"
     - "--concurrency"
     - "1"
     - "4"
     - "16"
   ```
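
   You can also pass the override on the command line without editing the
   file; this mirrors the `command` above, since arguments given to
   `docker-compose run` replace the service's default command:
   ```sh
   # Equivalent to the compose `command` override shown above.
   docker-compose run perf wait-for-it.sh --timeout=300 llm:8000 -- \
       /appli/scripts/benchmark.py --scenario chatbot --concurrency 1 4 16
   ```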

5. Run the plotting:
   ```sh
   docker-compose run plot
   ```
   The output files will be in `./plots`.

6. Shut down all remaining containers:
   ```sh
   docker-compose down
   ```

# Acknowledgments

- **Author** - Omar Awile (GPU Specialist)
- **Author** - Matthias Wolf (GPU Specialist)

# License

Copyright (c) 2025 Oracle and/or its affiliates.

Licensed under the Universal Permissive License (UPL), Version 1.0.

See [LICENSE](https://github.com/oracle-devrel/technology-engineering/blob/main/LICENSE) for more details.