
Commit e24033c

merge main and resolve conflicts

Signed-off-by: dalthecow <[email protected]>

1 parent 46b5e87 commit e24033c

23 files changed: +207 −83 lines

.github/workflows/container-maintenance.yml

Lines changed: 12 additions & 8 deletions

```diff
@@ -9,19 +9,22 @@ on:
 concurrency:
   group: ${{ github.workflow }}
 
+permissions:
+  packages: write
+
 jobs:
   cleanup-container-tags:
     runs-on: ubuntu-latest
     steps:
       - name: Delete PR and untagged images older than 2 weeks
        uses: snok/[email protected]
        with:
-          account: ${{ github.actor }}
+          account: ${{ github.repository_owner }}
          token: ${{ github.token }}
          image-names: ${{ github.event.repository.name }}
          image-tags: "pr-*"
          cut-off: 2w
-          dry-run: true
+          dry-run: false
 
   push-container-tags:
     runs-on: ubuntu-latest
@@ -31,19 +34,20 @@ jobs:
       - name: Log into ghcr.io
        uses: redhat-actions/podman-login@v1
        with:
-          username: ${{ github.actor }}
+          username: ${{ github.repository_owner }}
          password: ${{ github.token }}
          registry: ghcr.io/${{ github.repository_owner }}
       - name: Get list of tags
        run: |
-          skopeo list-tags docker://${{ github.repository }} | jq --raw-output '.Tags[]' > tags
+          set -euo pipefail # Fail pipe if any command fails
+          skopeo list-tags docker://ghcr.io/${{ github.repository }} | jq --raw-output '.Tags[]' > tags
       - name: Get latest release and rc tags
        run: |
          STABLE_TAG="$(grep -P '^v\d+\.\d+\.\d+$' tags | sort -rV | head -n1)"
-          echo "STABLE_TAG=${STABLE_TAG:-v0.0.0}" >> $GITHUB_ENV
+          echo "stable_tag=${STABLE_TAG:-v0.0.0}" >> $GITHUB_ENV
          LATEST_TAG="$(grep -P '^v\d+\.\d+\.\d+' tags | sort -rV | head -n1)"
-          echo "LATEST_TAG=${LATEST_TAG:-v0.0.0}" >> $GITHUB_ENV
+          echo "latest_tag=${LATEST_TAG:-v0.0.0}" >> $GITHUB_ENV
       - name: Update latest and stable tags
        run: |
-          skopeo copy docker://${{ github.repository }}:${{ env.stable_tag }} docker://${{ github.repository }}:stable
-          skopeo copy docker://${{ github.repository }}:${{ env.latest_tag }} docker://${{ github.repository }}:latest
+          skopeo copy docker://ghcr.io/${{ github.repository }}:${{ env.stable_tag }} docker://ghcr.io/${{ github.repository }}:stable
+          skopeo copy docker://ghcr.io/${{ github.repository }}:${{ env.latest_tag }} docker://ghcr.io/${{ github.repository }}:latest
```
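The tag-selection step above leans on `grep -P '^v\d+\.\d+\.\d+$'` and GNU version sort (`sort -rV | head -n1`). A minimal Python sketch of the same selection logic, using made-up tags (this is illustrative, not the workflow's actual code):

```python
import re

# hypothetical tag list, as `skopeo list-tags` might return it
tags = ["v0.9.0", "v1.2.3-rc1", "v1.10.0", "latest", "pr-123"]

# STABLE_TAG filter: strictly vMAJOR.MINOR.PATCH, like grep -P '^v\d+\.\d+\.\d+$'
stable = [t for t in tags if re.fullmatch(r"v\d+\.\d+\.\d+", t)]

# sort -rV compares components numerically, so v1.10.0 outranks v1.2.3
def version_key(tag: str) -> tuple:
    return tuple(int(p) for p in tag[1:].split("."))

# default mirrors the workflow's ${STABLE_TAG:-v0.0.0} fallback
stable_tag = max(stable, key=version_key, default="v0.0.0")
print(stable_tag)  # v1.10.0
```

Note how a plain lexical sort would have picked `v1.2.3-rc1` first; the numeric component comparison is what makes `sort -rV` (and this sketch) correct.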

.github/workflows/release-candidate.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -228,7 +228,7 @@ jobs:
         uses: peaceiris/actions-gh-pages@v3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: ./ui/out
+          publish_dir: .src/ui/out
           destination_dir: ui/release/${TAG}
           keep_files: false
           user_name: ${{ github.actor }}
```

.github/workflows/release.yml

Lines changed: 5 additions & 2 deletions

```diff
@@ -227,7 +227,7 @@ jobs:
         uses: peaceiris/actions-gh-pages@v3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: ./ui/out
+          publish_dir: ./src/ui/out
           destination_dir: ui/${TAG}
           keep_files: false
           user_name: ${{ github.actor }}
@@ -297,7 +297,10 @@ jobs:
         with:
           fetch-depth: 0
       - name: Get version from branch
-        run: echo "PACKAGE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
+        run: |
+          GITHUB_REF="${{ github.ref }}"
+          [[ -z "$GITHUB_REF" ]] && exit 1 # Fail if ref is unset
+          echo "PACKAGE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
       - name: Buildah build
         id: build-image
         uses: redhat-actions/buildah-build@v2
```
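The `${GITHUB_REF#refs/*/}` expansion in the step above removes the shortest leading `refs/<kind>/` prefix, leaving just the tag or branch name. A quick sketch of the equivalent logic in Python (the ref values are made-up examples):

```python
import re

def package_version(ref: str) -> str:
    # bash ${ref#refs/*/}: strip the shortest leading match of refs/*/
    return re.sub(r"^refs/[^/]*/", "", ref)

print(package_version("refs/tags/v1.2.3"))    # v1.2.3
print(package_version("refs/heads/release"))  # release
```

The "shortest match" detail matters: for a branch like `refs/heads/feature/x`, only `refs/heads/` is stripped, so the result keeps the nested path `feature/x`.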

docs/assets/sample-output1.png (324 KB)

docs/assets/sample-output2.png (298 KB)

docs/assets/sample-output3.png (186 KB)
Lines changed: 117 additions & 0 deletions

# GuideLLM Benchmark Testing Best Practice

A first, easy guidellm benchmark test from scratch, using the vLLM Simulator.
## Getting Started

### 📦 1. Benchmark Testing Environment Setup

#### 1.1 Create a Conda Environment (recommended)

```bash
conda create -n guidellm-bench python=3.11 -y
conda activate guidellm-bench
```

#### 1.2 Install Dependencies

```bash
git clone https://github.com/vllm-project/guidellm.git
cd guidellm
pip install guidellm
```

For more detailed instructions, refer to the [GuideLLM README](https://github.com/vllm-project/guidellm/blob/main/README.md).

#### 1.3 Verify Installation

```bash
guidellm --help
```

#### 1.4 Start an OpenAI-compatible API in the vLLM simulator Docker container

```bash
docker pull ghcr.io/llm-d/llm-d-inference-sim:v0.4.0

docker run --rm --publish 8000:8000 \
  ghcr.io/llm-d/llm-d-inference-sim:v0.4.0 \
  --port 8000 \
  --model "Qwen/Qwen2.5-1.5B-Instruct" \
  --lora-modules '{"name":"tweet-summary-0"}' '{"name":"tweet-summary-1"}'
```

For more detailed instructions, refer to the [vLLM Simulator](https://llm-d.ai/docs/architecture/Components/inference-sim) documentation.

Docker image versions: [Docker Images](https://github.com/llm-d/llm-d-inference-sim/pkgs/container/llm-d-inference-sim)

Verify that the OpenAI-compatible API is working via curl:

- Check /v1/models:

```bash
curl --request GET 'http://localhost:8000/v1/models'
```

- Check /v1/chat/completions:

```bash
curl --request POST 'http://localhost:8000/v1/chat/completions' \
  --header 'Content-Type: application/json' \
  --data-raw '{
    "model": "tweet-summary-0",
    "stream": false,
    "messages": [{"role": "user", "content": "Say this is a test!"}]
  }'
```

- Check /v1/completions:

```bash
curl --request POST 'http://localhost:8000/v1/completions' \
  --header 'Content-Type: application/json' \
  --data-raw '{
    "model": "tweet-summary-0",
    "stream": false,
    "prompt": "Say this is a test!",
    "max_tokens": 128
  }'
```
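A successful `/v1/chat/completions` call returns the generated text under `choices[0].message.content`. A small sketch of pulling that field out of the response body (the JSON below is a hand-written sample in the OpenAI response shape, not captured simulator output):

```python
import json

# hand-written sample response in the OpenAI chat-completions shape
body = '{"choices": [{"message": {"role": "assistant", "content": "This is a test!"}}]}'

reply = json.loads(body)["choices"][0]["message"]["content"]
print(reply)  # This is a test!
```

The plain `/v1/completions` endpoint differs only in shape: the text comes back under `choices[0].text` instead of a nested `message` object.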
#### 1.5 Download the Tokenizer

Download the Qwen/Qwen2.5-1.5B-Instruct tokenizer files from [Qwen/Qwen2.5-1.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct/files) and save them to a local path such as ${local_path}/Qwen2.5-1.5B-Instruct:

```bash
ls ./Qwen2.5-1.5B-Instruct
merges.txt  tokenizer.json  tokenizer_config.json  vocab.json
```

______________________________________________________________________

## 🚀 2. Running Benchmarks

```bash
guidellm benchmark \
  --target "http://localhost:8000/" \
  --model "tweet-summary-0" \
  --processor "${local_path}/Qwen2.5-1.5B-Instruct" \
  --rate-type sweep \
  --max-seconds 10 \
  --max-requests 10 \
  --data "prompt_tokens=128,output_tokens=56"
```

______________________________________________________________________

## 📊 3. Results Interpretation

![sample output 1](../assets/sample-output1.png) ![sample output 2](../assets/sample-output2.png) ![sample output 3](../assets/sample-output3.png)

After the benchmark completes, the key metrics in the report are:

- **`TTFT`**: Time to First Token
- **`TPOT`**: Time Per Output Token
- **`ITL`**: Inter-Token Latency

Your first benchmark test is complete.
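All three metrics can be read off a list of token-arrival timestamps. A minimal sketch with made-up timings (illustrative only, not guidellm's implementation): the mean of the inter-token gaps coincides with the per-request TPOT average, but ITL's worst gap exposes a mid-stream stall that the average hides.

```python
# arrival time (s) of each output token after the request was sent (made-up data)
token_times = [0.30, 0.35, 0.40, 0.90, 0.95]

ttft = token_times[0]                                      # time to first token
gaps = [b - a for a, b in zip(token_times, token_times[1:])]
itl_mean = sum(gaps) / len(gaps)                           # inter-token latency (mean)
tpot = (token_times[-1] - ttft) / (len(token_times) - 1)   # time per output token

print(ttft, round(itl_mean, 4), round(tpot, 4), round(max(gaps), 2))
```

Mean ITL and TPOT are algebraically identical here (both divide the same span by the same count); the distributions only diverge at the tails, which is why reports show ITL percentiles.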

src/guidellm/presentation/data_models.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -208,7 +208,7 @@ def from_distribution_summary(
 
 class BenchmarkDatum(BaseModel):
     requests_per_second: float
-    tpot: TabularDistributionSummary
+    itl: TabularDistributionSummary
     ttft: TabularDistributionSummary
     throughput: TabularDistributionSummary
     time_per_request: TabularDistributionSummary
@@ -217,7 +217,7 @@ class BenchmarkDatum(BaseModel):
     def from_benchmark(cls, bm: "GenerativeBenchmark"):
         return cls(
             requests_per_second=bm.metrics.requests_per_second.successful.mean,
-            tpot=TabularDistributionSummary.from_distribution_summary(
+            itl=TabularDistributionSummary.from_distribution_summary(
                 bm.metrics.inter_token_latency_ms.successful
            ),
            ttft=TabularDistributionSummary.from_distribution_summary(
```
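The rename only touches the field name: the value was already populated from `inter_token_latency_ms`, so `itl` now says what the datum actually holds. A hypothetical before/after sketch of the shape, as a plain dataclass rather than the pydantic model (field values are made up):

```python
from dataclasses import dataclass

@dataclass
class BenchmarkDatum:
    requests_per_second: float
    itl: float   # was `tpot`; fed from metrics.inter_token_latency_ms
    ttft: float  # time to first token (ms)

# hypothetical values, for illustration only
datum = BenchmarkDatum(requests_per_second=12.5, itl=18.3, ttft=210.0)
print(datum.itl)
```

Callers that still read `.tpot` get an `AttributeError` after this change, which is why the UI component below is updated in the same commit.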

src/guidellm/settings.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -32,8 +32,8 @@ class Environment(str, Enum):
 
 
 ENV_REPORT_MAPPING = {
-    Environment.PROD: "https://blog.vllm.ai/guidellm/ui/latest/index.html",
-    Environment.STAGING: "https://blog.vllm.ai/guidellm/ui/release/latest/index.html",
+    Environment.PROD: "https://blog.vllm.ai/guidellm/ui/v0.3.0/index.html",
+    Environment.STAGING: "https://blog.vllm.ai/guidellm/ui/release/v0.3.0/index.html",
     Environment.DEV: "https://blog.vllm.ai/guidellm/ui/dev/index.html",
     Environment.LOCAL: "http://localhost:3000/index.html",
 }
```

src/ui/lib/components/MetricsSummary/MetricsSummary.component.tsx

Lines changed: 15 additions & 15 deletions

```diff
@@ -54,15 +54,15 @@ export const Component = () => {
 
   const {
     ttft: ttftSLO,
-    tpot: tpotSLO,
+    itl: itlSLO,
     timePerRequest: timePerRequestSLO,
     throughput: throughputSLO,
     percentile,
     minX,
     maxX,
     errors,
     handleTtft,
-    handleTpot,
+    handleItl,
     handleTimePerRequest,
     handleThroughput,
     handlePercentileChange,
@@ -72,8 +72,8 @@ export const Component = () => {
   const isTtftMatch = Boolean(
     ttftSLO && interpolatedMetricData.ttft.enforcedPercentileValue <= ttftSLO
   );
-  const isTpotMatch = Boolean(
-    tpotSLO && interpolatedMetricData.tpot.enforcedPercentileValue <= tpotSLO
+  const isItlMatch = Boolean(
+    itlSLO && interpolatedMetricData.itl.enforcedPercentileValue <= itlSLO
   );
   const isTprMatch = Boolean(
     timePerRequestSLO &&
@@ -123,7 +123,7 @@ export const Component = () => {
       <FieldsContainer data-id="fields-container">
         <FieldCell data-id="field-cell-1">
           <Input
-            label="TTFT (ms)"
+            label="TIME TO FIRST TOKEN (ms)"
             value={ttftSLO}
             onChange={handleTtft}
             fullWidth
@@ -133,12 +133,12 @@ export const Component = () => {
         </FieldCell>
         <FieldCell data-id="field-cell-2">
           <Input
-            label="TPOT (ms)"
-            value={tpotSLO}
-            onChange={handleTpot}
+            label="INTER-TOKEN LATENCY (ms)"
+            value={itlSLO}
+            onChange={handleItl}
             fullWidth
             fontColor={LineColor.Secondary}
-            error={errors?.tpot}
+            error={errors?.itl}
           />
         </FieldCell>
         <FieldCell data-id="field-cell-3">
@@ -212,7 +212,7 @@ export const Component = () => {
         </MiddleColumn>
         <MiddleColumn item xs={3}>
           <MetricValue
-            label="TTFT"
+            label="time to first token"
             value={`${formatNumber(interpolatedMetricData.ttft.enforcedPercentileValue)} ms`}
             match={isTtftMatch}
             valueColor={LineColor.Primary}
@@ -222,17 +222,17 @@ export const Component = () => {
         <MiddleColumn sx={{ paddingLeft: '0px !important' }} item xs={9}>
           <GraphContainer>
             <MetricLine
-              data={[{ id: 'tpot', data: lineDataByRps.tpot || [] }]}
-              threshold={tpotSLO}
+              data={[{ id: 'itl', data: lineDataByRps.itl || [] }]}
+              threshold={itlSLO}
               lineColor={LineColor.Secondary}
             />
           </GraphContainer>
         </MiddleColumn>
         <MiddleColumn item xs={3}>
           <MetricValue
-            label="TPOT"
-            value={`${formatNumber(interpolatedMetricData.tpot.enforcedPercentileValue)} ms`}
-            match={isTpotMatch}
+            label="inter-token latency"
+            value={`${formatNumber(interpolatedMetricData.itl.enforcedPercentileValue)} ms`}
+            match={isItlMatch}
             valueColor={LineColor.Secondary}
           />
         </MiddleColumn>
```
