
Commit c77d076

Merge branch 'main' into update-ui-readme

2 parents ac8bff8 + 71f1e3c

File tree: 17 files changed, +520 and -89 lines

.github/workflows/development.yml (1 addition, 1 deletion)

```diff
@@ -124,7 +124,7 @@ jobs:
       - name: Install dependencies
         run: pip install tox
       - name: Run unit tests
-        run: tox -e test-unit -- -m "smoke or sanity"
+        run: tox -e test-unit
 
   ui-unit-tests:
     permissions:
```

.github/workflows/main.yml (1 addition, 1 deletion)

```diff
@@ -125,7 +125,7 @@ jobs:
       - name: Install dependencies
         run: pip install tox
       - name: Run unit tests
-        run: tox -e test-unit -- -m "smoke or sanity"
+        run: tox -e test-unit
 
   ui-unit-tests:
     permissions:
```
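
Both workflow changes drop the `-m "smoke or sanity"` marker filter, so CI now runs the full unit-test suite rather than only marker-selected tests. The same environment can be reproduced locally; a minimal sketch, assuming a checkout of the repository with its tox configuration:

```bash
# Install the runner and execute the same env the workflows invoke
pip install tox
tox -e test-unit

# The previous CI behavior (marker-filtered subset) is still available
# by forwarding pytest arguments through tox:
tox -e test-unit -- -m "smoke or sanity"
```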

README.md (19 additions, 0 deletions)

````diff
@@ -52,6 +52,25 @@ pip install git+https://github.com/vllm-project/guidellm.git
 
 For detailed installation instructions and requirements, see the [Installation Guide](https://github.com/vllm-project/guidellm/blob/main/docs/install.md).
 
+### With Podman / Docker
+
+Alternatively, we publish container images at [ghcr.io/vllm-project/guidellm](https://github.com/vllm-project/guidellm/pkgs/container/guidellm). Running a container is (by default) equivalent to `guidellm benchmark run`:
+
+```bash
+podman run \
+  --rm -it \
+  -v "./results:/results:rw" \
+  -e GUIDELLM_TARGET=http://localhost:8000 \
+  -e GUIDELLM_RATE_TYPE=sweep \
+  -e GUIDELLM_MAX_SECONDS=30 \
+  -e GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
+  ghcr.io/vllm-project/guidellm:latest
+```
+
+> [!TIP] CLI options can also be specified as ENV variables (e.g., `--rate-type sweep` -> `GUIDELLM_RATE_TYPE=sweep`). If both are specified, the CLI option overrides the ENV.
+
+Replace `latest` with `stable` for the newest tagged release or set a specific release if desired.
+
 ### Quick Start
 
 #### 1. Start an OpenAI Compatible Server (vLLM)
````
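
To make the tag guidance at the end of the addition concrete, pinning looks like this (a sketch; the specific version tag is a placeholder, so check the registry page above for real tags):

```bash
# Track the newest tagged release instead of the latest build
podman pull ghcr.io/vllm-project/guidellm:stable

# Or pin an exact release (version shown is hypothetical)
podman pull ghcr.io/vllm-project/guidellm:v0.3.0
```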

deploy/Containerfile (16 additions, 22 deletions)

```diff
@@ -1,26 +1,26 @@
-ARG PYTHON=3.13
+ARG BASE_IMAGE=docker.io/python:3.13-slim
 
 # Use a multi-stage build to create a lightweight production image
-FROM docker.io/python:${PYTHON}-slim as builder
+FROM $BASE_IMAGE as builder
+
+# Ensure files are installed as root
+USER root
 
 # Copy repository files
-COPY / /src
+COPY / /opt/app-root/src
 
 # Create a venv and install guidellm
-RUN python3 -m venv /opt/guidellm \
-    && /opt/guidellm/bin/pip install --no-cache-dir /src
-
-# Copy entrypoint script into the venv bin directory
-RUN install -m0755 /src/deploy/entrypoint.sh /opt/guidellm/bin/entrypoint.sh
+RUN python3 -m venv /opt/app-root/guidellm \
+    && /opt/app-root/guidellm/bin/pip install --no-cache-dir /opt/app-root/src
 
 # Prod image
-FROM docker.io/python:${PYTHON}-slim
+FROM $BASE_IMAGE
 
 # Copy the virtual environment from the builder stage
-COPY --from=builder /opt/guidellm /opt/guidellm
+COPY --from=builder /opt/app-root/guidellm /opt/app-root/guidellm
 
 # Add guidellm bin to PATH
-ENV PATH="/opt/guidellm/bin:$PATH"
+ENV PATH="/opt/app-root/guidellm/bin:$PATH"
 
 # Create a non-root user
 RUN useradd -md /results guidellm
@@ -35,14 +35,8 @@ WORKDIR /results
 LABEL org.opencontainers.image.source="https://github.com/vllm-project/guidellm" \
       org.opencontainers.image.description="GuideLLM Performance Benchmarking Container"
 
-# Set the environment variable for the benchmark script
-# TODO: Replace with scenario environment variables
-ENV GUIDELLM_TARGET="http://localhost:8000" \
-    GUIDELLM_MODEL="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16" \
-    GUIDELLM_RATE_TYPE="sweep" \
-    GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
-    GUIDELLM_MAX_REQUESTS="100" \
-    GUIDELLM_MAX_SECONDS="" \
-    GUIDELLM_OUTPUT_PATH="/results/results.json"
-
-ENTRYPOINT [ "/opt/guidellm/bin/entrypoint.sh" ]
+# Argument defaults can be set with GUIDELLM_<ARG>
+ENV GUIDELLM_OUTPUT_PATH="/results/benchmarks.json"
+
+ENTRYPOINT [ "/opt/app-root/guidellm/bin/guidellm" ]
+CMD [ "benchmark", "run" ]
```

deploy/entrypoint.sh (0 additions, 43 deletions)

This file was deleted.

docs/guides/cli.md (36 additions, 1 deletion)

```diff
@@ -1 +1,36 @@
-# Coming Soon
+# CLI Reference
+
+This page provides a reference for the `guidellm` command-line interface. For more advanced configuration, including environment variables and `.env` files, see the [Configuration Guide](./configuration.md).
+
+## `guidellm benchmark run`
+
+This command is the primary entrypoint for running benchmarks. It has many options that can be specified on the command line or in a scenario file.
+
+### Scenario Configuration
+
+| Option | Description |
+| --- | --- |
+| `--scenario <PATH or NAME>` | The name of a builtin scenario or path to a scenario configuration file. Options specified on the command line will override the scenario file. |
+
+### Target and Backend Configuration
+
+These options configure how `guidellm` connects to the system under test.
+
+| Option | Description |
+| --- | --- |
+| `--target <URL>` | **Required.** The endpoint of the target system, e.g., `http://localhost:8080`. Can also be set with the `GUIDELLM__OPENAI__BASE_URL` environment variable. |
+| `--backend-type <TYPE>` | The type of backend to use. Defaults to `openai_http`. |
+| `--backend-args <JSON>` | A JSON string for backend-specific arguments. For example: `--backend-args '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}'` to pass custom headers and disable certificate verification. |
+| `--model <NAME>` | The ID of the model to benchmark within the backend. |
+
+### Data and Request Configuration
+
+These options define the data to be used for benchmarking and how requests will be generated.
+
+| Option | Description |
+| --- | --- |
+| `--data <SOURCE>` | The data source. This can be a HuggingFace dataset ID, a path to a local data file, or a synthetic data configuration. See the [Data Formats Guide](./data_formats.md) for more details. |
+| `--rate-type <TYPE>` | The type of request generation strategy to use (e.g., `constant`, `poisson`, `sweep`). |
+| `--rate <NUMBER>` | The rate of requests per second for `constant` or `poisson` strategies, or the number of steps for a `sweep`. |
+| `--max-requests <NUMBER>` | The maximum number of requests to run for each benchmark. |
+| `--max-seconds <NUMBER>` | The maximum number of seconds to run each benchmark for. |
```
docs/guides/configuration.md (59 additions, 1 deletion)

````diff
@@ -1 +1,59 @@
-# Coming Soon
+# Configuration
+
+The `guidellm` application can be configured using command-line arguments, environment variables, or a `.env` file. This page details the file-based and environment variable configuration options.
+
+## Configuration Methods
+
+Settings are loaded with the following priority (highest priority first):
+
+1. Command-line arguments.
+2. Environment variables.
+3. Values in a `.env` file in the directory where the command is run.
+4. Default values.
+
+## Environment Variable Format
+
+All settings can be configured using environment variables. The variables must be prefixed with `GUIDELLM__`, and nested settings are separated by a double underscore `__`.
+
+For example, to set the `api_key` for the `openai` backend, you would use the following environment variable:
+
+```bash
+export GUIDELLM__OPENAI__API_KEY="your-api-key"
+```
+
+### Target and Backend Configuration
+
+You can configure the connection to the target system using environment variables. This is an alternative to using the `--target-*` command-line flags.
+
+| Environment Variable | Description | Example |
+| --- | --- | --- |
+| `GUIDELLM__OPENAI__BASE_URL` | The endpoint of the target system. Equivalent to the `--target` CLI option. | `export GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"` |
+| `GUIDELLM__OPENAI__API_KEY` | The API key to use for bearer token authentication. | `export GUIDELLM__OPENAI__API_KEY="your-secret-api-key"` |
+| `GUIDELLM__OPENAI__BEARER_TOKEN` | The full bearer token to use for authentication. | `export GUIDELLM__OPENAI__BEARER_TOKEN="Bearer your-secret-token"` |
+| `GUIDELLM__OPENAI__HEADERS` | A JSON string representing a dictionary of headers to send to the target. These headers will override any default headers. | `export GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}'` |
+| `GUIDELLM__OPENAI__ORGANIZATION` | The OpenAI organization to use for requests. | `export GUIDELLM__OPENAI__ORGANIZATION="org-12345"` |
+| `GUIDELLM__OPENAI__PROJECT` | The OpenAI project to use for requests. | `export GUIDELLM__OPENAI__PROJECT="proj-67890"` |
+| `GUIDELLM__OPENAI__VERIFY` | Set to `false` or `0` to disable certificate verification. | `export GUIDELLM__OPENAI__VERIFY=false` |
+| `GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS` | The default maximum number of tokens to request for completions. | `export GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS=2048` |
+
+### General HTTP Settings
+
+These settings control the behavior of the underlying HTTP client.
+
+| Environment Variable | Description |
+| --- | --- |
+| `GUIDELLM__REQUEST_TIMEOUT` | The timeout in seconds for HTTP requests. Defaults to 300. |
+| `GUIDELLM__REQUEST_HTTP2` | Set to `true` or `1` to enable HTTP/2 support. Defaults to true. |
+| `GUIDELLM__REQUEST_FOLLOW_REDIRECTS` | Set to `true` or `1` to allow the client to follow redirects. Defaults to true. |
+
+### Using a `.env` file
+
+You can also place these variables in a `.env` file in your project's root directory:
+
+```dotenv
+# .env file
+GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"
+GUIDELLM__OPENAI__API_KEY="your-api-key"
+GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}'
+GUIDELLM__OPENAI__VERIFY=false
+```
````
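
As a concrete illustration of the priority order, a CLI argument should beat an environment variable when both are set; a sketch with placeholder values:

```bash
# An exported variable (or .env entry) sets one target...
export GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"

# ...but the CLI flag has higher priority, so this run targets port 9000
guidellm benchmark run \
  --target http://localhost:9000 \
  --data "prompt_tokens=256,output_tokens=128"
```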

docs/guides/data_formats.md (67 additions, 0 deletions)

````diff
@@ -0,0 +1,67 @@
+# Data Formats
+
+The `--data` argument for the `guidellm benchmark run` command accepts several different formats for specifying the data to be used for benchmarking.
+
+## Local Data Files
+
+You can provide a path to a local data file in one of the following formats:
+
+- **CSV (.csv)**: A comma-separated values file. The loader will attempt to find a column with a common name for the prompt (e.g., `prompt`, `text`, `instruction`).
+- **JSON (.json)**: A JSON file. The structure should be a list of objects, where each object represents a row of data.
+- **JSON Lines (.jsonl)**: A file where each line is a valid JSON object.
+- **Text (.txt)**: A plain text file, where each line is treated as a separate prompt.
+
+If the prompt column cannot be automatically determined, you can specify it using the `--data-args` option:
+
+```bash
+--data-args '{"text_column": "my_custom_prompt_column"}'
+```
+
+## Synthetic Data
+
+You can generate synthetic data on the fly by providing a configuration string or file.
+
+### Configuration Options
+
+| Parameter | Description |
+| --- | --- |
+| `prompt_tokens` | **Required.** The average number of tokens for the generated prompts. |
+| `output_tokens` | **Required.** The average number of tokens for the generated outputs. |
+| `samples` | The total number of samples to generate. Defaults to 1000. |
+| `source` | The source text to use for generating the synthetic data. Defaults to a built-in copy of "Pride and Prejudice". |
+| `prompt_tokens_stdev` | The standard deviation of the tokens generated for prompts. |
+| `prompt_tokens_min` | The minimum number of text tokens generated for prompts. |
+| `prompt_tokens_max` | The maximum number of text tokens generated for prompts. |
+| `output_tokens_stdev` | The standard deviation of the tokens generated for outputs. |
+| `output_tokens_min` | The minimum number of text tokens generated for outputs. |
+| `output_tokens_max` | The maximum number of text tokens generated for outputs. |
+
+### Configuration Formats
+
+You can provide the synthetic data configuration in one of three ways:
+
+1. **Key-Value String:**
+
+   ```bash
+   --data "prompt_tokens=256,output_tokens=128,samples=500"
+   ```
+
+2. **JSON String:**
+
+   ```bash
+   --data '{"prompt_tokens": 256, "output_tokens": 128, "samples": 500}'
+   ```
+
+3. **YAML or Config File:** Create a file (e.g., `my_config.yaml`):
+
+   ```yaml
+   prompt_tokens: 256
+   output_tokens: 128
+   samples: 500
+   ```
+
+   And use it with the `--data` argument:
+
+   ```bash
+   --data my_config.yaml
+   ```
````
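
To tie the local-file options together, here is a minimal JSONL walkthrough (a sketch; the file name and column name are illustrative, and `--data-args` is only needed when the prompt column is not detected automatically):

```bash
# prompts.jsonl: one JSON object per line
cat > prompts.jsonl <<'EOF'
{"prompt": "Summarize the plot of Pride and Prejudice in one sentence."}
{"prompt": "List three uses for a multi-stage container build."}
EOF

# Point --data at the file and name the prompt column explicitly
guidellm benchmark run \
  --target http://localhost:8080 \
  --data prompts.jsonl \
  --data-args '{"text_column": "prompt"}'
```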

src/guidellm/__main__.py (5 additions, 2 deletions)

```diff
@@ -25,6 +25,7 @@
 
 
 @click.group()
+@click.version_option(package_name="guidellm", message="guidellm version: %(version)s")
 def cli():
     pass
 
@@ -51,7 +52,7 @@ def benchmark():
         readable=True,
         file_okay=True,
         dir_okay=False,
-        path_type=Path,  # type: ignore[type-var]
+        path_type=Path,
     ),
     click.Choice(get_builtin_scenarios()),
 ),
@@ -82,7 +83,9 @@ def benchmark():
     default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
-        "dict with **kwargs."
+        "dict with **kwargs. Headers can be removed by setting their value to "
+        "null. For example: "
+        """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
     ),
 )
 @click.option(
```
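
With `click.version_option` registered on the group, the CLI gains a standard `--version` flag. Based on the message template in the diff, the output should look like the following (the version number is a placeholder resolved from the installed package metadata):

```bash
$ guidellm --version
guidellm version: 0.3.0
```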
