Commit d3858ef

fixup from PR review and add tests
A new unit test explicitly verifies that headers can be removed by setting their value to `None`, and a second unit test ensures the same behavior works through the CLI. The documentation for the `--backend-args` CLI option has been updated to mention that headers can be removed by setting their value to `null`.
1 parent 618c149 commit d3858ef
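In practice, a header whose value is `None` (or `null` in JSON passed on the command line) is dropped from the final request headers instead of being sent. A minimal sketch of the behavior, adapted from the unit test added in this commit (the import path for `settings` is assumed here):

```python
from guidellm.backend.openai import OpenAIHTTPBackend
from guidellm.config import settings  # NOTE: import path assumed; the new tests reference `settings` directly

# An Authorization header would normally be derived from the configured api_key
settings.openai.api_key = "default-api-key"

# A None value removes the header; other custom headers are kept as-is
backend = OpenAIHTTPBackend(headers={"Authorization": None, "Custom-Header": "Custom-Value"})

assert "Authorization" not in backend.headers
assert backend.headers["Custom-Header"] == "Custom-Value"
```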

File tree

7 files changed: +159 -71 lines changed

- docs/guides/cli.md
- docs/guides/configuration.md
- docs/guides/data_formats.md
- src/guidellm/__main__.py
- src/guidellm/backend/openai.py
- tests/unit/backend/test_openai_backend_custom_configs.py
- tests/unit/test_main.py

docs/guides/cli.md

Lines changed: 14 additions & 14 deletions
@@ -8,29 +8,29 @@ This command is the primary entrypoint for running benchmarks. It has many optio
 
 ### Scenario Configuration
 
-| Option | Description |
-| --- | --- |
+| Option                      | Description                                                                                                                                      |
+| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `--scenario <PATH or NAME>` | The name of a builtin scenario or path to a scenario configuration file. Options specified on the command line will override the scenario file. |
 
 ### Target and Backend Configuration
 
 These options configure how `guidellm` connects to the system under test.
 
-| Option | Description |
-| --- | --- |
-| `--target <URL>` | **Required.** The endpoint of the target system, e.g., `http://localhost:8080`. Can also be set with the `GUIDELLM__OPENAI__BASE_URL` environment variable. |
-| `--backend-type <TYPE>` | The type of backend to use. Defaults to `openai_http`. |
+| Option                  | Description                                                                                                                                                  |
+| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `--target <URL>`        | **Required.** The endpoint of the target system, e.g., `http://localhost:8080`. Can also be set with the `GUIDELLM__OPENAI__BASE_URL` environment variable. |
+| `--backend-type <TYPE>` | The type of backend to use. Defaults to `openai_http`. |
 | `--backend-args <JSON>` | A JSON string for backend-specific arguments. For example: `--backend-args '{"headers": {"Authorization": "Bearer my-token"}, "verify": false}'` to pass custom headers and disable certificate verification. |
-| `--model <NAME>` | The ID of the model to benchmark within the backend. |
+| `--model <NAME>`        | The ID of the model to benchmark within the backend. |
 
 ### Data and Request Configuration
 
 These options define the data to be used for benchmarking and how requests will be generated.
 
-| Option | Description |
-| --- | --- |
-| `--data <SOURCE>` | The data source. This can be a HuggingFace dataset ID, a path to a local data file, or a synthetic data configuration. See the [Data Formats Guide](./data_formats.md) for more details. |
-| `--rate-type <TYPE>` | The type of request generation strategy to use (e.g., `constant`, `poisson`, `sweep`). |
-| `--rate <NUMBER>` | The rate of requests per second for `constant` or `poisson` strategies, or the number of steps for a `sweep`. |
-| `--max-requests <NUMBER>` | The maximum number of requests to run for each benchmark. |
-| `--max-seconds <NUMBER>` | The maximum number of seconds to run each benchmark for. |
+| Option                    | Description                                                                                                                                                                               |
+| ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `--data <SOURCE>`         | The data source. This can be a HuggingFace dataset ID, a path to a local data file, or a synthetic data configuration. See the [Data Formats Guide](./data_formats.md) for more details. |
+| `--rate-type <TYPE>`      | The type of request generation strategy to use (e.g., `constant`, `poisson`, `sweep`). |
+| `--rate <NUMBER>`         | The rate of requests per second for `constant` or `poisson` strategies, or the number of steps for a `sweep`. |
+| `--max-requests <NUMBER>` | The maximum number of requests to run for each benchmark. |
+| `--max-seconds <NUMBER>`  | The maximum number of seconds to run each benchmark for. |

docs/guides/configuration.md

Lines changed: 20 additions & 19 deletions
@@ -5,16 +5,18 @@ The `guidellm` application can be configured using command-line arguments, envir
 ## Configuration Methods
 
 Settings are loaded with the following priority (highest priority first):
-1. Command-line arguments.
-2. Environment variables.
-3. Values in a `.env` file in the directory where the command is run.
-4. Default values.
+
+1. Command-line arguments.
+2. Environment variables.
+3. Values in a `.env` file in the directory where the command is run.
+4. Default values.
 
 ## Environment Variable Format
 
 All settings can be configured using environment variables. The variables must be prefixed with `GUIDELLM__`, and nested settings are separated by a double underscore `__`.
 
 For example, to set the `api_key` for the `openai` backend, you would use the following environment variable:
+
 ```bash
 export GUIDELLM__OPENAI__API_KEY="your-api-key"
 ```
@@ -23,28 +25,27 @@ export GUIDELLM__OPENAI__API_KEY="your-api-key"
 
 You can configure the connection to the target system using environment variables. This is an alternative to using the `--target-*` command-line flags.
 
-| Environment Variable | Description | Example |
-| --- | --- | --- |
-| `GUIDELLM__OPENAI__BASE_URL` | The endpoint of the target system. Equivalent to the `--target` CLI option. | `export GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"` |
-| `GUIDELLM__OPENAI__API_KEY` | The API key to use for bearer token authentication. | `export GUIDELLM__OPENAI__API_KEY="your-secret-api-key"` |
-| `GUIDELLM__OPENAI__BEARER_TOKEN` | The full bearer token to use for authentication. | `export GUIDELLM__OPENAI__BEARER_TOKEN="Bearer your-secret-token"` |
-| `GUIDELLM__OPENAI__HEADERS` | A JSON string representing a dictionary of headers to send to the target. These headers will override any default headers. | `export GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}'` |
-| `GUIDELLM__OPENAI__ORGANIZATION` | The OpenAI organization to use for requests. | `export GUIDELLM__OPENAI__ORGANIZATION="org-12345"` |
-| `GUIDELLM__OPENAI__PROJECT` | The OpenAI project to use for requests. | `export GUIDELLM__OPENAI__PROJECT="proj-67890"` |
-| `GUIDELLM__OPENAI__VERIFY` | Set to `false` or `0` to disable certificate verification. | `export GUIDELLM__OPENAI__VERIFY=false` |
-| `GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS` | The default maximum number of tokens to request for completions. | `export GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS=2048` |
+| Environment Variable                  | Description                                                                                                                  | Example                                                                   |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
+| `GUIDELLM__OPENAI__BASE_URL`          | The endpoint of the target system. Equivalent to the `--target` CLI option.                                                 | `export GUIDELLM__OPENAI__BASE_URL="http://localhost:8080"`               |
+| `GUIDELLM__OPENAI__API_KEY`           | The API key to use for bearer token authentication.                                                                          | `export GUIDELLM__OPENAI__API_KEY="your-secret-api-key"`                  |
+| `GUIDELLM__OPENAI__BEARER_TOKEN`      | The full bearer token to use for authentication.                                                                             | `export GUIDELLM__OPENAI__BEARER_TOKEN="Bearer your-secret-token"`        |
+| `GUIDELLM__OPENAI__HEADERS`           | A JSON string representing a dictionary of headers to send to the target. These headers will override any default headers.  | `export GUIDELLM__OPENAI__HEADERS='{"Authorization": "Bearer my-token"}'` |
+| `GUIDELLM__OPENAI__ORGANIZATION`      | The OpenAI organization to use for requests.                                                                                 | `export GUIDELLM__OPENAI__ORGANIZATION="org-12345"`                       |
+| `GUIDELLM__OPENAI__PROJECT`           | The OpenAI project to use for requests.                                                                                      | `export GUIDELLM__OPENAI__PROJECT="proj-67890"`                           |
+| `GUIDELLM__OPENAI__VERIFY`            | Set to `false` or `0` to disable certificate verification.                                                                   | `export GUIDELLM__OPENAI__VERIFY=false`                                   |
+| `GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS` | The default maximum number of tokens to request for completions.                                                             | `export GUIDELLM__OPENAI__MAX_OUTPUT_TOKENS=2048`                         |
 
 ### General HTTP Settings
 
 These settings control the behavior of the underlying HTTP client.
 
-| Environment Variable | Description |
-| --- | --- |
-| `GUIDELLM__REQUEST_TIMEOUT` | The timeout in seconds for HTTP requests. Defaults to 300. |
-| `GUIDELLM__REQUEST_HTTP2` | Set to `true` or `1` to enable HTTP/2 support. Defaults to true. |
+| Environment Variable                 | Description                                                                       |
+| ------------------------------------ | --------------------------------------------------------------------------------- |
+| `GUIDELLM__REQUEST_TIMEOUT`          | The timeout in seconds for HTTP requests. Defaults to 300.                        |
+| `GUIDELLM__REQUEST_HTTP2`            | Set to `true` or `1` to enable HTTP/2 support. Defaults to true.                  |
 | `GUIDELLM__REQUEST_FOLLOW_REDIRECTS` | Set to `true` or `1` to allow the client to follow redirects. Defaults to true. |
 
-
 ### Using a `.env` file
 
 You can also place these variables in a `.env` file in your project's root directory:

docs/guides/data_formats.md

Lines changed: 38 additions & 33 deletions
@@ -12,6 +12,7 @@ You can provide a path to a local data file in one of the following formats:
 - **Text (.txt)**: A plain text file, where each line is treated as a separate prompt.
 
 If the prompt column cannot be automatically determined, you can specify it using the `--data-args` option:
+
 ```bash
 --data-args '{"text_column": "my_custom_prompt_column"}'
 ```
@@ -22,41 +23,45 @@ You can generate synthetic data on the fly by providing a configuration string o
 
 ### Configuration Options
 
-| Parameter | Description |
-| --- | --- |
-| `prompt_tokens` | **Required.** The average number of tokens for the generated prompts. |
-| `output_tokens` | **Required.** The average number of tokens for the generated outputs. |
-| `samples` | The total number of samples to generate. Defaults to 1000. |
-| `source` | The source text to use for generating the synthetic data. Defaults to a built-in copy of "Pride and Prejudice". |
-| `prompt_tokens_stdev` | The standard deviation of the tokens generated for prompts. |
-| `prompt_tokens_min` | The minimum number of text tokens generated for prompts. |
-| `prompt_tokens_max` | The maximum number of text tokens generated for prompts. |
-| `output_tokens_stdev` | The standard deviation of the tokens generated for outputs. |
-| `output_tokens_min` | The minimum number of text tokens generated for outputs. |
-| `output_tokens_max` | The maximum number of text tokens generated for outputs. |
+| Parameter             | Description                                                                                                       |
+| --------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| `prompt_tokens`       | **Required.** The average number of tokens for the generated prompts.                                            |
+| `output_tokens`       | **Required.** The average number of tokens for the generated outputs.                                            |
+| `samples`             | The total number of samples to generate. Defaults to 1000.                                                       |
+| `source`              | The source text to use for generating the synthetic data. Defaults to a built-in copy of "Pride and Prejudice".  |
+| `prompt_tokens_stdev` | The standard deviation of the tokens generated for prompts.                                                       |
+| `prompt_tokens_min`   | The minimum number of text tokens generated for prompts.                                                          |
+| `prompt_tokens_max`   | The maximum number of text tokens generated for prompts.                                                          |
+| `output_tokens_stdev` | The standard deviation of the tokens generated for outputs.                                                       |
+| `output_tokens_min`   | The minimum number of text tokens generated for outputs.                                                          |
+| `output_tokens_max`   | The maximum number of text tokens generated for outputs.                                                          |
 
 ### Configuration Formats
 
 You can provide the synthetic data configuration in one of three ways:
 
-1. **Key-Value String:**
-   ```bash
-   --data "prompt_tokens=256,output_tokens=128,samples=500"
-   ```
-
-2. **JSON String:**
-   ```bash
-   --data '{"prompt_tokens": 256, "output_tokens": 128, "samples": 500}'
-   ```
-
-3. **YAML or Config File:**
-   Create a file (e.g., `my_config.yaml`):
-   ```yaml
-   prompt_tokens: 256
-   output_tokens: 128
-   samples: 500
-   ```
-   And use it with the `--data` argument:
-   ```bash
-   --data my_config.yaml
-   ```
+1. **Key-Value String:**
+
+   ```bash
+   --data "prompt_tokens=256,output_tokens=128,samples=500"
+   ```
+
+2. **JSON String:**
+
+   ```bash
+   --data '{"prompt_tokens": 256, "output_tokens": 128, "samples": 500}'
+   ```
+
+3. **YAML or Config File:** Create a file (e.g., `my_config.yaml`):
+
+   ```yaml
+   prompt_tokens: 256
+   output_tokens: 128
+   samples: 500
+   ```
+
+   And use it with the `--data` argument:
+
+   ```bash
+   --data my_config.yaml
+   ```

src/guidellm/__main__.py

Lines changed: 3 additions & 1 deletion
@@ -82,7 +82,9 @@ def benchmark():
     default=GenerativeTextScenario.get_default("backend_args"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
-        "dict with **kwargs."
+        "dict with **kwargs. Headers can be removed by setting their value to "
+        "null. For example: "
+        """'{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'"""
     ),
 )
 @click.option(
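The reason `null` works from the command line is simply JSON semantics: the `--backend-args` value is parsed as JSON, and JSON `null` becomes Python `None`, which the backend then treats as a removal marker. A quick illustration of that mapping, using only the standard library:

```python
import json

# The --backend-args value is a JSON string; json.loads maps JSON null to Python None
backend_args = json.loads(
    '{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}'
)
print(backend_args["headers"])
# {'Authorization': None, 'Custom-Header': 'Custom-Value'}
# The None value is what the backend later filters out when it builds self.headers.
```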

src/guidellm/backend/openai.py

Lines changed: 8 additions & 4 deletions
@@ -131,9 +131,13 @@ def __init__(
             default_headers["OpenAI-Project"] = self.project
 
         # User-provided headers from kwargs or settings override defaults
-        default_headers.update(settings.openai.headers or {})
-        default_headers.update(headers)
-        self.headers = {k: v for k, v in default_headers.items() if v is not None}
+        merged_headers = default_headers.copy()
+        merged_headers.update(settings.openai.headers or {})
+        if headers:
+            merged_headers.update(headers)
+
+        # Remove headers with None values for backward compatibility and convenience
+        self.headers = {k: v for k, v in merged_headers.items() if v is not None}
 
         self.timeout = timeout if timeout is not None else settings.request_timeout
         self.http2 = http2 if http2 is not None else settings.request_http2
@@ -142,7 +146,7 @@ def __init__(
             if follow_redirects is not None
             else settings.request_follow_redirects
         )
-        self.verify = verify or settings.openai.verify
+        self.verify = verify if verify is not None else settings.openai.verify
         self.max_output_tokens = (
             max_output_tokens
             if max_output_tokens is not None
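The merge order is the substance of this change: defaults derived from settings (API key, organization, project) are applied first, then `settings.openai.headers`, then the `headers` kwarg, and only after all three layers are merged are `None`-valued entries stripped, so a later layer can cancel a header set by an earlier one. A standalone sketch of that precedence, using hypothetical header values rather than the real constructor:

```python
def merge_headers(default_headers, settings_headers, kwarg_headers):
    """Sketch of the merge order used in OpenAIHTTPBackend.__init__ (not the real method)."""
    merged = default_headers.copy()
    merged.update(settings_headers or {})  # settings.openai.headers
    if kwarg_headers:                      # headers= kwarg / --backend-args
        merged.update(kwarg_headers)
    # None values mark headers for removal
    return {k: v for k, v in merged.items() if v is not None}


print(merge_headers(
    {"Authorization": "Bearer default-key"},             # defaults built from api_key / organization / project
    {"Custom-Header": "from-settings"},
    {"Authorization": None, "X-Extra": "from-kwargs"},   # X-Extra is a made-up header for illustration
))
# {'Custom-Header': 'from-settings', 'X-Extra': 'from-kwargs'}
```

The separate `verify` change fixes a falsy-value bug: an explicit `verify=False` previously fell through to `settings.openai.verify` because of the `or`, whereas `verify if verify is not None else ...` now honors the explicit `False`.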

tests/unit/backend/test_openai_backend_custom_configs.py

Lines changed: 24 additions & 0 deletions
@@ -62,3 +62,27 @@ def test_openai_http_backend_kwarg_headers_override_settings():
 
     # Reset the settings
     settings.openai.headers = None
+
+
+@pytest.mark.smoke
+def test_openai_http_backend_remove_header_with_none():
+    # Set a default api_key, which would normally create an Authorization header
+    settings.openai.api_key = "default-api-key"
+
+    # Set a custom header and explicitly set Authorization to None to remove it
+    override_headers = {
+        "Authorization": None,
+        "Custom-Header": "Custom-Value",
+    }
+
+    # Initialize the backend
+    backend = OpenAIHTTPBackend(headers=override_headers)
+
+    # Check that the Authorization header is removed and the custom header is present
+    assert "Authorization" not in backend.headers
+    assert backend.headers["Custom-Header"] == "Custom-Value"
+    assert len(backend.headers) == 1
+
+    # Reset the settings
+    settings.openai.api_key = None
+    settings.openai.headers = None

tests/unit/test_main.py

Lines changed: 52 additions & 0 deletions
@@ -1,3 +1,7 @@
+import json
+from pathlib import Path
+from unittest.mock import patch
+
 import pytest
 from click.testing import CliRunner
 
@@ -30,3 +34,51 @@ def test_benchmark_run_with_backend_args():
     # but it will pass the header parsing, which is what we want to test.
     assert result.exit_code != 0
     assert "Invalid header format" not in result.output
+
+
+@patch("guidellm.__main__.benchmark_with_scenario")
+def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path):
+    """
+    Tests that --backend-args from the CLI correctly overrides scenario
+    values and that `null` correctly removes a header.
+    """
+    scenario_path = tmp_path / "scenario.json"
+
+    # Create a scenario file with a header that should be overridden and removed
+    scenario_content = {
+        "backend_type": "openai_http",
+        "backend_args": {"headers": {"Authorization": "should-be-removed"}},
+        "data": "prompt_tokens=10,output_tokens=10",
+        "max_requests": 1,
+        "target": "http://dummy-target",
+        "rate_type": "synchronous",
+        "processor": "gpt2",
+    }
+    with scenario_path.open("w") as f:
+        json.dump(scenario_content, f)
+
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "benchmark",
+            "run",
+            "--scenario",
+            str(scenario_path),
+            "--backend-args",
+            '{"headers": {"Authorization": null, "Custom-Header": "Custom-Value"}}',
+        ],
+        catch_exceptions=False,
+    )
+
+    assert result.exit_code == 0, result.output
+
+    # Assert that benchmark_with_scenario was called with the correct scenario
+    mock_benchmark_func.assert_called_once()
+    call_args = mock_benchmark_func.call_args[1]
+    scenario = call_args["scenario"]
+
+    # Verify the backend_args were merged correctly
+    backend_args = scenario.backend_args
+    expected_headers = {"Authorization": None, "Custom-Header": "Custom-Value"}
+    assert backend_args["headers"] == expected_headers
