Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/cli_options.md
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,11 @@ Image file format for generated images. Choose `png` for lossless compression (l
| `jpeg` | | JPEG format. Lossy compression, smaller file sizes, good for photos. |
| `random` | | Randomly select PNG or JPEG for each image. |

#### `--image-source` `<str>`

Source image generation mode. `assets` loads images from the built-in `assets/source_images` directory (ships with a small set of 4 images). `noise` generates random noise images on the fly, providing a larger and more diverse pool without requiring files on disk. Noise mode is useful for stressing multimodal pipelines more realistically. A path to a directory loads images from the given directory (e.g. `--image-source ./source_images`).
<br>_Default: `assets`_

### Video Input

#### `--video-batch-size`, `--batch-size-video` `<int>`
Expand Down
6 changes: 0 additions & 6 deletions src/aiperf/common/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
from aiperf.common.config.base_config import BaseConfig
from aiperf.common.config.cli_parameter import CLIParameter, DisableCLI
from aiperf.common.config.config_defaults import (
AudioDefaults,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed these defaults classes and inlined the defaults directly to reduce cognitive load.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we generally like relegating magic numbers into these default classes?

CLIDefaults,
ConversationDefaults,
EndpointDefaults,
ImageDefaults,
InputDefaults,
InputTokensDefaults,
LoadGeneratorDefaults,
Expand All @@ -23,7 +21,6 @@
TokenizerDefaults,
TurnDefaults,
TurnDelayDefaults,
VideoDefaults,
WorkersDefaults,
)
from aiperf.common.config.conversation_config import (
Expand Down Expand Up @@ -71,7 +68,6 @@

__all__ = [
"AudioConfig",
"AudioDefaults",
"AudioLengthConfig",
"BaseConfig",
"BaseZMQCommunicationConfig",
Expand All @@ -85,7 +81,6 @@
"EndpointDefaults",
"Groups",
"ImageConfig",
"ImageDefaults",
"ImageHeightConfig",
"ImageWidthConfig",
"InputConfig",
Expand Down Expand Up @@ -118,7 +113,6 @@
"TurnDelayDefaults",
"UserConfig",
"VideoConfig",
"VideoDefaults",
"WorkersConfig",
"WorkersDefaults",
"ZMQDualBindConfig",
Expand Down
39 changes: 30 additions & 9 deletions src/aiperf/common/config/audio_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

from typing import Annotated

from pydantic import BeforeValidator, Field
from pydantic import BeforeValidator, Field, model_validator
from typing_extensions import Self

from aiperf.common.config.base_config import BaseConfig
from aiperf.common.config.cli_parameter import CLIParameter
from aiperf.common.config.config_defaults import AudioDefaults
from aiperf.common.config.config_validators import parse_str_or_list_of_positive_values
from aiperf.common.config.groups import Groups
from aiperf.common.enums import AudioFormat
Expand All @@ -23,6 +23,7 @@ class AudioLengthConfig(BaseConfig):
mean: Annotated[
float,
Field(
default=0.0,
ge=0,
description="Mean duration in seconds for synthetically generated audio files. Audio lengths follow a normal distribution "
"around this mean (±`--audio-length-stddev`). Used when `--audio-batch-size` > 0 for multimodal benchmarking. "
Expand All @@ -34,11 +35,12 @@ class AudioLengthConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = AudioDefaults.LENGTH_MEAN
]

stddev: Annotated[
float,
Field(
default=0.0,
ge=0,
description="Standard deviation for synthetic audio duration in seconds. Creates variability in audio lengths when > 0, "
"simulating mixed-duration audio inputs. Durations follow normal distribution. "
Expand All @@ -50,7 +52,7 @@ class AudioLengthConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = AudioDefaults.LENGTH_STDDEV
]


class AudioConfig(BaseConfig):
Expand All @@ -63,6 +65,7 @@ class AudioConfig(BaseConfig):
batch_size: Annotated[
int,
Field(
default=1,
ge=0,
description="The number of audio inputs to include in each request. Supported with the `chat` endpoint type for multimodal models.",
),
Expand All @@ -73,13 +76,14 @@ class AudioConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = AudioDefaults.BATCH_SIZE
]

length: AudioLengthConfig = AudioLengthConfig()

format: Annotated[
AudioFormat,
Field(
default=AudioFormat.WAV,
description="File format for generated audio files. Supports `wav` (uncompressed PCM, larger files) and `mp3` (compressed, smaller files). "
"Format choice affects file size in multimodal requests but not audio characteristics (sample rate, bit depth, duration).",
),
Expand All @@ -89,11 +93,12 @@ class AudioConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = AudioDefaults.FORMAT
]

depths: Annotated[
list[int],
Field(
default=[16],
min_length=1,
description="List of audio bit depths in bits to randomly select from when generating audio files. Each audio file is assigned "
"a random depth from this list. Common values: `8` (low quality), `16` (CD quality), `24` (professional), `32` (high-end). "
Expand All @@ -106,11 +111,12 @@ class AudioConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = AudioDefaults.DEPTHS
]

sample_rates: Annotated[
list[float],
Field(
default=[16.0],
min_length=1,
description="A list of audio sample rates to randomly select from in kHz.\n"
"Common sample rates are 16, 44.1, 48, 96, etc.",
Expand All @@ -122,11 +128,12 @@ class AudioConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = AudioDefaults.SAMPLE_RATES
]

num_channels: Annotated[
int,
Field(
default=1,
ge=1,
le=2,
description="Number of audio channels for synthetic audio generation. `1` = mono (single channel), `2` = stereo (left/right channels). "
Expand All @@ -139,4 +146,18 @@ class AudioConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = AudioDefaults.NUM_CHANNELS
]

@model_validator(mode="after")
def _validate_audio_options(self) -> Self:
"""Validate the audio options."""
audio_options_set = {*self.model_fields_set, *self.length.model_fields_set}
if not self.audio_enabled() and audio_options_set:
raise ValueError(
"Audio generation is disabled but audio options were provided. Please set `--audio-batch-size` and `--audio-length-mean` to enable audio generation."
)
Comment on lines +155 to +158
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

batch_size=0 should be accepted as an explicit disabled state.

Line 155 treats any explicitly set audio option as invalid when disabled, which also rejects batch_size=0. Since audio_enabled() uses batch_size > 0, this should be a valid explicit disable path.

Proposed fix
 `@model_validator`(mode="after")
 def _validate_audio_options(self) -> Self:
     """Validate the audio options."""
+    if self.batch_size == 0:
+        return self
+
     audio_options_set = {*self.model_fields_set, *self.length.model_fields_set}
     if not self.audio_enabled() and audio_options_set:
         raise ValueError(
             "Audio generation is disabled but audio options were provided. Please set `--audio-batch-size` and `--audio-length-mean` to enable audio generation."
         )
     return self
🧰 Tools
🪛 Ruff (0.15.2)

[warning] 156-158: Avoid specifying long messages outside the exception class

(TRY003)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/aiperf/common/config/audio_config.py` around lines 155 - 158, The current
check raises whenever audio options were provided while audio_enabled() is
False, which wrongly rejects an explicit disable via batch_size==0; update the
logic around the raise to allow the explicit disable case by: compute whether
only batch_size was provided (e.g., only_batch_size_provided = audio_options_set
and all other audio option fields like audio_length_mean are unset/None), then
change the condition to raise only when not self.audio_enabled() and
audio_options_set and not (self.batch_size == 0 and only_batch_size_provided);
reference the existing audio_enabled(), audio_options_set, and self.batch_size
symbols when making this change.

return self

def audio_enabled(self) -> bool:
    """Report whether audio generation is active for this configuration.

    Audio counts as enabled only when the mean audio length and the
    per-request batch size are both strictly positive.
    """
    # A non-positive mean length disables audio regardless of batch size.
    if not self.length.mean > 0:
        return False
    return self.batch_size > 0
Comment on lines +151 to +163
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added basic sanity checks to prevent users from using multimodal options without enabling that modality.

37 changes: 0 additions & 37 deletions src/aiperf/common/config/config_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,10 @@

from aiperf.common.enums import (
AIPerfLogLevel,
AudioFormat,
ConnectionReuseStrategy,
ExportLevel,
ImageFormat,
ModelSelectionStrategy,
ServerMetricsFormat,
VideoFormat,
VideoSynthType,
)
from aiperf.plugin.enums import (
ArrivalPattern,
Expand Down Expand Up @@ -77,39 +73,6 @@ class RankingsDefaults:
QUERY_PROMPT_TOKEN_STDDEV = 0


@dataclass(frozen=True)
class AudioDefaults:
BATCH_SIZE = 1
LENGTH_MEAN = 0.0
LENGTH_STDDEV = 0.0
FORMAT = AudioFormat.WAV
DEPTHS = [16]
SAMPLE_RATES = [16.0]
NUM_CHANNELS = 1


@dataclass(frozen=True)
class ImageDefaults:
BATCH_SIZE = 1
WIDTH_MEAN = 0.0
WIDTH_STDDEV = 0.0
HEIGHT_MEAN = 0.0
HEIGHT_STDDEV = 0.0
FORMAT = ImageFormat.PNG


@dataclass(frozen=True)
class VideoDefaults:
BATCH_SIZE = 1
DURATION = 5.0
FPS = 4
WIDTH = None
HEIGHT = None
SYNTH_TYPE = VideoSynthType.MOVING_SHAPES
FORMAT = VideoFormat.WEBM
CODEC = "libvpx-vp9"


@dataclass(frozen=True)
class PromptDefaults:
BATCH_SIZE = 1
Expand Down
59 changes: 50 additions & 9 deletions src/aiperf/common/config/image_config.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from pathlib import Path
from typing import Annotated

from pydantic import Field
from pydantic import Field, model_validator
from typing_extensions import Self

from aiperf.common.config.base_config import BaseConfig
from aiperf.common.config.cli_parameter import CLIParameter
from aiperf.common.config.config_defaults import ImageDefaults
from aiperf.common.config.groups import Groups
from aiperf.common.enums import ImageFormat
from aiperf.common.enums import ImageFormat, ImageSource


class ImageHeightConfig(BaseConfig):
Expand All @@ -22,6 +23,7 @@ class ImageHeightConfig(BaseConfig):
mean: Annotated[
float,
Field(
default=0.0,
ge=0,
description="Mean height in pixels for synthetically generated images. Image heights follow a normal distribution "
"around this mean (±`--image-height-stddev`). Used when `--image-batch-size` > 0 for multimodal vision benchmarking. "
Expand All @@ -33,11 +35,12 @@ class ImageHeightConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = ImageDefaults.HEIGHT_MEAN
]

stddev: Annotated[
float,
Field(
default=0.0,
ge=0,
description="Standard deviation for synthetic image heights in pixels. Creates variability in vertical resolution when > 0, "
"simulating mixed-resolution image inputs. Heights follow normal distribution. "
Expand All @@ -49,7 +52,7 @@ class ImageHeightConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = ImageDefaults.HEIGHT_STDDEV
]


class ImageWidthConfig(BaseConfig):
Expand All @@ -62,6 +65,7 @@ class ImageWidthConfig(BaseConfig):
mean: Annotated[
float,
Field(
default=0.0,
ge=0,
description="Mean width in pixels for synthetically generated images. Image widths follow a normal distribution "
"around this mean (±`--image-width-stddev`). Combined with `--image-height-mean` to determine image dimensions "
Expand All @@ -73,11 +77,12 @@ class ImageWidthConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = ImageDefaults.WIDTH_MEAN
]

stddev: Annotated[
float,
Field(
default=0.0,
ge=0,
description="Standard deviation for synthetic image widths in pixels. Creates variability in horizontal resolution when > 0, "
"simulating mixed-resolution image inputs. Widths follow normal distribution. "
Expand All @@ -89,7 +94,7 @@ class ImageWidthConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = ImageDefaults.WIDTH_STDDEV
]


class ImageConfig(BaseConfig):
Expand All @@ -104,6 +109,7 @@ class ImageConfig(BaseConfig):
batch_size: Annotated[
int,
Field(
default=1,
ge=0,
description="Number of images to include in each multimodal request. Supported with `chat` endpoint type for vision-language models. "
"Each image is generated by randomly sampling and resizing source images from `assets/source_images` directory to specified dimensions. "
Expand All @@ -116,11 +122,12 @@ class ImageConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = ImageDefaults.BATCH_SIZE
]

format: Annotated[
ImageFormat,
Field(
default=ImageFormat.PNG,
description="Image file format for generated images. Choose `png` for lossless compression (larger files, best quality), "
"`jpeg` for lossy compression (smaller files, good quality), or `random` to randomly select between PNG and JPEG for each image. "
"Format affects file size in multimodal requests and encoding overhead.",
Expand All @@ -131,4 +138,38 @@ class ImageConfig(BaseConfig):
),
group=_CLI_GROUP,
),
] = ImageDefaults.FORMAT
]

source: Annotated[
ImageSource | Path,
Field(
default=ImageSource.ASSETS,
description="Source image generation mode. `assets` loads images from the built-in `assets/source_images` directory "
"(ships with a small set of 4 images). `noise` generates random noise images on the fly, "
"providing a larger and more diverse pool without requiring files on disk. "
"Noise mode is useful for stressing multimodal pipelines more realistically. "
"A path to a directory loads images from the given directory (e.g. `--image-source ./source_images`).",
),
CLIParameter(
name=("--image-source",),
group=_CLI_GROUP,
),
]

@model_validator(mode="after")
def _validate_image_options(self) -> Self:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have warnings for the case where someone supplies an `--input-file` that, say, contains images, and also tries to use some synthetic image options?

"""Validate the image options."""
image_options_set = {
*self.model_fields_set,
*self.width.model_fields_set,
*self.height.model_fields_set,
}
if not self.images_enabled() and image_options_set:
raise ValueError(
"Image generation is disabled but image options were provided. Please set `--image-width-mean` and `--image-height-mean` to enable image generation."
)
Comment on lines +167 to +170
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Allow explicit disable via --image-batch-size 0.

Line 167 currently rejects explicitly setting batch_size=0, even though Line 116 documents it as a disable mechanism. This makes a valid disable flow fail at validation time.

Proposed fix
 `@model_validator`(mode="after")
 def _validate_image_options(self) -> Self:
     """Validate the image options."""
+    if self.batch_size == 0:
+        return self
+
     image_options_set = {
         *self.model_fields_set,
         *self.width.model_fields_set,
         *self.height.model_fields_set,
     }
     if not self.images_enabled() and image_options_set:
         raise ValueError(
-            "Image generation is disabled but image options were provided. Please set `--image-width-mean` and `--image-height-mean` to enable image generation."
+            "Image generation is disabled but image options were provided. Please set `--image-batch-size` (>0), `--image-width-mean`, and `--image-height-mean` to enable image generation."
         )
     return self
🧰 Tools
🪛 Ruff (0.15.2)

[warning] 168-170: Avoid specifying long messages outside the exception class

(TRY003)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/aiperf/common/config/image_config.py` around lines 167 - 170, The
validator is rejecting an explicit disable via "--image-batch-size 0"; update
the check around images_enabled() so providing image options is allowed when the
user explicitly set batch size to 0. Concretely, in the block that raises when
not self.images_enabled() and image_options_set, add a condition to skip the
error if the batch-size flag is explicitly zero (e.g. check
self.batch_size == 0, i.e. the attribute used to store
--image-batch-size); alternatively implement images_enabled() to return False
when batch size == 0 and ensure the raise only happens if images are truly
enabled. Reference images_enabled(), image_options_set and the image-batch-size
flag/attribute when making the change.

return self

def images_enabled(self) -> bool:
    """Report whether image generation is active for this configuration.

    Images count as enabled only when the mean width, the mean height, and
    the per-request batch size are all strictly positive.
    """
    # Both dimensions need a positive mean before batch size matters.
    if not self.width.mean > 0:
        return False
    if not self.height.mean > 0:
        return False
    return self.batch_size > 0
Loading