-
Notifications
You must be signed in to change notification settings - Fork 47
feat: --image-source flag to add noise generation and custom dirs #710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,11 +3,11 @@ | |
|
|
||
| from typing import Annotated | ||
|
|
||
| from pydantic import BeforeValidator, Field | ||
| from pydantic import BeforeValidator, Field, model_validator | ||
| from typing_extensions import Self | ||
|
|
||
| from aiperf.common.config.base_config import BaseConfig | ||
| from aiperf.common.config.cli_parameter import CLIParameter | ||
| from aiperf.common.config.config_defaults import AudioDefaults | ||
| from aiperf.common.config.config_validators import parse_str_or_list_of_positive_values | ||
| from aiperf.common.config.groups import Groups | ||
| from aiperf.common.enums import AudioFormat | ||
|
|
@@ -23,6 +23,7 @@ class AudioLengthConfig(BaseConfig): | |
| mean: Annotated[ | ||
| float, | ||
| Field( | ||
| default=0.0, | ||
| ge=0, | ||
| description="Mean duration in seconds for synthetically generated audio files. Audio lengths follow a normal distribution " | ||
| "around this mean (±`--audio-length-stddev`). Used when `--audio-batch-size` > 0 for multimodal benchmarking. " | ||
|
|
@@ -34,11 +35,12 @@ class AudioLengthConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = AudioDefaults.LENGTH_MEAN | ||
| ] | ||
|
|
||
| stddev: Annotated[ | ||
| float, | ||
| Field( | ||
| default=0.0, | ||
| ge=0, | ||
| description="Standard deviation for synthetic audio duration in seconds. Creates variability in audio lengths when > 0, " | ||
| "simulating mixed-duration audio inputs. Durations follow normal distribution. " | ||
|
|
@@ -50,7 +52,7 @@ class AudioLengthConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = AudioDefaults.LENGTH_STDDEV | ||
| ] | ||
|
|
||
|
|
||
| class AudioConfig(BaseConfig): | ||
|
|
@@ -63,6 +65,7 @@ class AudioConfig(BaseConfig): | |
| batch_size: Annotated[ | ||
| int, | ||
| Field( | ||
| default=1, | ||
| ge=0, | ||
| description="The number of audio inputs to include in each request. Supported with the `chat` endpoint type for multimodal models.", | ||
| ), | ||
|
|
@@ -73,13 +76,14 @@ class AudioConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = AudioDefaults.BATCH_SIZE | ||
| ] | ||
|
|
||
| length: AudioLengthConfig = AudioLengthConfig() | ||
|
|
||
| format: Annotated[ | ||
| AudioFormat, | ||
| Field( | ||
| default=AudioFormat.WAV, | ||
| description="File format for generated audio files. Supports `wav` (uncompressed PCM, larger files) and `mp3` (compressed, smaller files). " | ||
| "Format choice affects file size in multimodal requests but not audio characteristics (sample rate, bit depth, duration).", | ||
| ), | ||
|
|
@@ -89,11 +93,12 @@ class AudioConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = AudioDefaults.FORMAT | ||
| ] | ||
|
|
||
| depths: Annotated[ | ||
| list[int], | ||
| Field( | ||
| default=[16], | ||
| min_length=1, | ||
| description="List of audio bit depths in bits to randomly select from when generating audio files. Each audio file is assigned " | ||
| "a random depth from this list. Common values: `8` (low quality), `16` (CD quality), `24` (professional), `32` (high-end). " | ||
|
|
@@ -106,11 +111,12 @@ class AudioConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = AudioDefaults.DEPTHS | ||
| ] | ||
|
|
||
| sample_rates: Annotated[ | ||
| list[float], | ||
| Field( | ||
| default=[16.0], | ||
| min_length=1, | ||
| description="A list of audio sample rates to randomly select from in kHz.\n" | ||
| "Common sample rates are 16, 44.1, 48, 96, etc.", | ||
|
|
@@ -122,11 +128,12 @@ class AudioConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = AudioDefaults.SAMPLE_RATES | ||
| ] | ||
|
|
||
| num_channels: Annotated[ | ||
| int, | ||
| Field( | ||
| default=1, | ||
| ge=1, | ||
| le=2, | ||
| description="Number of audio channels for synthetic audio generation. `1` = mono (single channel), `2` = stereo (left/right channels). " | ||
|
|
@@ -139,4 +146,18 @@ class AudioConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = AudioDefaults.NUM_CHANNELS | ||
| ] | ||
|
|
||
| @model_validator(mode="after") | ||
| def _validate_audio_options(self) -> Self: | ||
| """Validate the audio options.""" | ||
| audio_options_set = {*self.model_fields_set, *self.length.model_fields_set} | ||
| if not self.audio_enabled() and audio_options_set: | ||
| raise ValueError( | ||
| "Audio generation is disabled but audio options were provided. Please set `--audio-batch-size` and `--audio-length-mean` to enable audio generation." | ||
| ) | ||
|
Comment on lines
+155
to
+158
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Line 155 treats any explicitly set audio option as invalid when audio is disabled, which also rejects an explicit `--audio-batch-size 0`. Proposed fix:
@model_validator(mode="after")
def _validate_audio_options(self) -> Self:
"""Validate the audio options."""
+ if self.batch_size == 0:
+ return self
+
audio_options_set = {*self.model_fields_set, *self.length.model_fields_set}
if not self.audio_enabled() and audio_options_set:
raise ValueError(
"Audio generation is disabled but audio options were provided. Please set `--audio-batch-size` and `--audio-length-mean` to enable audio generation."
)
return self
🧰 Tools
🪛 Ruff (0.15.2)
[warning] 156-158: Avoid specifying long messages outside the exception class (TRY003)
🤖 Prompt for AI Agents |
||
| return self | ||
|
|
||
| def audio_enabled(self) -> bool: | ||
| """Check if audio is enabled.""" | ||
| return self.length.mean > 0 and self.batch_size > 0 | ||
|
Comment on lines
+151
to
+163
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Added basic sanity checks to prevent users from using multimodal options without enabling that modality. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,15 +1,16 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| from pathlib import Path | ||
| from typing import Annotated | ||
|
|
||
| from pydantic import Field | ||
| from pydantic import Field, model_validator | ||
| from typing_extensions import Self | ||
|
|
||
| from aiperf.common.config.base_config import BaseConfig | ||
| from aiperf.common.config.cli_parameter import CLIParameter | ||
| from aiperf.common.config.config_defaults import ImageDefaults | ||
| from aiperf.common.config.groups import Groups | ||
| from aiperf.common.enums import ImageFormat | ||
| from aiperf.common.enums import ImageFormat, ImageSource | ||
|
|
||
|
|
||
| class ImageHeightConfig(BaseConfig): | ||
|
|
@@ -22,6 +23,7 @@ class ImageHeightConfig(BaseConfig): | |
| mean: Annotated[ | ||
| float, | ||
| Field( | ||
| default=0.0, | ||
| ge=0, | ||
| description="Mean height in pixels for synthetically generated images. Image heights follow a normal distribution " | ||
| "around this mean (±`--image-height-stddev`). Used when `--image-batch-size` > 0 for multimodal vision benchmarking. " | ||
|
|
@@ -33,11 +35,12 @@ class ImageHeightConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = ImageDefaults.HEIGHT_MEAN | ||
| ] | ||
|
|
||
| stddev: Annotated[ | ||
| float, | ||
| Field( | ||
| default=0.0, | ||
| ge=0, | ||
| description="Standard deviation for synthetic image heights in pixels. Creates variability in vertical resolution when > 0, " | ||
| "simulating mixed-resolution image inputs. Heights follow normal distribution. " | ||
|
|
@@ -49,7 +52,7 @@ class ImageHeightConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = ImageDefaults.HEIGHT_STDDEV | ||
| ] | ||
|
|
||
|
|
||
| class ImageWidthConfig(BaseConfig): | ||
|
|
@@ -62,6 +65,7 @@ class ImageWidthConfig(BaseConfig): | |
| mean: Annotated[ | ||
| float, | ||
| Field( | ||
| default=0.0, | ||
| ge=0, | ||
| description="Mean width in pixels for synthetically generated images. Image widths follow a normal distribution " | ||
| "around this mean (±`--image-width-stddev`). Combined with `--image-height-mean` to determine image dimensions " | ||
|
|
@@ -73,11 +77,12 @@ class ImageWidthConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = ImageDefaults.WIDTH_MEAN | ||
| ] | ||
|
|
||
| stddev: Annotated[ | ||
| float, | ||
| Field( | ||
| default=0.0, | ||
| ge=0, | ||
| description="Standard deviation for synthetic image widths in pixels. Creates variability in horizontal resolution when > 0, " | ||
| "simulating mixed-resolution image inputs. Widths follow normal distribution. " | ||
|
|
@@ -89,7 +94,7 @@ class ImageWidthConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = ImageDefaults.WIDTH_STDDEV | ||
| ] | ||
|
|
||
|
|
||
| class ImageConfig(BaseConfig): | ||
|
|
@@ -104,6 +109,7 @@ class ImageConfig(BaseConfig): | |
| batch_size: Annotated[ | ||
| int, | ||
| Field( | ||
| default=1, | ||
| ge=0, | ||
| description="Number of images to include in each multimodal request. Supported with `chat` endpoint type for vision-language models. " | ||
| "Each image is generated by randomly sampling and resizing source images from `assets/source_images` directory to specified dimensions. " | ||
|
|
@@ -116,11 +122,12 @@ class ImageConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = ImageDefaults.BATCH_SIZE | ||
| ] | ||
|
|
||
| format: Annotated[ | ||
| ImageFormat, | ||
| Field( | ||
| default=ImageFormat.PNG, | ||
| description="Image file format for generated images. Choose `png` for lossless compression (larger files, best quality), " | ||
| "`jpeg` for lossy compression (smaller files, good quality), or `random` to randomly select between PNG and JPEG for each image. " | ||
| "Format affects file size in multimodal requests and encoding overhead.", | ||
|
|
@@ -131,4 +138,38 @@ class ImageConfig(BaseConfig): | |
| ), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] = ImageDefaults.FORMAT | ||
| ] | ||
|
|
||
| source: Annotated[ | ||
| ImageSource | Path, | ||
| Field( | ||
| default=ImageSource.ASSETS, | ||
| description="Source image generation mode. `assets` loads images from the built-in `assets/source_images` directory " | ||
| "(ships with a small set of 4 images). `noise` generates random noise images on the fly, " | ||
| "providing a larger and more diverse pool without requiring files on disk. " | ||
| "Noise mode is useful for stressing multimodal pipelines more realistically. " | ||
| "A path to a directory loads images from the given directory (e.g. `--image-source ./source_images`).", | ||
| ), | ||
| CLIParameter( | ||
| name=("--image-source",), | ||
| group=_CLI_GROUP, | ||
| ), | ||
| ] | ||
|
|
||
| @model_validator(mode="after") | ||
| def _validate_image_options(self) -> Self: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we have warnings for if someone supplies a |
||
| """Validate the image options.""" | ||
| image_options_set = { | ||
| *self.model_fields_set, | ||
| *self.width.model_fields_set, | ||
| *self.height.model_fields_set, | ||
| } | ||
| if not self.images_enabled() and image_options_set: | ||
| raise ValueError( | ||
| "Image generation is disabled but image options were provided. Please set `--image-width-mean` and `--image-height-mean` to enable image generation." | ||
| ) | ||
|
Comment on lines
+167
to
+170
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Allow explicit disable via `--image-batch-size 0`. Line 167 currently rejects explicitly setting `--image-batch-size 0`. Proposed fix:
@model_validator(mode="after")
def _validate_image_options(self) -> Self:
"""Validate the image options."""
+ if self.batch_size == 0:
+ return self
+
image_options_set = {
*self.model_fields_set,
*self.width.model_fields_set,
*self.height.model_fields_set,
}
if not self.images_enabled() and image_options_set:
raise ValueError(
- "Image generation is disabled but image options were provided. Please set `--image-width-mean` and `--image-height-mean` to enable image generation."
+ "Image generation is disabled but image options were provided. Please set `--image-batch-size` (>0), `--image-width-mean`, and `--image-height-mean` to enable image generation."
)
return self
🧰 Tools
🪛 Ruff (0.15.2)
[warning] 168-170: Avoid specifying long messages outside the exception class (TRY003)
🤖 Prompt for AI Agents |
||
| return self | ||
|
|
||
| def images_enabled(self) -> bool: | ||
| """Check if images are enabled.""" | ||
| return self.width.mean > 0 and self.height.mean > 0 and self.batch_size > 0 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed the defaults file and inlined the defaults directly to reduce cognitive load.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought we generally like relegating magic numbers into these default classes?