Skip to content

Commit 3db57b0

Browse files
markurtzCopilotsjmonson
authored
Scheduler refactor [utils]: functions, mixins, statistics, text (#290)
## Summary This PR adds new utility modules for safe math operations and object info extraction, and relocates the statistics classes to consolidate them into the utils package. ## Details - **Move statistics classes from `objects` to `utils`**: Relocates `DistributionSummary`, `Percentiles`, `RunningStats`, `StatusDistributionSummary`, and `TimeRunningStats` from `guidellm.objects.statistics` to `guidellm.utils.statistics` - **Add new `functions.py` module**: Implements defensive programming utilities including `safe_getattr`, `safe_divide`, `safe_multiply`, `safe_add`, `safe_format_timestamp`, and `all_defined` for handling None values and edge cases - **Add new `mixins.py` module**: Provides `InfoMixin` class for standardized metadata extraction and object introspection across different class hierarchies - **Enhance `text.py` module**: Adds comprehensive documentation, `format_value_display` function for consistent metric formatting, and improved text processing utilities - **Update import statements**: Modifies all affected modules (`benchmark`, `presentation`) to import statistics classes from their new location in `utils` - **Remove deprecated `objects` package**: Deletes the now-empty `objects` directory and associated test files - **Add comprehensive test coverage**: Includes new test suites for `functions.py`, `mixins.py`, and `statistics.py` - **Update `__init__.py` exports**: Adds new utility functions and classes to the main utils package exports for easy access ## Test Plan - Run the existing test suite to ensure no regressions from the statistics class relocation - Execute new test files: - test_functions.py - Tests for safe operation utilities - test_mixins.py - Tests for InfoMixin functionality - test_statistics.py - Comprehensive tests for statistical analysis utilities - test_text.py - Tests for enhanced text processing functions ## Related Issues - Resolves # --- - [X] "I certify that all code in this PR is my own, except as noted below." 
## Use of AI - [X] Includes AI-assisted code completion - [X] Includes code generated by an AI application - [ ] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`) --------- Signed-off-by: Mark Kurtz <[email protected]> Co-authored-by: Copilot <[email protected]> Co-authored-by: Samuel Monson <[email protected]>
1 parent 3f7f7ac commit 3db57b0

File tree

16 files changed

+1454
-150
lines changed

16 files changed

+1454
-150
lines changed

src/guidellm/benchmark/aggregator.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@
2222
GenerativeTextResponseStats,
2323
)
2424
from guidellm.config import settings
25-
from guidellm.objects import (
26-
RunningStats,
27-
TimeRunningStats,
28-
)
2925
from guidellm.request import (
3026
GenerationRequest,
3127
GenerativeRequestLoaderDescription,
@@ -38,7 +34,13 @@
3834
SchedulerRequestResult,
3935
WorkerDescription,
4036
)
41-
from guidellm.utils import StandardBaseModel, StatusBreakdown, check_load_processor
37+
from guidellm.utils import (
38+
RunningStats,
39+
StandardBaseModel,
40+
StatusBreakdown,
41+
TimeRunningStats,
42+
check_load_processor,
43+
)
4244

4345
__all__ = [
4446
"AggregatorT",

src/guidellm/benchmark/benchmark.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@
1212
SynchronousProfile,
1313
ThroughputProfile,
1414
)
15-
from guidellm.objects import (
16-
StatusDistributionSummary,
17-
)
1815
from guidellm.request import (
1916
GenerativeRequestLoaderDescription,
2017
RequestLoaderDescription,
@@ -30,7 +27,11 @@
3027
ThroughputStrategy,
3128
WorkerDescription,
3229
)
33-
from guidellm.utils import StandardBaseModel, StatusBreakdown
30+
from guidellm.utils import (
31+
StandardBaseModel,
32+
StatusBreakdown,
33+
StatusDistributionSummary,
34+
)
3435

3536
__all__ = [
3637
"Benchmark",

src/guidellm/benchmark/output.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@
2121
ThroughputProfile,
2222
)
2323
from guidellm.config import settings
24-
from guidellm.objects import (
25-
DistributionSummary,
26-
StatusDistributionSummary,
27-
)
2824
from guidellm.presentation import UIDataBuilder
2925
from guidellm.presentation.injector import create_report
3026
from guidellm.scheduler import strategy_display_str
31-
from guidellm.utils import Colors, StandardBaseModel, split_text_list_by_length
27+
from guidellm.utils import (
28+
Colors,
29+
DistributionSummary,
30+
StandardBaseModel,
31+
StatusDistributionSummary,
32+
split_text_list_by_length,
33+
)
3234

3335
__all__ = [
3436
"GenerativeBenchmarksConsole",

src/guidellm/objects/__init__.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

src/guidellm/presentation/data_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
if TYPE_CHECKING:
99
from guidellm.benchmark.benchmark import GenerativeBenchmark
1010

11-
from guidellm.objects.statistics import DistributionSummary
11+
from guidellm.utils.statistics import DistributionSummary
1212

1313

1414
class Bucket(BaseModel):

src/guidellm/utils/__init__.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
from .auto_importer import AutoImporterMixin
22
from .colors import Colors
33
from .default_group import DefaultGroupHandler
4+
from .functions import (
5+
all_defined,
6+
safe_add,
7+
safe_divide,
8+
safe_format_timestamp,
9+
safe_getattr,
10+
safe_multiply,
11+
)
412
from .hf_datasets import (
513
SUPPORTED_TYPES,
614
save_dataset_to_file,
@@ -18,11 +26,18 @@
1826
from .random import IntegerRangeSampler
1927
from .registry import RegistryMixin
2028
from .singleton import SingletonMixin, ThreadSafeSingletonMixin
29+
from .statistics import (
30+
DistributionSummary,
31+
Percentiles,
32+
RunningStats,
33+
StatusDistributionSummary,
34+
TimeRunningStats,
35+
)
2136
from .text import (
2237
EndlessTextCreator,
2338
clean_text,
2439
filter_text,
25-
is_puncutation,
40+
is_punctuation,
2641
load_text,
2742
split_text,
2843
split_text_list_by_length,
@@ -33,21 +48,32 @@
3348
"AutoImporterMixin",
3449
"Colors",
3550
"DefaultGroupHandler",
51+
"DistributionSummary",
3652
"EndlessTextCreator",
3753
"IntegerRangeSampler",
54+
"Percentiles",
3855
"PydanticClassRegistryMixin",
3956
"RegistryMixin",
4057
"ReloadableBaseModel",
58+
"RunningStats",
4159
"SingletonMixin",
4260
"StandardBaseDict",
4361
"StandardBaseModel",
4462
"StatusBreakdown",
63+
"StatusDistributionSummary",
4564
"ThreadSafeSingletonMixin",
65+
"TimeRunningStats",
66+
"all_defined",
4667
"check_load_processor",
4768
"clean_text",
4869
"filter_text",
49-
"is_puncutation",
70+
"is_punctuation",
5071
"load_text",
72+
"safe_add",
73+
"safe_divide",
74+
"safe_format_timestamp",
75+
"safe_getattr",
76+
"safe_multiply",
5177
"save_dataset_to_file",
5278
"split_text",
5379
"split_text_list_by_length",

src/guidellm/utils/functions.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
"""
2+
Utility functions for safe operations and value handling.
3+
4+
Provides defensive programming utilities for common operations that may encounter
5+
None values, invalid inputs, or edge cases. Includes safe arithmetic operations,
6+
attribute access, and timestamp formatting.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
from datetime import datetime
12+
from typing import Any
13+
14+
__all__ = [
15+
"all_defined",
16+
"safe_add",
17+
"safe_divide",
18+
"safe_format_timestamp",
19+
"safe_getattr",
20+
"safe_multiply",
21+
]
22+
23+
24+
def safe_getattr(obj: Any | None, attr: str, default: Any = None) -> Any:
    """
    Look up an attribute by name, tolerating a missing object or attribute.

    :param obj: Object to read the attribute from; may be None
    :param attr: Name of the attribute to look up
    :param default: Fallback returned when obj is None or lacks the attribute
    :return: The attribute's value, or default when it cannot be resolved
    """
    return default if obj is None else getattr(obj, attr, default)
37+
38+
39+
def all_defined(*values: Any | None) -> bool:
    """
    Report whether every supplied value is something other than None.

    Falsy values such as 0, "" or False still count as defined; only None
    itself causes a False result. Calling with no arguments returns True.

    :param values: Any number of values to test against None
    :return: False as soon as a None is found, True otherwise
    """
    for candidate in values:
        if candidate is None:
            return False
    return True
47+
48+
49+
def safe_divide(
    numerator: int | float | None,
    denominator: int | float | None,
    num_default: float = 0.0,
    den_default: float = 1.0,
) -> float:
    """
    Divide two numbers, substituting defaults for None and guarding against zero.

    :param numerator: Dividend, or None to fall back to num_default
    :param denominator: Divisor, or None to fall back to den_default
    :param num_default: Replacement used when numerator is None
    :param den_default: Replacement used when denominator is None
    :return: Quotient of the resolved operands; a zero divisor is replaced by
        1e-10 so the call never raises ZeroDivisionError
    """
    top = num_default if numerator is None else numerator
    bottom = den_default if denominator is None else denominator

    # A zero divisor (explicit or via den_default) is swapped for a tiny epsilon.
    if not bottom:
        bottom = 1e-10

    return top / bottom
68+
69+
70+
def safe_multiply(*values: int | float | None, default: float = 1.0) -> float:
    """
    Multiply any number of values together, ignoring None entries.

    :param values: Factors to multiply; each None is replaced by 1.0
    :param default: Initial value the running product starts from
    :return: default multiplied by every factor in order
    """
    product = default
    # Substitute the multiplicative identity for missing factors up front.
    for factor in (1.0 if item is None else item for item in values):
        product *= factor
    return product
82+
83+
84+
def safe_add(
    *values: int | float | None, signs: list[int] | None = None, default: float = 0.0
) -> float:
    """
    Safely add multiple numbers with None handling and optional signs.

    Every value is multiplied by its corresponding sign before being summed,
    including the first one, so ``safe_add(5, 3, signs=[-1, 1])`` yields -2.

    :param values: Variable number of values to add, None values use default
    :param signs: Optional list of 1 (add) or -1 (subtract) for each value.
        If None, all values are added. Must match length of values.
    :param default: Value to substitute for None values; also returned as-is
        when no values are given
    :return: Signed sum of all values (default used when value is None)
    :raises ValueError: If signs is provided and its length differs from values
    """
    if not values:
        return default

    if signs is None:
        signs = [1] * len(values)
    elif len(signs) != len(values):
        raise ValueError("Length of signs must match length of values")

    # Resolve None entries and apply each sign up front so that the first
    # value is treated the same way as all the others.
    terms = [
        sign * (default if value is None else value)
        for sign, value in zip(signs, values)
    ]

    # Seed the sum with the first term (rather than 0) to keep the original
    # left-to-right accumulation and numeric type.
    return sum(terms[1:], terms[0])
114+
115+
116+
def safe_format_timestamp(
    timestamp: float | None, format_: str = "%H:%M:%S", default: str = "N/A"
) -> str:
    """
    Render a Unix timestamp as text, falling back to a placeholder on failure.

    The timestamp is converted with ``datetime.fromtimestamp`` without a tz
    argument and then formatted with ``strftime``.

    :param timestamp: Unix timestamp to render, or None
    :param format_: Strftime pattern applied to the converted timestamp
    :param default: Text returned when timestamp is None or the conversion or
        formatting raises ValueError, TypeError, OverflowError, or OSError
    :return: The formatted timestamp, or default if it could not be produced
    """
    if timestamp is None:
        return default

    try:
        moment = datetime.fromtimestamp(timestamp)
        return moment.strftime(format_)
    except (ValueError, TypeError, OverflowError, OSError):
        return default

0 commit comments

Comments
 (0)