
Commit 48a7167

add dataset and model size to run info
Signed-off-by: dalthecow <[email protected]>
1 parent 000b39e commit 48a7167

File tree

3 files changed: +180 −6 lines changed

src/guidellm/presentation/data_models.py
src/ui/lib/components/PageHeader/PageHeader.component.tsx
tests/unit/presentation/test_data_models.py

src/guidellm/presentation/data_models.py

Lines changed: 44 additions & 3 deletions

@@ -1,13 +1,18 @@
 import random
 from collections import defaultdict
 from math import ceil
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union

+import httpx
 from pydantic import BaseModel, computed_field

 if TYPE_CHECKING:
     from guidellm.benchmark.benchmark import GenerativeBenchmark

+from guidellm.dataset.file import FileDatasetCreator
+from guidellm.dataset.hf_datasets import HFDatasetsCreator
+from guidellm.dataset.in_memory import InMemoryDatasetCreator
+from guidellm.dataset.synthetic import SyntheticDatasetConfig, SyntheticDatasetCreator
 from guidellm.objects.statistics import DistributionSummary


@@ -58,6 +63,39 @@ class Model(BaseModel):
 class Dataset(BaseModel):
     name: str

+    @classmethod
+    def from_data(cls, request_loader: Any):
+        creators = [
+            InMemoryDatasetCreator,
+            SyntheticDatasetCreator,
+            FileDatasetCreator,
+            HFDatasetsCreator,
+        ]
+        dataset_name = None
+        data = request_loader.data
+        data_args = request_loader.data_args
+        processor = request_loader.processor
+        processor_args = request_loader.processor_args
+
+        for creator in creators:
+            if not creator.is_supported(data, None):
+                continue
+            random_seed = 42
+            dataset = creator.handle_create(
+                data, data_args, processor, processor_args, random_seed
+            )
+            dataset_name = creator.extract_dataset_name(dataset)
+            if dataset_name is None or dataset_name == "":
+                if creator == SyntheticDatasetCreator:
+                    data_dict = SyntheticDatasetConfig.parse_str(data)
+                    dataset_name = data_dict.source
+                if creator in (FileDatasetCreator, HFDatasetsCreator):
+                    dataset_name = data
+                if creator == InMemoryDatasetCreator:
+                    dataset_name = "In-memory"
+            break
+        return cls(name=dataset_name or "")
+

 class RunInfo(BaseModel):
     model: Model
@@ -71,11 +109,14 @@ def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
         timestamp = max(
             bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None
         )
+        response = httpx.get(f"https://huggingface.co/api/models/{model}")
+        model_json = response.json()
+
         return cls(
-            model=Model(name=model, size=0),
+            model=Model(name=model, size=model_json.get("usedStorage", 0)),
             task="N/A",
             timestamp=timestamp,
-            dataset=Dataset(name="N/A"),
+            dataset=Dataset.from_data(benchmarks[0].request_loader),
         )

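For orientation, here is a minimal sketch (not part of the commit) of how the two additions fit together, assuming guidellm is installed and the model id exists on the Hugging Face Hub. The helper name model_and_dataset is hypothetical; the field "usedStorage" and the Model/Dataset constructors come straight from the diff above.

# Illustrative sketch only; mirrors the logic added in this commit.
import httpx

from guidellm.presentation.data_models import Dataset, Model


def model_and_dataset(model_id: str, request_loader) -> tuple[Model, Dataset]:
    # RunInfo.from_benchmarks now reads "usedStorage" from the public model
    # endpoint and falls back to 0 when the field is missing.
    response = httpx.get(f"https://huggingface.co/api/models/{model_id}")
    size = response.json().get("usedStorage", 0)
    # Dataset.from_data walks the creators (in-memory, synthetic, file,
    # Hugging Face) and names the dataset via the first one that reports
    # the loader's data as supported.
    return Model(name=model_id, size=size), Dataset.from_data(request_loader)
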
src/ui/lib/components/PageHeader/PageHeader.component.tsx

Lines changed: 17 additions & 2 deletions

@@ -2,12 +2,14 @@
 import { Box, Typography } from '@mui/material';

 import { useGetRunInfoQuery } from '../../store/slices/runInfo';
-import { formateDate } from '../../utils/helpers';
+import { formateDate, getFileSize } from '../../utils/helpers';
 import { SpecBadge } from '../SpecBadge';
 import { HeaderCell, HeaderWrapper } from './PageHeader.styles';

 export const Component = () => {
   const { data } = useGetRunInfoQuery();
+  const modelSize = getFileSize(data?.model?.size || 0);
+
   return (
     <Box py={2}>
       <Typography variant="subtitle2" color="surface.onSurfaceAccent">
@@ -24,11 +26,24 @@ export const Component = () => {
           variant="metric2"
           withTooltip
         />
+        <SpecBadge
+          label="Model size"
+          value={data?.model?.size ? `${modelSize?.size} ${modelSize?.units}` : '0B'}
+          variant="body1"
+        />
+      </HeaderCell>
+      <HeaderCell item xs={5} withDivider>
+        <SpecBadge
+          label="Dataset"
+          value={data?.dataset?.name || 'N/A'}
+          variant="caption"
+          withTooltip
+        />
       </HeaderCell>
       <HeaderCell item xs={2} sx={{ paddingRight: 0 }}>
         <SpecBadge
           label="Time Stamp"
-          value={data?.timestamp ? formateDate(data?.timestamp) : 'n/a'}
+          value={data?.timestamp ? formateDate(data?.timestamp) : 'N/A'}
           variant="caption"
         />
       </HeaderCell>
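The new Model size badge relies on a getFileSize helper from ../../utils/helpers whose implementation is not included in this diff. Judging by the JSX, it returns an object with size and units fields. The Python sketch below is only an assumption about the kind of conversion such a helper performs, not the actual UI code:

def get_file_size(num_bytes: int) -> dict:
    # Hypothetical stand-in for the UI helper: scale a raw byte count into
    # a human-readable {"size", "units"} pair, e.g. 1536 -> 1.5 KB.
    units = ["B", "KB", "MB", "GB", "TB"]
    size = float(num_bytes)
    for unit in units[:-1]:
        if size < 1024:
            return {"size": round(size, 1), "units": unit}
        size /= 1024
    return {"size": round(size, 1), "units": units[-1]}
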

tests/unit/presentation/test_data_models.py

Lines changed: 119 additions & 1 deletion

@@ -1,6 +1,13 @@
+from unittest.mock import MagicMock, patch
+
 import pytest

-from guidellm.presentation.data_models import Bucket
+from guidellm.dataset.file import FileDatasetCreator
+from guidellm.dataset.hf_datasets import HFDatasetsCreator
+from guidellm.dataset.in_memory import InMemoryDatasetCreator
+from guidellm.dataset.synthetic import SyntheticDatasetCreator
+from guidellm.presentation.data_models import Bucket, Dataset
+from tests.unit.mock_benchmark import mock_generative_benchmark


 @pytest.mark.smoke
@@ -18,3 +25,114 @@ def test_bucket_from_data():
     assert buckets[1].value == 8.0
     assert buckets[1].count == 5
     assert bucket_width == 1
+
+
+def mock_processor(cls):
+    return mock_generative_benchmark().request_loader.processor
+
+
+def new_handle_create(cls, *args, **kwargs):
+    return MagicMock()
+
+
+def new_extract_dataset_name(cls, *args, **kwargs):
+    return "data:prideandprejudice.txt.gz"
+
+
+@pytest.mark.smoke
+def test_dataset_from_data_uses_extracted_dataset_name():
+    mock_benchmark = mock_generative_benchmark()
+    with (
+        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
+        patch.object(
+            SyntheticDatasetCreator,
+            "extract_dataset_name",
+            new=new_extract_dataset_name,
+        ),
+    ):
+        dataset = Dataset.from_data(mock_benchmark.request_loader)
+    assert dataset.name == "data:prideandprejudice.txt.gz"
+
+
+def new_is_supported(cls, *args, **kwargs):
+    return True
+
+
+@pytest.mark.smoke
+def test_dataset_from_data_with_in_memory_dataset():
+    mock_benchmark = mock_generative_benchmark()
+    with patch.object(InMemoryDatasetCreator, "is_supported", new=new_is_supported):
+        dataset = Dataset.from_data(mock_benchmark.request_loader)
+    assert dataset.name == "In-memory"
+
+
+def hardcoded_isnt_supported(cls, *args, **kwargs):
+    return False
+
+
+def new_extract_dataset_name_none(cls, *args, **kwargs):
+    return None
+
+
+@pytest.mark.smoke
+def test_dataset_from_data_with_synthetic_dataset():
+    mock_benchmark = mock_generative_benchmark()
+    with (
+        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
+        patch.object(
+            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
+        ),
+        patch.object(SyntheticDatasetCreator, "is_supported", new=new_is_supported),
+        patch.object(
+            SyntheticDatasetCreator,
+            "extract_dataset_name",
+            new=new_extract_dataset_name_none,
+        ),
+    ):
+        dataset = Dataset.from_data(mock_benchmark.request_loader)
+    assert dataset.name == "data:prideandprejudice.txt.gz"
+
+
+@pytest.mark.smoke
+def test_dataset_from_data_with_file_dataset():
+    mock_benchmark = mock_generative_benchmark()
+    mock_benchmark.request_loader.data = "dataset.yaml"
+    with (
+        patch.object(FileDatasetCreator, "handle_create", new=new_handle_create),
+        patch.object(
+            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
+        ),
+        patch.object(
+            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
+        ),
+        patch.object(FileDatasetCreator, "is_supported", new=new_is_supported),
+        patch.object(
+            FileDatasetCreator,
+            "extract_dataset_name",
+            new=new_extract_dataset_name_none,
+        ),
+    ):
+        dataset = Dataset.from_data(mock_benchmark.request_loader)
+    assert dataset.name == "dataset.yaml"
+
+
+@pytest.mark.smoke
+def test_dataset_from_data_with_hf_dataset():
+    mock_benchmark = mock_generative_benchmark()
+    mock_benchmark.request_loader.data = "openai/gsm8k"
+    with (
+        patch.object(HFDatasetsCreator, "handle_create", new=new_handle_create),
+        patch.object(
+            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
+        ),
+        patch.object(
+            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
+        ),
+        patch.object(FileDatasetCreator, "is_supported", new=hardcoded_isnt_supported),
+        patch.object(HFDatasetsCreator, "is_supported", new=new_is_supported),
+        patch.object(
+            HFDatasetsCreator, "extract_dataset_name", new=new_extract_dataset_name_none
+        ),
+    ):
+        dataset = Dataset.from_data(mock_benchmark.request_loader)
+    assert dataset.name == "openai/gsm8k"
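Note that these tests never build real datasets: they patch the creators' is_supported, handle_create, and extract_dataset_name classmethods so each fallback branch of Dataset.from_data is exercised in isolation against the mock benchmark's request_loader. Assuming the repository's test dependencies are installed, the module can be run on its own, for example:

# Run only this test module (a usage example, not part of the commit).
import pytest

pytest.main(["tests/unit/presentation/test_data_models.py", "-v"])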
