1- import httpx
21import random
32from collections import defaultdict
43from math import ceil
54from typing import TYPE_CHECKING , Any , Optional , Union
65
6+ import httpx
77from pydantic import BaseModel , computed_field
88
99if TYPE_CHECKING :
1414from guidellm .dataset .in_memory import InMemoryDatasetCreator
1515from guidellm .dataset .synthetic import SyntheticDatasetConfig , SyntheticDatasetCreator
1616from guidellm .objects .statistics import DistributionSummary
17- from guidellm .preprocess .dataset import TokensConfig
18-
1917
2018
2119class Bucket (BaseModel ):
@@ -73,29 +71,32 @@ def from_data(cls, request_loader: Any):
7371 FileDatasetCreator ,
7472 HFDatasetsCreator ,
7573 ]
76- dataset_name = ""
74+ dataset_name = None
7775 data = request_loader .data
7876 data_args = request_loader .data_args
7977 processor = request_loader .processor
8078 processor_args = request_loader .processor_args
81-
79+
8280 for creator in creators :
83- if creator .is_supported (data , None ):
84- random_seed = 42
85- dataset = creator .handle_create (data , data_args , processor , processor_args , random_seed )
86- dataset_name = creator .extract_dataset_name (dataset )
87- if dataset_name is None or dataset_name == "" :
88- if creator == SyntheticDatasetCreator :
89- data_dict = SyntheticDatasetConfig .parse_str (data )
90- dataset_name = data_dict .source
91- if creator == FileDatasetCreator or isinstance (creator , HFDatasetsCreator ):
92- dataset_name = data
93- if creator == InMemoryDatasetCreator :
94- dataset_name = "In-memory"
81+ if not creator .is_supported (data , None ):
82+ continue
83+ random_seed = 42
84+ dataset = creator .handle_create (
85+ data , data_args , processor , processor_args , random_seed
86+ )
87+ dataset_name = creator .extract_dataset_name (dataset )
88+ if dataset_name is None or dataset_name == "" :
89+ if creator == SyntheticDatasetCreator :
90+ data_dict = SyntheticDatasetConfig .parse_str (data )
91+ dataset_name = data_dict .source
92+ if creator == FileDatasetCreator or isinstance (
93+ creator , HFDatasetsCreator
94+ ):
95+ dataset_name = data
96+ if creator == InMemoryDatasetCreator :
97+ dataset_name = "In-memory"
9598 break
96- return cls (
97- name = dataset_name or ""
98- )
99+ return cls (name = dataset_name or "" )
99100
100101
101102class RunInfo (BaseModel ):
@@ -111,15 +112,15 @@ def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
111112 bm .run_stats .start_time for bm in benchmarks if bm .start_time is not None
112113 )
113114 response = httpx .get (f"https://huggingface.co/api/models/{ model } " )
114- modelJson = response .json ()
115-
115+ model_json = response .json ()
116+
116117 return cls (
117- model = Model (name = model , size = modelJson .get ("usedStorage" , 0 )),
118+ model = Model (name = model , size = model_json .get ("usedStorage" , 0 )),
118119 task = "N/A" ,
119120 timestamp = timestamp ,
120121 dataset = Dataset .from_data (benchmarks [0 ].request_loader ),
121122 )
122-
123+
123124
124125class Distribution (BaseModel ):
125126 statistics : Optional [DistributionSummary ] = None
0 commit comments