151151 "until the user exits. "
152152 ),
153153)
154+ @click .option (
155+ "--subset-size" ,
156+ type = int ,
157+ default = None ,
158+ help = (
159+ "The number of subsets to use from the dataset. "
160+ "If not provided, all subsets will be used."
161+ ),
162+ )
154163def generate_benchmark_report_cli (
155164 target : str ,
156165 backend : BackendEnginePublic ,
@@ -164,6 +173,7 @@ def generate_benchmark_report_cli(
164173 max_requests : Union [Literal ["dataset" ], int , None ],
165174 output_path : str ,
166175 enable_continuous_refresh : bool ,
176+ subset_size : Optional [int ],
167177):
168178 """
169179 Generate a benchmark report for a specified backend and dataset.
@@ -181,6 +191,7 @@ def generate_benchmark_report_cli(
181191 max_requests = max_requests ,
182192 output_path = output_path ,
183193 cont_refresh_table = enable_continuous_refresh ,
194+ subset_size = subset_size ,
184195 )
185196
186197
@@ -197,6 +208,7 @@ def generate_benchmark_report(
197208 max_requests : Union [Literal ["dataset" ], int , None ],
198209 output_path : str ,
199210 cont_refresh_table : bool ,
211+ subset_size : Optional [int ],
200212) -> GuidanceReport :
201213 """
202214 Generate a benchmark report for a specified backend and dataset.
@@ -251,7 +263,7 @@ def generate_benchmark_report(
251263 request_generator = FileRequestGenerator (path = data , tokenizer = tokenizer_inst )
252264 elif data_type == "transformers" :
253265 request_generator = TransformersDatasetRequestGenerator (
254- dataset = data , tokenizer = tokenizer_inst
266+ dataset = data , tokenizer = tokenizer_inst , subset_size = subset_size
255267 )
256268 else :
257269 raise ValueError (f"Unknown data type: { data_type } " )
0 commit comments