@@ -33,7 +33,7 @@ def tokenizer_mock():
3333 )
3434 return tokenizer
3535
36-
36+ @ pytest . mark . smoke
3737@patch (f"{ process_dataset .__module__ } .guidellm_load_dataset" )
3838@patch (f"{ process_dataset .__module__ } .check_load_processor" )
3939@patch (f"{ process_dataset .__module__ } .Dataset" )
@@ -68,51 +68,51 @@ def test_strategy_handler_called(
6868 mock_load_dataset .assert_called_once ()
6969 mock_check_processor .assert_called_once ()
7070
71-
@pytest.mark.sanity
def test_handle_ignore_strategy_too_short(tokenizer_mock):
    """Check that the ignore strategy yields ``None`` for the prompt "short".

    Calls ``handle_ignore_strategy`` with the length argument 10 and also
    verifies that the prompt was handed to ``tokenizer_mock.encode``.
    """
    prompt = "short"
    outcome = handle_ignore_strategy(prompt, 10, tokenizer_mock)
    tokenizer_mock.encode.assert_called_with(prompt)
    assert outcome is None
7676
77-
@pytest.mark.sanity
def test_handle_ignore_strategy_sufficient_length(tokenizer_mock):
    """Check that the ignore strategy hands back "long prompt" unchanged.

    Calls ``handle_ignore_strategy`` with the length argument 5 and also
    verifies that the prompt was handed to ``tokenizer_mock.encode``.
    """
    prompt = "long prompt"
    kept = handle_ignore_strategy(prompt, 5, tokenizer_mock)
    tokenizer_mock.encode.assert_called_with(prompt)
    assert kept == "long prompt"
8282
83-
@pytest.mark.sanity
def test_handle_concatenate_strategy_enough_prompts(tokenizer_mock):
    """Check concatenation when the dataset iterator still has a row.

    The iterator supplies one row whose "prompt" value is "longer"; the
    expected result is the starting prompt, the delimiter, and that value
    joined together.
    """
    delimiter = "\n "
    remaining_rows = iter([{"prompt": "longer"}])
    combined = handle_concatenate_strategy(
        "short", 10, remaining_rows, "prompt", tokenizer_mock, delimiter
    )
    assert combined == "short\n longer"
9090
91-
@pytest.mark.sanity
def test_handle_concatenate_strategy_not_enough_prompts(tokenizer_mock):
    """Check that concatenation yields ``None`` when the iterator is empty.

    The dataset iterator has no rows to draw from, and the test expects
    ``handle_concatenate_strategy`` to return ``None`` for the prompt "short".
    """
    exhausted_rows: Iterator = iter([])
    combined = handle_concatenate_strategy(
        "short",
        10,
        exhausted_rows,
        "prompt",
        tokenizer_mock,
        "",
    )
    assert combined is None
9898
99-
@pytest.mark.sanity
def test_handle_pad_strategy(tokenizer_mock):
    """Check that the pad strategy extends "short" with the pad string "p".

    With the length argument 10, the expected padded prompt is "shortppppp".
    """
    pad_char = "p"
    padded = handle_pad_strategy("short", 10, tokenizer_mock, pad_char)
    assert padded == "shortppppp"
103103
104-
@pytest.mark.sanity
def test_handle_error_strategy_valid_prompt(tokenizer_mock):
    """Check that the error strategy returns "valid prompt" unchanged.

    Calls ``handle_error_strategy`` with the length argument 5 and also
    verifies that the prompt was handed to ``tokenizer_mock.encode``.
    """
    prompt = "valid prompt"
    accepted = handle_error_strategy(prompt, 5, tokenizer_mock)
    tokenizer_mock.encode.assert_called_with(prompt)
    assert accepted == "valid prompt"
109109
110-
@pytest.mark.sanity
def test_handle_error_strategy_too_short_prompt(tokenizer_mock):
    """Check that the error strategy raises ``PromptTooShortError``.

    The call uses the prompt "short" with the length argument 10.
    """
    prompt, length_arg = "short", 10
    with pytest.raises(PromptTooShortError):
        handle_error_strategy(prompt, length_arg, tokenizer_mock)
114114
115-
115+ @ pytest . mark . smoke
116116@patch ("guidellm.preprocess.dataset.save_dataset_to_file" )
117117@patch ("guidellm.preprocess.dataset.Dataset" )
118118@patch ("guidellm.preprocess.dataset.guidellm_load_dataset" )
@@ -159,7 +159,7 @@ def test_process_dataset_non_empty(
159159 assert "output_tokens_count" in item
160160 assert len (tokenizer_mock .encode (item ["prompt" ])) <= 3
161161
162-
162+ @ pytest . mark . sanity
163163@patch (f"{ process_dataset .__module__ } .Dataset" )
164164@patch (f"{ process_dataset .__module__ } .guidellm_load_dataset" )
165165@patch (f"{ process_dataset .__module__ } .check_load_processor" )
@@ -188,7 +188,7 @@ def test_process_dataset_empty_after_processing(
188188 mock_check_processor .assert_called_once ()
189189 mock_dataset_class .from_list .assert_not_called ()
190190
191-
191+ @ pytest . mark . smoke
192192@patch (f"{ process_dataset .__module__ } .push_dataset_to_hub" )
193193@patch (f"{ process_dataset .__module__ } .Dataset" )
194194@patch (f"{ process_dataset .__module__ } .guidellm_load_dataset" )
@@ -221,7 +221,7 @@ def test_process_dataset_push_to_hub_called(
221221 )
222222 mock_push .assert_called_once_with ("id123" , mock_dataset_obj )
223223
224-
224+ @ pytest . mark . sanity
225225@patch (f"{ process_dataset .__module__ } .push_dataset_to_hub" )
226226@patch (f"{ process_dataset .__module__ } .Dataset" )
227227@patch (f"{ process_dataset .__module__ } .guidellm_load_dataset" )
@@ -253,29 +253,29 @@ def test_process_dataset_push_to_hub_not_called(
253253 )
254254 mock_push .assert_not_called ()
255255
256-
@pytest.mark.regression
def test_push_dataset_to_hub_success():
    """Check that the dataset is pushed with the token taken from HF_TOKEN.

    With ``HF_TOKEN`` set to "token", ``push_dataset_to_hub`` is expected to
    call ``push_to_hub`` on the dataset exactly once with the dataset id and
    that token.
    """
    mock_dataset = MagicMock(spec=Dataset)
    # patch.dict restores os.environ on exit, so HF_TOKEN does not leak into
    # tests that run afterwards.
    with patch.dict(os.environ, {"HF_TOKEN": "token"}):
        push_dataset_to_hub("dataset_id", mock_dataset)
    mock_dataset.push_to_hub.assert_called_once_with("dataset_id", token="token")
262262
263-
@pytest.mark.regression
def test_push_dataset_to_hub_error_no_env():
    """Check that pushing without HF_TOKEN in the environment raises ValueError.

    The error message is expected to match "hub_dataset_id and HF_TOKEN".
    """
    mock_dataset = MagicMock(spec=Dataset)
    # patch.dict snapshots os.environ and restores it on exit, so removing
    # HF_TOKEN here does not permanently alter the process environment.
    with patch.dict(os.environ):
        os.environ.pop("HF_TOKEN", None)
        with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
            push_dataset_to_hub("dataset_id", mock_dataset)
270270
271-
@pytest.mark.regression
def test_push_dataset_to_hub_error_no_id():
    """Check that pushing with a ``None`` dataset id raises ValueError.

    ``HF_TOKEN`` is set for the duration of the call, so the missing id is
    the only absent input. The error message is expected to match
    "hub_dataset_id and HF_TOKEN".
    """
    mock_dataset = MagicMock(spec=Dataset)
    # patch.dict restores os.environ on exit, so HF_TOKEN does not leak into
    # tests that run afterwards.
    with patch.dict(os.environ, {"HF_TOKEN": "token"}):
        with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
            push_dataset_to_hub(None, mock_dataset)
277277
278-
278+ @ pytest . mark . regression
279279@patch .object (Path , "mkdir" )
280280def test_save_dataset_to_file_csv (mock_mkdir ):
281281 mock_dataset = MagicMock (spec = Dataset )
@@ -284,7 +284,7 @@ def test_save_dataset_to_file_csv(mock_mkdir):
284284 mock_dataset .to_csv .assert_called_once_with (output_path )
285285 mock_mkdir .assert_called_once_with (parents = True , exist_ok = True )
286286
287-
287+ @ pytest . mark . regression
288288@patch .object (Path , "mkdir" )
289289def test_save_dataset_to_file_csv_capitalized (mock_mkdir ):
290290 mock_dataset = MagicMock (spec = Dataset )
@@ -293,7 +293,7 @@ def test_save_dataset_to_file_csv_capitalized(mock_mkdir):
293293 mock_dataset .to_csv .assert_called_once_with (output_path )
294294 mock_mkdir .assert_called_once_with (parents = True , exist_ok = True )
295295
296-
296+ @ pytest . mark . regression
297297@patch .object (Path , "mkdir" )
298298def test_save_dataset_to_file_json (mock_mkdir ):
299299 mock_dataset = MagicMock (spec = Dataset )
@@ -302,7 +302,7 @@ def test_save_dataset_to_file_json(mock_mkdir):
302302 mock_dataset .to_json .assert_called_once_with (output_path )
303303 mock_mkdir .assert_called_once_with (parents = True , exist_ok = True )
304304
305-
305+ @ pytest . mark . regression
306306@patch .object (Path , "mkdir" )
307307def test_save_dataset_to_file_json_capitalized (mock_mkdir ):
308308 mock_dataset = MagicMock (spec = Dataset )
@@ -311,7 +311,7 @@ def test_save_dataset_to_file_json_capitalized(mock_mkdir):
311311 mock_dataset .to_json .assert_called_once_with (output_path )
312312 mock_mkdir .assert_called_once_with (parents = True , exist_ok = True )
313313
314-
314+ @ pytest . mark . regression
315315@patch .object (Path , "mkdir" )
316316def test_save_dataset_to_file_jsonl (mock_mkdir ):
317317 mock_dataset = MagicMock (spec = Dataset )
@@ -320,7 +320,7 @@ def test_save_dataset_to_file_jsonl(mock_mkdir):
320320 mock_dataset .to_json .assert_called_once_with (output_path )
321321 mock_mkdir .assert_called_once_with (parents = True , exist_ok = True )
322322
323-
323+ @ pytest . mark . regression
324324@patch .object (Path , "mkdir" )
325325def test_save_dataset_to_file_jsonl_capitalized (mock_mkdir ):
326326 mock_dataset = MagicMock (spec = Dataset )
@@ -329,7 +329,7 @@ def test_save_dataset_to_file_jsonl_capitalized(mock_mkdir):
329329 mock_dataset .to_json .assert_called_once_with (output_path )
330330 mock_mkdir .assert_called_once_with (parents = True , exist_ok = True )
331331
332-
332+ @ pytest . mark . regression
333333@patch .object (Path , "mkdir" )
334334def test_save_dataset_to_file_parquet (mock_mkdir ):
335335 mock_dataset = MagicMock (spec = Dataset )
@@ -338,7 +338,7 @@ def test_save_dataset_to_file_parquet(mock_mkdir):
338338 mock_dataset .to_parquet .assert_called_once_with (output_path )
339339 mock_mkdir .assert_called_once_with (parents = True , exist_ok = True )
340340
341-
341+ @ pytest . mark . regression
342342@patch .object (Path , "mkdir" )
343343def test_save_dataset_to_file_unsupported_type (mock_mkdir ):
344344 mock_dataset = MagicMock (spec = Dataset )
0 commit comments