Skip to content

Commit 40c1118

Browse files
committed
Added pytest mark to UTs
1 parent a9f4fa6 commit 40c1118

File tree

1 file changed

+23 -23 lines changed

1 file changed

+23 -23 lines changed

tests/unit/preprocess/test_dataset.py

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ def tokenizer_mock():
3333
)
3434
return tokenizer
3535

36-
36+
@pytest.mark.smoke
3737
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
3838
@patch(f"{process_dataset.__module__}.check_load_processor")
3939
@patch(f"{process_dataset.__module__}.Dataset")
@@ -68,51 +68,51 @@ def test_strategy_handler_called(
6868
mock_load_dataset.assert_called_once()
6969
mock_check_processor.assert_called_once()
7070

71-
71+
@pytest.mark.sanity
7272
def test_handle_ignore_strategy_too_short(tokenizer_mock):
7373
result = handle_ignore_strategy("short", 10, tokenizer_mock)
7474
assert result is None
7575
tokenizer_mock.encode.assert_called_with("short")
7676

77-
77+
@pytest.mark.sanity
7878
def test_handle_ignore_strategy_sufficient_length(tokenizer_mock):
7979
result = handle_ignore_strategy("long prompt", 5, tokenizer_mock)
8080
assert result == "long prompt"
8181
tokenizer_mock.encode.assert_called_with("long prompt")
8282

83-
83+
@pytest.mark.sanity
8484
def test_handle_concatenate_strategy_enough_prompts(tokenizer_mock):
8585
dataset_iter = iter([{"prompt": "longer"}])
8686
result = handle_concatenate_strategy(
8787
"short", 10, dataset_iter, "prompt", tokenizer_mock, "\n"
8888
)
8989
assert result == "short\nlonger"
9090

91-
91+
@pytest.mark.sanity
9292
def test_handle_concatenate_strategy_not_enough_prompts(tokenizer_mock):
9393
dataset_iter: Iterator = iter([])
9494
result = handle_concatenate_strategy(
9595
"short", 10, dataset_iter, "prompt", tokenizer_mock, ""
9696
)
9797
assert result is None
9898

99-
99+
@pytest.mark.sanity
100100
def test_handle_pad_strategy(tokenizer_mock):
101101
result = handle_pad_strategy("short", 10, tokenizer_mock, "p")
102102
assert result == "shortppppp"
103103

104-
104+
@pytest.mark.sanity
105105
def test_handle_error_strategy_valid_prompt(tokenizer_mock):
106106
result = handle_error_strategy("valid prompt", 5, tokenizer_mock)
107107
assert result == "valid prompt"
108108
tokenizer_mock.encode.assert_called_with("valid prompt")
109109

110-
110+
@pytest.mark.sanity
111111
def test_handle_error_strategy_too_short_prompt(tokenizer_mock):
112112
with pytest.raises(PromptTooShortError):
113113
handle_error_strategy("short", 10, tokenizer_mock)
114114

115-
115+
@pytest.mark.smoke
116116
@patch("guidellm.preprocess.dataset.save_dataset_to_file")
117117
@patch("guidellm.preprocess.dataset.Dataset")
118118
@patch("guidellm.preprocess.dataset.guidellm_load_dataset")
@@ -159,7 +159,7 @@ def test_process_dataset_non_empty(
159159
assert "output_tokens_count" in item
160160
assert len(tokenizer_mock.encode(item["prompt"])) <= 3
161161

162-
162+
@pytest.mark.sanity
163163
@patch(f"{process_dataset.__module__}.Dataset")
164164
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
165165
@patch(f"{process_dataset.__module__}.check_load_processor")
@@ -188,7 +188,7 @@ def test_process_dataset_empty_after_processing(
188188
mock_check_processor.assert_called_once()
189189
mock_dataset_class.from_list.assert_not_called()
190190

191-
191+
@pytest.mark.smoke
192192
@patch(f"{process_dataset.__module__}.push_dataset_to_hub")
193193
@patch(f"{process_dataset.__module__}.Dataset")
194194
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
@@ -221,7 +221,7 @@ def test_process_dataset_push_to_hub_called(
221221
)
222222
mock_push.assert_called_once_with("id123", mock_dataset_obj)
223223

224-
224+
@pytest.mark.sanity
225225
@patch(f"{process_dataset.__module__}.push_dataset_to_hub")
226226
@patch(f"{process_dataset.__module__}.Dataset")
227227
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
@@ -253,29 +253,29 @@ def test_process_dataset_push_to_hub_not_called(
253253
)
254254
mock_push.assert_not_called()
255255

256-
256+
@pytest.mark.regression
257257
def test_push_dataset_to_hub_success():
258258
os.environ["HF_TOKEN"] = "token"
259259
mock_dataset = MagicMock(spec=Dataset)
260260
push_dataset_to_hub("dataset_id", mock_dataset)
261261
mock_dataset.push_to_hub.assert_called_once_with("dataset_id", token="token")
262262

263-
263+
@pytest.mark.regression
264264
def test_push_dataset_to_hub_error_no_env():
265265
if "HF_TOKEN" in os.environ:
266266
del os.environ["HF_TOKEN"]
267267
mock_dataset = MagicMock(spec=Dataset)
268268
with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
269269
push_dataset_to_hub("dataset_id", mock_dataset)
270270

271-
271+
@pytest.mark.regression
272272
def test_push_dataset_to_hub_error_no_id():
273273
os.environ["HF_TOKEN"] = "token"
274274
mock_dataset = MagicMock(spec=Dataset)
275275
with pytest.raises(ValueError, match="hub_dataset_id and HF_TOKEN"):
276276
push_dataset_to_hub(None, mock_dataset)
277277

278-
278+
@pytest.mark.regression
279279
@patch.object(Path, "mkdir")
280280
def test_save_dataset_to_file_csv(mock_mkdir):
281281
mock_dataset = MagicMock(spec=Dataset)
@@ -284,7 +284,7 @@ def test_save_dataset_to_file_csv(mock_mkdir):
284284
mock_dataset.to_csv.assert_called_once_with(output_path)
285285
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
286286

287-
287+
@pytest.mark.regression
288288
@patch.object(Path, "mkdir")
289289
def test_save_dataset_to_file_csv_capitalized(mock_mkdir):
290290
mock_dataset = MagicMock(spec=Dataset)
@@ -293,7 +293,7 @@ def test_save_dataset_to_file_csv_capitalized(mock_mkdir):
293293
mock_dataset.to_csv.assert_called_once_with(output_path)
294294
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
295295

296-
296+
@pytest.mark.regression
297297
@patch.object(Path, "mkdir")
298298
def test_save_dataset_to_file_json(mock_mkdir):
299299
mock_dataset = MagicMock(spec=Dataset)
@@ -302,7 +302,7 @@ def test_save_dataset_to_file_json(mock_mkdir):
302302
mock_dataset.to_json.assert_called_once_with(output_path)
303303
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
304304

305-
305+
@pytest.mark.regression
306306
@patch.object(Path, "mkdir")
307307
def test_save_dataset_to_file_json_capitalized(mock_mkdir):
308308
mock_dataset = MagicMock(spec=Dataset)
@@ -311,7 +311,7 @@ def test_save_dataset_to_file_json_capitalized(mock_mkdir):
311311
mock_dataset.to_json.assert_called_once_with(output_path)
312312
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
313313

314-
314+
@pytest.mark.regression
315315
@patch.object(Path, "mkdir")
316316
def test_save_dataset_to_file_jsonl(mock_mkdir):
317317
mock_dataset = MagicMock(spec=Dataset)
@@ -320,7 +320,7 @@ def test_save_dataset_to_file_jsonl(mock_mkdir):
320320
mock_dataset.to_json.assert_called_once_with(output_path)
321321
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
322322

323-
323+
@pytest.mark.regression
324324
@patch.object(Path, "mkdir")
325325
def test_save_dataset_to_file_jsonl_capitalized(mock_mkdir):
326326
mock_dataset = MagicMock(spec=Dataset)
@@ -329,7 +329,7 @@ def test_save_dataset_to_file_jsonl_capitalized(mock_mkdir):
329329
mock_dataset.to_json.assert_called_once_with(output_path)
330330
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
331331

332-
332+
@pytest.mark.regression
333333
@patch.object(Path, "mkdir")
334334
def test_save_dataset_to_file_parquet(mock_mkdir):
335335
mock_dataset = MagicMock(spec=Dataset)
@@ -338,7 +338,7 @@ def test_save_dataset_to_file_parquet(mock_mkdir):
338338
mock_dataset.to_parquet.assert_called_once_with(output_path)
339339
mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
340340

341-
341+
@pytest.mark.regression
342342
@patch.object(Path, "mkdir")
343343
def test_save_dataset_to_file_unsupported_type(mock_mkdir):
344344
mock_dataset = MagicMock(spec=Dataset)

0 commit comments

Comments (0)