11import os
22from pathlib import Path
3- from typing import Iterator
3+ from typing import TYPE_CHECKING
44from unittest .mock import MagicMock , patch
55
6+ if TYPE_CHECKING :
7+ from collections .abc import Iterator
8+
69import pytest
710from datasets import Dataset
811from transformers import PreTrainedTokenizerBase
@@ -29,21 +32,16 @@ def tokenizer_mock():
2932 return tokenizer
3033
3134
32- from unittest .mock import MagicMock , patch
33- from guidellm .preprocess .dataset import process_dataset , STRATEGY_HANDLERS , ShortPromptStrategy
34- from datasets import Dataset
35-
36-
3735@patch (f"{ process_dataset .__module__ } .guidellm_load_dataset" )
3836@patch (f"{ process_dataset .__module__ } .check_load_processor" )
3937@patch (f"{ process_dataset .__module__ } .Dataset" )
4038@patch (f"{ process_dataset .__module__ } .IntegerRangeSampler" )
4139def test_strategy_handler_called (
42- mock_sampler ,
43- mock_dataset_class ,
44- mock_check_processor ,
45- mock_load_dataset ,
46- tokenizer_mock ,
40+ mock_sampler ,
41+ mock_dataset_class ,
42+ mock_check_processor ,
43+ mock_load_dataset ,
44+ tokenizer_mock ,
4745):
4846 mock_handler = MagicMock (return_value = "processed_prompt" )
4947 with patch .dict (STRATEGY_HANDLERS , {ShortPromptStrategy .IGNORE : mock_handler }):
@@ -106,12 +104,12 @@ def test_handle_pad_strategy(tokenizer_mock):
106104@patch ("guidellm.preprocess.dataset.check_load_processor" )
107105@patch ("guidellm.preprocess.dataset.IntegerRangeSampler" )
108106def test_process_dataset_non_empty (
109- mock_sampler ,
110- mock_check_processor ,
111- mock_load_dataset ,
112- mock_dataset_class ,
113- mock_save_to_file ,
114- tokenizer_mock ,
107+ mock_sampler ,
108+ mock_check_processor ,
109+ mock_load_dataset ,
110+ mock_dataset_class ,
111+ mock_save_to_file ,
112+ tokenizer_mock ,
115113):
116114 from guidellm .preprocess .dataset import process_dataset
117115
@@ -146,11 +144,11 @@ def test_process_dataset_non_empty(
146144@patch (f"{ process_dataset .__module__ } .check_load_processor" )
147145@patch (f"{ process_dataset .__module__ } .IntegerRangeSampler" )
148146def test_process_dataset_empty_after_processing (
149- mock_sampler ,
150- mock_check_processor ,
151- mock_load_dataset ,
152- mock_dataset_class ,
153- tokenizer_mock ,
147+ mock_sampler ,
148+ mock_check_processor ,
149+ mock_load_dataset ,
150+ mock_dataset_class ,
151+ tokenizer_mock ,
154152):
155153 mock_dataset = [{"prompt" : "" }]
156154 mock_load_dataset .return_value = (mock_dataset , {"prompt_column" : "prompt" })
@@ -170,12 +168,12 @@ def test_process_dataset_empty_after_processing(
170168@patch (f"{ process_dataset .__module__ } .check_load_processor" )
171169@patch (f"{ process_dataset .__module__ } .IntegerRangeSampler" )
172170def test_process_dataset_push_to_hub_called (
173- mock_sampler ,
174- mock_check_processor ,
175- mock_load_dataset ,
176- mock_dataset_class ,
177- mock_push ,
178- tokenizer_mock ,
171+ mock_sampler ,
172+ mock_check_processor ,
173+ mock_load_dataset ,
174+ mock_dataset_class ,
175+ mock_push ,
176+ tokenizer_mock ,
179177):
180178 mock_dataset = [{"prompt" : "abc" }]
181179 mock_load_dataset .return_value = (mock_dataset , {"prompt_column" : "prompt" })
@@ -201,12 +199,12 @@ def test_process_dataset_push_to_hub_called(
201199@patch (f"{ process_dataset .__module__ } .check_load_processor" )
202200@patch (f"{ process_dataset .__module__ } .IntegerRangeSampler" )
203201def test_process_dataset_push_to_hub_not_called (
204- mock_sampler ,
205- mock_check_processor ,
206- mock_load_dataset ,
207- mock_dataset_class ,
208- mock_push ,
209- tokenizer_mock ,
202+ mock_sampler ,
203+ mock_check_processor ,
204+ mock_load_dataset ,
205+ mock_dataset_class ,
206+ mock_push ,
207+ tokenizer_mock ,
210208):
211209 mock_dataset = [{"prompt" : "abc" }]
212210 mock_load_dataset .return_value = (mock_dataset , {"prompt_column" : "prompt" })
0 commit comments