Skip to content

Commit 4d624a2

Browse files
committed
Fixed code styling
1 parent b5a4877 commit 4d624a2

File tree

2 files changed

+35
-35
lines changed

2 files changed

+35
-35
lines changed

src/guidellm/preprocess/dataset.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,9 @@ def process_dataset(
212212
logger.info(f"Pushed dataset to: {hub_dataset_id}")
213213

214214

215-
def push_dataset_to_hub(hub_dataset_id: Optional[str], processed_dataset: Dataset) -> None:
215+
def push_dataset_to_hub(
216+
hub_dataset_id: Optional[str], processed_dataset: Dataset,
217+
) -> None:
216218
hf_token = os.environ.get("HF_TOKEN")
217219
if not hub_dataset_id or not hf_token:
218220
raise ValueError(

tests/unit/preprocess/test_dataset.py

Lines changed: 32 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,11 @@
11
import os
22
from pathlib import Path
3-
from typing import Iterator
3+
from typing import TYPE_CHECKING
44
from unittest.mock import MagicMock, patch
55

6+
if TYPE_CHECKING:
7+
from collections.abc import Iterator
8+
69
import pytest
710
from datasets import Dataset
811
from transformers import PreTrainedTokenizerBase
@@ -29,21 +32,16 @@ def tokenizer_mock():
2932
return tokenizer
3033

3134

32-
from unittest.mock import MagicMock, patch
33-
from guidellm.preprocess.dataset import process_dataset, STRATEGY_HANDLERS, ShortPromptStrategy
34-
from datasets import Dataset
35-
36-
3735
@patch(f"{process_dataset.__module__}.guidellm_load_dataset")
3836
@patch(f"{process_dataset.__module__}.check_load_processor")
3937
@patch(f"{process_dataset.__module__}.Dataset")
4038
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
4139
def test_strategy_handler_called(
42-
mock_sampler,
43-
mock_dataset_class,
44-
mock_check_processor,
45-
mock_load_dataset,
46-
tokenizer_mock,
40+
mock_sampler,
41+
mock_dataset_class,
42+
mock_check_processor,
43+
mock_load_dataset,
44+
tokenizer_mock,
4745
):
4846
mock_handler = MagicMock(return_value="processed_prompt")
4947
with patch.dict(STRATEGY_HANDLERS, {ShortPromptStrategy.IGNORE: mock_handler}):
@@ -106,12 +104,12 @@ def test_handle_pad_strategy(tokenizer_mock):
106104
@patch("guidellm.preprocess.dataset.check_load_processor")
107105
@patch("guidellm.preprocess.dataset.IntegerRangeSampler")
108106
def test_process_dataset_non_empty(
109-
mock_sampler,
110-
mock_check_processor,
111-
mock_load_dataset,
112-
mock_dataset_class,
113-
mock_save_to_file,
114-
tokenizer_mock,
107+
mock_sampler,
108+
mock_check_processor,
109+
mock_load_dataset,
110+
mock_dataset_class,
111+
mock_save_to_file,
112+
tokenizer_mock,
115113
):
116114
from guidellm.preprocess.dataset import process_dataset
117115

@@ -146,11 +144,11 @@ def test_process_dataset_non_empty(
146144
@patch(f"{process_dataset.__module__}.check_load_processor")
147145
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
148146
def test_process_dataset_empty_after_processing(
149-
mock_sampler,
150-
mock_check_processor,
151-
mock_load_dataset,
152-
mock_dataset_class,
153-
tokenizer_mock,
147+
mock_sampler,
148+
mock_check_processor,
149+
mock_load_dataset,
150+
mock_dataset_class,
151+
tokenizer_mock,
154152
):
155153
mock_dataset = [{"prompt": ""}]
156154
mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -170,12 +168,12 @@ def test_process_dataset_empty_after_processing(
170168
@patch(f"{process_dataset.__module__}.check_load_processor")
171169
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
172170
def test_process_dataset_push_to_hub_called(
173-
mock_sampler,
174-
mock_check_processor,
175-
mock_load_dataset,
176-
mock_dataset_class,
177-
mock_push,
178-
tokenizer_mock,
171+
mock_sampler,
172+
mock_check_processor,
173+
mock_load_dataset,
174+
mock_dataset_class,
175+
mock_push,
176+
tokenizer_mock,
179177
):
180178
mock_dataset = [{"prompt": "abc"}]
181179
mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})
@@ -201,12 +199,12 @@ def test_process_dataset_push_to_hub_called(
201199
@patch(f"{process_dataset.__module__}.check_load_processor")
202200
@patch(f"{process_dataset.__module__}.IntegerRangeSampler")
203201
def test_process_dataset_push_to_hub_not_called(
204-
mock_sampler,
205-
mock_check_processor,
206-
mock_load_dataset,
207-
mock_dataset_class,
208-
mock_push,
209-
tokenizer_mock,
202+
mock_sampler,
203+
mock_check_processor,
204+
mock_load_dataset,
205+
mock_dataset_class,
206+
mock_push,
207+
tokenizer_mock,
210208
):
211209
mock_dataset = [{"prompt": "abc"}]
212210
mock_load_dataset.return_value = (mock_dataset, {"prompt_column": "prompt"})

0 commit comments

Comments (0)