1
- import pytest
2
1
from unittest .mock import MagicMock , patch
3
2
3
+ import pytest
4
+
4
5
from guidellm .dataset .file import FileDatasetCreator
5
6
from guidellm .dataset .hf_datasets import HFDatasetsCreator
6
7
from guidellm .dataset .in_memory import InMemoryDatasetCreator
@@ -25,79 +26,113 @@ def test_bucket_from_data():
25
26
assert buckets [1 ].count == 5
26
27
assert bucket_width == 1
27
28
29
+
28
30
def mock_processor(cls):
    """Stand-in processor hook: return the mock benchmark's request-loader processor.

    Intended for use with ``patch.object(..., new=mock_processor)``, hence the
    ``cls`` first parameter (it is ignored).
    """
    return mock_generative_benchmark().request_loader.processor
30
32
33
+
31
34
def new_handle_create(cls, *args, **kwargs):
    """Replacement for a creator's ``handle_create`` that skips real dataset creation.

    Ignores all arguments and returns a fresh ``MagicMock``.
    """
    return MagicMock()
33
36
37
+
34
38
def new_extract_dataset_name(cls, *args, **kwargs):
    """Replacement for ``extract_dataset_name`` returning a fixed synthetic-data name."""
    return "data:prideandprejudice.txt.gz"
36
40
41
+
37
42
@pytest.mark.smoke
def test_dataset_from_data_uses_extracted_dataset_name():
    """``Dataset.from_data`` should name the dataset from the creator's extracted name."""
    mock_benchmark = mock_generative_benchmark()
    with (
        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            SyntheticDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "data:prideandprejudice.txt.gz"
46
- # with unittest.mock.patch.object(PreTrainedTokenizerBase, 'processor', new=mock_processor):
55
+
47
56
48
57
def new_is_supported(cls, *args, **kwargs):
    """Replacement for ``is_supported`` that always claims support."""
    return True
50
59
60
+
51
61
@pytest.mark.smoke
def test_dataset_from_data_with_in_memory_dataset():
    """An in-memory data source yields the fixed 'In-memory' dataset name."""
    mock_benchmark = mock_generative_benchmark()
    # The patched attribute must be exactly "is_supported" (the source showed a
    # stray leading space, which would make patch.object raise AttributeError
    # and the assertion below unreachable).
    with patch.object(InMemoryDatasetCreator, "is_supported", new=new_is_supported):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "In-memory"
57
67
68
+
58
69
def hardcoded_isnt_supported(cls, *args, **kwargs):
    """Replacement for ``is_supported`` that always denies support."""
    return False
60
71
72
+
61
73
def new_extract_dataset_name_none(cls, *args, **kwargs):
    """Replacement for ``extract_dataset_name`` that reports no extractable name."""
    return None
63
75
76
+
64
77
@pytest.mark.smoke
def test_dataset_from_data_with_synthetic_dataset():
    """When no name is extracted, the synthetic creator's data spec becomes the name."""
    mock_benchmark = mock_generative_benchmark()
    with (
        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(SyntheticDatasetCreator, "is_supported", new=new_is_supported),
        patch.object(
            SyntheticDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "data:prideandprejudice.txt.gz"
75
94
95
+
76
96
@pytest.mark.smoke
def test_dataset_from_data_with_file_dataset():
    """A file data source falls back to the file name when no name is extracted."""
    mock_benchmark = mock_generative_benchmark()
    # No leading space in the path: the closing assertion expects "dataset.yaml".
    mock_benchmark.request_loader.data = "dataset.yaml"
    with (
        patch.object(FileDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=new_is_supported),
        patch.object(
            FileDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "dataset.yaml"
89
117
118
+
90
119
@pytest.mark.smoke
def test_dataset_from_data_with_hf_dataset():
    """A HuggingFace dataset id is used as the name when no name is extracted."""
    mock_benchmark = mock_generative_benchmark()
    # No leading space in the id: the closing assertion expects "openai/gsm8k".
    mock_benchmark.request_loader.data = "openai/gsm8k"
    with (
        patch.object(HFDatasetsCreator, "handle_create", new=new_handle_create),
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=hardcoded_isnt_supported),
        patch.object(HFDatasetsCreator, "is_supported", new=new_is_supported),
        patch.object(
            HFDatasetsCreator, "extract_dataset_name", new=new_extract_dataset_name_none
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "openai/gsm8k"
0 commit comments