1
+ from unittest .mock import MagicMock , patch
2
+
1
3
import pytest
2
4
3
- from guidellm .presentation .data_models import Bucket
5
+ from guidellm .dataset .file import FileDatasetCreator
6
+ from guidellm .dataset .hf_datasets import HFDatasetsCreator
7
+ from guidellm .dataset .in_memory import InMemoryDatasetCreator
8
+ from guidellm .dataset .synthetic import SyntheticDatasetCreator
9
+ from guidellm .presentation .data_models import Bucket , Dataset
10
+ from tests .unit .mock_benchmark import mock_generative_benchmark
4
11
5
12
6
13
@pytest .mark .smoke
@@ -18,3 +25,114 @@ def test_bucket_from_data():
18
25
assert buckets [1 ].value == 8.0
19
26
assert buckets [1 ].count == 5
20
27
assert bucket_width == 1
28
+
29
+
30
def mock_processor(cls):
    """Return the processor of a freshly built mock benchmark's request loader.

    ``cls`` is accepted so the function can stand in for a class-level
    callable; its value is never read.
    """
    benchmark = mock_generative_benchmark()
    return benchmark.request_loader.processor
32
+
33
+
34
def new_handle_create(cls, *args, **kwargs):
    """Replacement for a creator's ``handle_create`` used with ``patch.object``.

    All arguments are ignored; every call returns a new ``MagicMock``.
    """
    placeholder = MagicMock()
    return placeholder
36
+
37
+
38
def new_extract_dataset_name(cls, *args, **kwargs):
    """Replacement for ``extract_dataset_name`` that yields a fixed name.

    All arguments are ignored.
    """
    fixed_name = "data:prideandprejudice.txt.gz"
    return fixed_name
40
+
41
+
42
@pytest.mark.smoke
def test_dataset_from_data_uses_extracted_dataset_name():
    """``Dataset.from_data`` reports the name given by ``extract_dataset_name``.

    ``SyntheticDatasetCreator.handle_create`` is stubbed to return a mock and
    ``extract_dataset_name`` is stubbed to return a fixed string, which the
    resulting dataset's ``name`` must equal.
    """
    mock_benchmark = mock_generative_benchmark()

    # Patchers are only built here; each one takes effect when entered below,
    # in the same order as they are listed.
    create_stub = patch.object(
        SyntheticDatasetCreator, "handle_create", new=new_handle_create
    )
    name_stub = patch.object(
        SyntheticDatasetCreator,
        "extract_dataset_name",
        new=new_extract_dataset_name,
    )

    with create_stub, name_stub:
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "data:prideandprejudice.txt.gz"
55
+
56
+
57
def new_is_supported(cls, *args, **kwargs):
    """Replacement for a creator's ``is_supported`` that always answers ``True``.

    All arguments are ignored.
    """
    supported = True
    return supported
59
+
60
+
61
@pytest.mark.smoke
def test_dataset_from_data_with_in_memory_dataset():
    """``Dataset.from_data`` names the dataset ``"In-memory"``.

    ``InMemoryDatasetCreator.is_supported`` is forced to return ``True`` for
    the duration of the call.
    """
    mock_benchmark = mock_generative_benchmark()
    force_in_memory = patch.object(
        InMemoryDatasetCreator, "is_supported", new=new_is_supported
    )

    with force_in_memory:
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "In-memory"
67
+
68
+
69
def hardcoded_isnt_supported(cls, *args, **kwargs):
    """Replacement for a creator's ``is_supported`` that always answers ``False``.

    All arguments are ignored.
    """
    supported = False
    return supported
71
+
72
+
73
def new_extract_dataset_name_none(cls, *args, **kwargs):
    """Replacement for ``extract_dataset_name`` that reports no name.

    All arguments are ignored; the result is always ``None``.
    """
    no_name = None
    return no_name
75
+
76
+
77
@pytest.mark.smoke
def test_dataset_from_data_with_synthetic_dataset():
    """``Dataset.from_data`` names a synthetic dataset when no name is extracted.

    The in-memory creator is made to report unsupported, the synthetic creator
    supported, and ``extract_dataset_name`` returns ``None``.
    """
    mock_benchmark = mock_generative_benchmark()

    # Built up front, entered below in the listed order.
    create_stub = patch.object(
        SyntheticDatasetCreator, "handle_create", new=new_handle_create
    )
    not_in_memory = patch.object(
        InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
    )
    is_synthetic = patch.object(
        SyntheticDatasetCreator, "is_supported", new=new_is_supported
    )
    unnamed = patch.object(
        SyntheticDatasetCreator,
        "extract_dataset_name",
        new=new_extract_dataset_name_none,
    )

    with create_stub, not_in_memory, is_synthetic, unnamed:
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        # NOTE(review): the expected value presumably comes from the mock
        # benchmark's request loader data -- confirm against mock_benchmark.
        assert dataset.name == "data:prideandprejudice.txt.gz"
94
+
95
+
96
@pytest.mark.smoke
def test_dataset_from_data_with_file_dataset():
    """``Dataset.from_data`` names a file dataset after the loader's ``data``.

    The request loader's ``data`` is set to ``"dataset.yaml"``; the in-memory
    and synthetic creators report unsupported, the file creator reports
    supported, and ``extract_dataset_name`` returns ``None``.
    """
    mock_benchmark = mock_generative_benchmark()
    mock_benchmark.request_loader.data = "dataset.yaml"

    # Built up front, entered below in the listed order.
    create_stub = patch.object(
        FileDatasetCreator, "handle_create", new=new_handle_create
    )
    not_in_memory = patch.object(
        InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
    )
    not_synthetic = patch.object(
        SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
    )
    is_file = patch.object(FileDatasetCreator, "is_supported", new=new_is_supported)
    unnamed = patch.object(
        FileDatasetCreator,
        "extract_dataset_name",
        new=new_extract_dataset_name_none,
    )

    with create_stub, not_in_memory, not_synthetic, is_file, unnamed:
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "dataset.yaml"
117
+
118
+
119
@pytest.mark.smoke
def test_dataset_from_data_with_hf_dataset():
    """``Dataset.from_data`` names an HF dataset after the loader's ``data``.

    The request loader's ``data`` is set to ``"openai/gsm8k"``; the in-memory,
    synthetic and file creators report unsupported, the HF creator reports
    supported, and ``extract_dataset_name`` returns ``None``.
    """
    mock_benchmark = mock_generative_benchmark()
    mock_benchmark.request_loader.data = "openai/gsm8k"

    # Built up front, entered below in the listed order.
    create_stub = patch.object(
        HFDatasetsCreator, "handle_create", new=new_handle_create
    )
    not_in_memory = patch.object(
        InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
    )
    not_synthetic = patch.object(
        SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
    )
    not_file = patch.object(
        FileDatasetCreator, "is_supported", new=hardcoded_isnt_supported
    )
    is_hf = patch.object(HFDatasetsCreator, "is_supported", new=new_is_supported)
    unnamed = patch.object(
        HFDatasetsCreator, "extract_dataset_name", new=new_extract_dataset_name_none
    )

    with create_stub, not_in_memory, not_synthetic, not_file, is_hf, unnamed:
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "openai/gsm8k"
0 commit comments