@@ -138,6 +138,8 @@ def __init__(
         self.text_creator = EndlessTextCreator(
             data=config.source,
         )
+        # Add counter for unique prefixes
+        self.request_counter = 0
 
     def __iter__(
         self,
@@ -170,22 +172,44 @@ def __iter__(
             output_tokens_sampler,
         ):
             start_index = rand.randint(0, len(self.text_creator.words))
+            # Increment counter for each request
+            self.request_counter += 1
             yield {
-                "prompt": self._create_prompt(prompt_tokens, start_index),
+                "prompt": self._create_prompt(prompt_tokens, start_index, self.request_counter),
                 "prompt_tokens_count": prompt_tokens,
                 "output_tokens_count": output_tokens,
             }
 
-    def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
+    def _create_prompt(self, prompt_tokens: int, start_index: int, request_id: int) -> str:
+        """
+        Create a prompt with unique prefix to prevent vLLM prefix caching.
+
+        Args:
+            prompt_tokens: Target number of tokens for the prompt
+            start_index: Starting position in the text corpus
+            request_id: Unique identifier for this request (used as prefix)
+        Returns:
+            Generated prompt string with unique prefix
+        """
         if prompt_tokens <= 0:
-            return ""
+            return f"{request_id}: "
+
+        # Create unique prefix that will prevent any prefix caching
+        unique_prefix = f"{request_id}: "
+
+        # Calculate how many tokens the prefix uses
+        prefix_tokens = len(self.processor.tokenize(unique_prefix))
+
+        # Adjust target tokens to account for the prefix
+        remaining_tokens = max(1, prompt_tokens - prefix_tokens)
 
         left = start_index
-        right = start_index + 4 * prompt_tokens
+        right = start_index + 4 * remaining_tokens
 
         while left < right:
             mid = (left + right) // 2
-            test_prompt = self.text_creator.create_text(start_index, mid - start_index)
+            base_text = self.text_creator.create_text(start_index, mid - start_index)
+            test_prompt = unique_prefix + base_text
             test_tokens = len(self.processor.tokenize(test_prompt))
 
             if test_tokens == prompt_tokens:
@@ -195,7 +219,8 @@ def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
         else:
            right = mid
 
-        return self.text_creator.create_text(start_index, left - start_index)
+        base_text = self.text_creator.create_text(start_index, left - start_index)
+        return unique_prefix + base_text
 
 
 class SyntheticDatasetCreator(DatasetCreator):
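For context, the change amounts to the following standalone idea, shown here as a minimal sketch rather than the project's actual code: prepend a per-request numeric prefix such as "42: " so that no two prompts share their leading tokens, which keeps vLLM's automatic prefix caching from reusing cached KV blocks across benchmark requests, then binary-search over the word corpus so the final string still tokenizes to roughly the requested prompt length. The tokenize callable and words list below are illustrative stand-ins for the class's self.processor and self.text_creator.

    from typing import Callable, List, Sequence

    def build_unique_prompt(
        request_id: int,
        target_tokens: int,
        words: Sequence[str],
        start_index: int,
        tokenize: Callable[[str], List[str]],
    ) -> str:
        # Unique per-request prefix: prompts that differ at the very first
        # tokens share no common prefix for the cache to match against.
        unique_prefix = f"{request_id}: "
        if target_tokens <= 0:
            return unique_prefix

        # Budget the body so prefix + body lands near the requested count.
        prefix_tokens = len(tokenize(unique_prefix))
        remaining = max(1, target_tokens - prefix_tokens)

        # Binary search on how many corpus words to append after the prefix.
        left, right = 0, 4 * remaining
        while left < right:
            mid = (left + right) // 2
            candidate = unique_prefix + " ".join(words[start_index : start_index + mid])
            count = len(tokenize(candidate))
            if count == target_tokens:
                return candidate
            if count < target_tokens:
                left = mid + 1
            else:
                right = mid
        return unique_prefix + " ".join(words[start_index : start_index + left])

With a Hugging Face tokenizer, for example, tokenize could be AutoTokenizer.from_pretrained(model_name).tokenize, where model_name is whatever model the benchmark targets. Passing a strictly increasing request_id per sample, as the diff does with self.request_counter, makes every prompt unique from its first characters onward.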