@@ -138,6 +138,8 @@ def __init__(
         self.text_creator = EndlessTextCreator(
             data=config.source,
         )
+        # Add counter for unique prefixes
+        self.request_counter = 0
 
     def __iter__(
         self,
@@ -170,22 +172,46 @@ def __iter__(
             output_tokens_sampler,
         ):
             start_index = rand.randint(0, len(self.text_creator.words))
+            # Increment counter for each request
+            self.request_counter += 1
             yield {
-                "prompt": self._create_prompt(prompt_tokens, start_index),
+                "prompt": self._create_prompt(
+                    prompt_tokens, start_index, self.request_counter
+                ),
                 "prompt_tokens_count": prompt_tokens,
                 "output_tokens_count": output_tokens,
             }
 
-    def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
+    def _create_prompt(
+        self, prompt_tokens: int, start_index: int, request_id: int
+    ) -> str:
+        """
+        Create a prompt with a unique prefix to prevent vLLM prefix caching.
+        Args:
+            prompt_tokens: Target number of tokens for the prompt
+            start_index: Starting position in the text corpus
+            request_id: Unique identifier for this request (used as prefix)
+        Returns:
+            Generated prompt string with a unique prefix
+        """
         if prompt_tokens <= 0:
-            return ""
+            return f"{request_id}: "
+
+        unique_prefix = f"{request_id}: "
+
+        # Calculate how many tokens the prefix uses
+        prefix_tokens = len(self.processor.tokenize(unique_prefix))
+
+        # Adjust target tokens to account for the prefix
+        remaining_tokens = max(1, prompt_tokens - prefix_tokens)
 
         left = start_index
-        right = start_index + 4 * prompt_tokens
+        right = start_index + 4 * remaining_tokens
 
         while left < right:
             mid = (left + right) // 2
-            test_prompt = self.text_creator.create_text(start_index, mid - start_index)
+            base_text = self.text_creator.create_text(start_index, mid - start_index)
+            test_prompt = unique_prefix + base_text
             test_tokens = len(self.processor.tokenize(test_prompt))
 
             if test_tokens == prompt_tokens:
@@ -195,7 +221,8 @@ def _create_prompt(self, prompt_tokens: int, start_index: int) -> str:
             else:
                 right = mid
 
-        return self.text_creator.create_text(start_index, left - start_index)
+        base_text = self.text_creator.create_text(start_index, left - start_index)
+        return unique_prefix + base_text
 
 
 class SyntheticDatasetCreator(DatasetCreator):
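
For context, the change defeats vLLM's automatic prefix caching by prepending a per-request counter (e.g. "1: ", "2: ") so no two generated prompts share the same leading tokens, then binary-searching how much corpus text to append so the prefixed prompt still lands on the requested token count. Below is a minimal standalone sketch of that idea; the names WORDS, simple_tokenize, and build_prompt are hypothetical stand-ins for EndlessTextCreator's corpus, the real HuggingFace processor, and _create_prompt, not the project's API.

# Hypothetical, self-contained sketch (not the project's API): WORDS and
# simple_tokenize stand in for EndlessTextCreator's corpus and the real
# tokenizer, and build_prompt mirrors the new _create_prompt logic.
from typing import Callable, Sequence

WORDS = tuple(("the quick brown fox jumps over the lazy dog " * 200).split())


def simple_tokenize(text: str) -> Sequence[str]:
    # Crude whitespace tokenizer, used only for illustration.
    return text.split()


def build_prompt(
    prompt_tokens: int,
    start_index: int,
    request_id: int,
    words: Sequence[str] = WORDS,
    tokenize: Callable[[str], Sequence[str]] = simple_tokenize,
) -> str:
    unique_prefix = f"{request_id}: "
    if prompt_tokens <= 0:
        return unique_prefix

    # Token budget left for the body once the prefix is accounted for.
    remaining = max(1, prompt_tokens - len(tokenize(unique_prefix)))

    # Binary-search how many corpus words to take so the full prompt
    # (prefix + body) lands on the requested token count.
    left, right = start_index, start_index + 4 * remaining
    while left < right:
        mid = (left + right) // 2
        candidate = unique_prefix + " ".join(words[start_index:mid])
        candidate_tokens = len(tokenize(candidate))
        if candidate_tokens == prompt_tokens:
            return candidate
        if candidate_tokens < prompt_tokens:
            left = mid + 1
        else:
            right = mid
    return unique_prefix + " ".join(words[start_index:left])


# Same token target, different leading tokens, so a prefix cache cannot
# match one request against the other.
print(build_prompt(16, 0, request_id=1))
print(build_prompt(16, 0, request_id=2))

Running the sketch prints two prompts with identical token counts under the toy tokenizer but different first tokens, which is exactly the property that prevents cache hits across requests.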