Skip to content

Commit 705ee9f

Browse files
committed
fix start token
1 parent a4bdbb5 commit 705ee9f

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

src/guidellm/dataset/synthetic.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -194,19 +194,21 @@ def __iter__(
194194
}
195195

196196
def _create_prompt(
197-
self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
197+
self,
198+
prompt_tokens: int,
199+
start_index: int,
200+
unique_prefix: Optional[int] = None, # noqa: ARG002
198201
) -> list[int]:
199202
if prompt_tokens <= 0:
200203
return []
201204

202205
left = start_index
203206
right = start_index + 4 * prompt_tokens
204-
start_tokens = [unique_prefix] if unique_prefix else []
205207

206208
while left < right:
207209
mid = (left + right) // 2
208210
test_prompt = self.text_creator.create_text(start_index, mid - start_index)
209-
test_tokens = start_tokens + self.processor.encode(test_prompt)
211+
test_tokens = self.processor.encode(test_prompt)
210212

211213
if len(test_tokens) == prompt_tokens:
212214
return test_tokens
@@ -216,7 +218,7 @@ def _create_prompt(
216218
right = mid
217219

218220
final_text = self.text_creator.create_text(start_index, left - start_index)
219-
return start_tokens + self.processor.encode(final_text)
221+
return self.processor.encode(final_text)
220222

221223

222224
class SyntheticDatasetCreator(DatasetCreator):

0 commit comments

Comments
 (0)