Skip to content

Commit 7b96175

Browse files
committed
fix start token
Signed-off-by: guangli.bao <[email protected]>
1 parent a4bdbb5 commit 7b96175

File tree

2 files changed: 7 additions (+7) and 5 deletions (-5).

2 files changed: 7 additions (+7) and 5 deletions (-5).

src/guidellm/dataset/synthetic.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -194,19 +194,21 @@ def __iter__(
194194
}
195195

196196
def _create_prompt(
197-
self, prompt_tokens: int, start_index: int, unique_prefix: Optional[int] = None
197+
self,
198+
prompt_tokens: int,
199+
start_index: int,
200+
unique_prefix: Optional[int] = None, # noqa: ARG002
198201
) -> list[int]:
199202
if prompt_tokens <= 0:
200203
return []
201204

202205
left = start_index
203206
right = start_index + 4 * prompt_tokens
204-
start_tokens = [unique_prefix] if unique_prefix else []
205207

206208
while left < right:
207209
mid = (left + right) // 2
208210
test_prompt = self.text_creator.create_text(start_index, mid - start_index)
209-
test_tokens = start_tokens + self.processor.encode(test_prompt)
211+
test_tokens = self.processor.encode(test_prompt)
210212

211213
if len(test_tokens) == prompt_tokens:
212214
return test_tokens
@@ -216,7 +218,7 @@ def _create_prompt(
216218
right = mid
217219

218220
final_text = self.text_creator.create_text(start_index, left - start_index)
219-
return start_tokens + self.processor.encode(final_text)
221+
return self.processor.encode(final_text)
220222

221223

222224
class SyntheticDatasetCreator(DatasetCreator):

tests/unit/dataset/test_synthetic.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -415,7 +415,7 @@ def test_create_prompt_method(
415415

416416
# Test normal case
417417
result = generator._create_prompt(5, 0, 42)
418-
assert result == [42, 1, 2, 3]
418+
assert result == [1, 2, 3]
419419

420420
# Test zero tokens
421421
result = generator._create_prompt(0, 0, 42)

0 commit comments

Comments
 (0)