Skip to content

Commit 24cc3cd

Browse files
committed
fix token
Signed-off-by: guangli.bao <[email protected]>
1 parent: 705ee9f · commit: 24cc3cd

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

src/guidellm/dataset/synthetic.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -204,11 +204,12 @@ def _create_prompt(
204204

205205
left = start_index
206206
right = start_index + 4 * prompt_tokens
207+
start_tokens = []
207208

208209
while left < right:
209210
mid = (left + right) // 2
210211
test_prompt = self.text_creator.create_text(start_index, mid - start_index)
211-
test_tokens = self.processor.encode(test_prompt)
212+
test_tokens = start_tokens + self.processor.encode(test_prompt)
212213

213214
if len(test_tokens) == prompt_tokens:
214215
return test_tokens
@@ -218,7 +219,7 @@ def _create_prompt(
218219
right = mid
219220

220221
final_text = self.text_creator.create_text(start_index, left - start_index)
221-
return self.processor.encode(final_text)
222+
return start_tokens + self.processor.encode(final_text)
222223

223224

224225
class SyntheticDatasetCreator(DatasetCreator):

tests/unit/dataset/test_synthetic.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -415,7 +415,7 @@ def test_create_prompt_method(
415415

416416
# Test normal case
417417
result = generator._create_prompt(5, 0, 42)
418-
assert result == [42, 1, 2, 3]
418+
assert result == [1, 2, 3]
419419

420420
# Test zero tokens
421421
result = generator._create_prompt(0, 0, 42)

0 commit comments

Comments
 (0)