From b2c4db6d71c745614ba6ff7f76d5acdd33c78a92 Mon Sep 17 00:00:00 2001
From: Ofir Ben Shoham
Date: Thu, 27 Nov 2025 15:46:56 +0200
Subject: [PATCH] Optimize calibrate_draft_vocab to read only required lines
 when calibrate_size is set

Signed-off-by: Ofir Ben Shoham
---
 .../speculative_decoding/scripts/calibrate_draft_vocab.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/speculative_decoding/scripts/calibrate_draft_vocab.py b/examples/speculative_decoding/scripts/calibrate_draft_vocab.py
index 37a798cf8..16e6b2d0e 100644
--- a/examples/speculative_decoding/scripts/calibrate_draft_vocab.py
+++ b/examples/speculative_decoding/scripts/calibrate_draft_vocab.py
@@ -16,6 +16,7 @@
 import argparse
 import json
 import os
+from itertools import islice
 
 import torch
 from transformers import AutoTokenizer
@@ -47,9 +48,8 @@ def main():
     print("Calibrating vocab...")
     tokenizer = AutoTokenizer.from_pretrained(args.model)
     with open(args.data) as f:
-        conversations = [json.loads(line)["conversations"] for line in f]
-        if args.calibrate_size:
-            conversations = conversations[: args.calibrate_size]
+        lines = islice(f, args.calibrate_size) if args.calibrate_size else f
+        conversations = [json.loads(line)["conversations"] for line in lines]
     conversations = [item for sublist in conversations for item in sublist]
 
     d2t = calibrate_frequent_vocab(tokenizer, conversations, args.draft_vocab_size)
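
Note (not part of the patch): below is a minimal, self-contained sketch of the lazy-read pattern the change relies on. itertools.islice consumes the open file object lazily and stops after the requested number of lines, so when calibrate_size is set the rest of the JSONL file is never read or parsed. The helper name load_conversations and the example path are illustrative only, not identifiers from the repository.

    import json
    from itertools import islice


    def load_conversations(path, calibrate_size=None):
        """Read at most `calibrate_size` JSONL records; read the whole file if falsy."""
        with open(path) as f:
            # islice pulls lines from the file iterator one at a time and stops early,
            # so only the first `calibrate_size` lines are ever read from disk.
            lines = islice(f, calibrate_size) if calibrate_size else f
            conversations = [json.loads(line)["conversations"] for line in lines]
        # Flatten the per-record lists of turns into one list, as the script does.
        return [item for sublist in conversations for item in sublist]


    # Example usage (hypothetical data file):
    # turns = load_conversations("calibration_data.jsonl", calibrate_size=1000)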