From 1774a0c47e11c2c82573acaaedc464770a39d56d Mon Sep 17 00:00:00 2001 From: Helena Kloosterman Date: Wed, 6 Aug 2025 14:35:24 +0200 Subject: [PATCH] Add truncation to count_tokens Align with OpenVINO Tokenizers which truncates inputs at model max length. --- demos/benchmark/embeddings/benchmark_embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/benchmark/embeddings/benchmark_embeddings.py b/demos/benchmark/embeddings/benchmark_embeddings.py index d06a19c4ce..e710195aab 100644 --- a/demos/benchmark/embeddings/benchmark_embeddings.py +++ b/demos/benchmark/embeddings/benchmark_embeddings.py @@ -73,7 +73,7 @@ def count_tokens(docs, model): documents = docs.iter(batch_size=1) num_tokens = 0 for request in documents: - num_tokens += len(tokenizer(request["text"],add_special_tokens=False)["input_ids"][0]) + num_tokens += len(tokenizer(request["text"],add_special_tokens=False, truncation=True)["input_ids"][0]) return num_tokens @dataclass