Skip to content

Commit a5f60b0

Browse files
committed
Reduce duplicated code for text metrics
Signed-off-by: Jared O'Connell <[email protected]>
1 parent 93836bb commit a5f60b0

File tree

2 files changed

+14
-17
lines changed

2 files changed

+14
-17
lines changed

src/guidellm/data/preprocessors/formatters.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,7 @@ def __call__(
103103
if prefix or text:
104104
prompt = prefix + text
105105
arguments.body["prompt"] = prompt
106-
input_metrics.text_characters = len(prompt)
107-
input_metrics.text_words = len(prompt.split())
106+
input_metrics.add_text_metrics(prompt)
108107

109108
return GenerationRequest(
110109
request_type="text_completions",
@@ -197,25 +196,14 @@ def __call__( # noqa: C901, PLR0912, PLR0915
197196
if not prefix:
198197
continue
199198

200-
input_metrics.text_characters = (
201-
input_metrics.text_characters or 0
202-
) + len(prefix)
203-
204-
input_metrics.text_words = (input_metrics.text_words or 0) + \
205-
len(prefix.split())
206-
199+
input_metrics.add_text_metrics(prefix)
207200
arguments.body["messages"].append({"role": "system", "content": prefix})
208201

209202
for text in columns.get("text_column", []):
210203
if not text:
211204
continue
212205

213-
input_metrics.text_characters = (
214-
input_metrics.text_characters or 0
215-
) + len(text)
216-
input_metrics.text_words = (
217-
input_metrics.text_words or 0
218-
) + len(text.split())
206+
input_metrics.add_text_metrics(text)
219207

220208
arguments.body["messages"].append(
221209
{"role": "user", "content": [{"type": "text", "text": text}]}
@@ -394,8 +382,7 @@ def __call__( # noqa: C901
394382
if prefix or text:
395383
prompt = prefix + text
396384
arguments.body["prompt"] = prompt
397-
input_metrics.text_characters = len(prompt)
398-
input_metrics.text_words = len(prompt.split())
385+
input_metrics.add_text_metrics(prompt)
399386

400387
return GenerationRequest(
401388
request_type="audio_transcriptions",

src/guidellm/schemas/request.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,16 @@ def total_tokens(self) -> int | None:
169169
self.video_tokens or 0
170170
) + (self.audio_tokens or 0) or None
171171

172+
def add_text_metrics(self, text):
173+
"""
174+
Adds the metrics from the given text to the fields
175+
`text_characters` and `text_words`.
176+
177+
:param text: Text to add metrics from
178+
"""
179+
self.text_characters = (self.text_characters or 0) + len(text)
180+
self.text_words = (self.text_words or 0) + len(text.split())
181+
172182

173183
class GenerationRequest(StandardBaseModel):
174184
"""

0 commit comments

Comments
 (0)