Skip to content

Commit 2572e4f

Browse files
Add new TurnLength score and allow evaluators to support list of values not only single values
1 parent 055bfb3 commit 2572e4f

File tree

2 files changed

+51
-0
lines changed

2 files changed

+51
-0
lines changed

src/sdialog/evaluation/__init__.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,52 @@ def __init__(self, name: str = None, speaker: Optional[str] = None):
651651
super().__init__(feature="mean-turn-length", name=name, speaker=speaker)
652652

653653

654+
class TurnLength(BaseDialogScore):
    """
    Compute individual turn lengths (number of words per turn) for a dialogue.

    Returns a list with one length per turn in the dialogue, where each length is
    ``len(turn)``. This is a granular metric that captures the turn length distribution,
    often used as raw input for downstream aggregations (e.g., computing mean or median
    turn length).

    Example:

        .. code-block:: python

            from sdialog.evaluation import TurnLength

            scorer = TurnLength()
            lengths = scorer(dialog)  # Returns list of integers
            print(lengths)  # [5, 12, 3, 18, ...] words per turn

            # Filter by speaker
            scorer_system = TurnLength(speaker="System")
            system_lengths = scorer_system(dialog)

    :param name: Optional score name (defaults to "turn-length").
    :type name: Optional[str]
    :param speaker: If set, only turns by this speaker (case-insensitive) are considered.
    :type speaker: Optional[str]
    """

    def __init__(self, name: Optional[str] = None, speaker: Optional[str] = None):
        """
        Initialize turn length scorer.

        :param name: Optional score name; falls back to "turn-length" when not given.
        :type name: Optional[str]
        :param speaker: Optional speaker to restrict the score to.
        :type speaker: Optional[str]
        """
        # The speaker filter is handed to the base class through its ``ai_speaker``
        # argument and is read back from ``self.ai_speaker`` in :meth:`score`.
        super().__init__(name=name or "turn-length", ai_speaker=speaker)

    def score(self, dialog: Dialog) -> List[int]:
        """
        Compute the length of each turn in the dialogue.

        :param dialog: Dialogue instance to evaluate.
        :type dialog: Dialog
        :return: List with ``len(turn)`` for every considered turn, in dialogue order.
                 Empty when no turn matches the speaker filter.
        :rtype: List[int]
        """
        if self.ai_speaker is None:
            return [len(turn) for turn in dialog]

        # Normalize the filter once instead of once per turn.
        target_speaker = self.ai_speaker.lower()
        # A turn without a speaker can never match the filter; skip it explicitly
        # rather than failing on ``None.lower()``.
        return [
            len(turn)
            for turn in dialog
            if turn.speaker is not None and turn.speaker.lower() == target_speaker
        ]
698+
699+
654700
class HesitationRateScore(ConversationalFeatures):
655701
"""
656702
Compute the hesitation rate (percentage of hesitation tokens) for a dialogue.

src/sdialog/evaluation/base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,11 @@ def __call__(self,
523523
try:
524524
scores = [self.dialog_score(dialogue)
525525
for dialogue in tqdm(dialogues, desc=desc, leave=self.verbose)]
526+
# Flatten scores if elements are iterables (but not strings or dicts)
527+
if scores and hasattr(scores[0], '__iter__') and not isinstance(scores[0], (str, dict)):
528+
scores = [item
529+
for sublist in scores
530+
for item in (sublist if isinstance(sublist, (list, tuple)) else [sublist])]
526531
except KeyboardInterrupt:
527532
logger.warning(
528533
f"Evaluation interrupted by user. Partial results for dataset '{dataset_name}' "

0 commit comments

Comments
 (0)