Skip to content

Commit 1e7121b

Browse files
feat: Unify the use of sentence_segmenter (#1629)
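As the title says: some metrics did not use sentence_segmenter in a uniform way. BleuScore and FactualCorrectness now expose an optional sentence_segmenter field and a language field, and fall back to get_segmenter(language=self.language, clean=False) when no segmenter is supplied. --------- Co-authored-by: Shahules786 <[email protected]>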
1 parent d840b16 commit 1e7121b

File tree

2 files changed

+24
-6
lines changed

2 files changed

+24
-6
lines changed

src/ragas/metrics/_bleu_score.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ class BleuScore(SingleTurnMetric):
1717
)
1818
weights: t.Tuple[float, ...] = (0.25, 0.25, 0.25, 0.25)
1919
sentence_segmenter: t.Optional[HasSegmentMethod] = None
20+
language: str = "english"
2021

2122
def __post_init__(self):
2223
try:
@@ -26,7 +27,8 @@ def __post_init__(self):
2627
raise ImportError(
2728
"nltk is required for bleu score. Please install it using `pip install nltk`"
2829
)
29-
self.segmenter = get_segmenter()
30+
if not self.sentence_segmenter:
31+
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
3032
self.word_tokenizer = word_tokenize
3133
self.corpus_bleu = corpus_bleu
3234

@@ -36,8 +38,13 @@ def init(self, run_config: RunConfig):
3638
async def _single_turn_ascore(
3739
self, sample: SingleTurnSample, callbacks: Callbacks
3840
) -> float:
39-
reference_sentences = self.segmenter.segment(sample.reference)
40-
response_sentences = self.segmenter.segment(sample.response)
41+
42+
assert (
43+
self.sentence_segmenter is not None
44+
), "Sentence segmenter is not initialized"
45+
46+
reference_sentences = self.sentence_segmenter.segment(sample.reference)
47+
response_sentences = self.sentence_segmenter.segment(sample.response)
4148

4249
reference = [
4350
[self.word_tokenizer(reference)] for reference in reference_sentences

src/ragas/metrics/_factual_correctness.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,11 @@
99
from numpy.typing import NDArray
1010
from pydantic import BaseModel, Field
1111

12-
from ragas.metrics._faithfulness import NLIStatementInput, NLIStatementPrompt
12+
from ragas.metrics._faithfulness import (
13+
HasSegmentMethod,
14+
NLIStatementInput,
15+
NLIStatementPrompt,
16+
)
1317
from ragas.metrics.base import (
1418
MetricType,
1519
MetricWithLLM,
@@ -212,6 +216,8 @@ class FactualCorrectness(MetricWithLLM, SingleTurnMetric):
212216
coverage: t.Literal["low", "high"] = "low"
213217
claim_decomposition_prompt: PydanticPrompt = ClaimDecompositionPrompt()
214218
nli_prompt: PydanticPrompt = NLIStatementPrompt()
219+
sentence_segmenter: t.Optional[HasSegmentMethod] = None
220+
language: str = "english"
215221

216222
def __post_init__(self):
217223
value = f"{self.atomicity}_atomicity_{self.coverage}_coverage"
@@ -224,7 +230,8 @@ def __post_init__(self):
224230
logger.warning(
225231
f"No examples found for the atomicity and coverage level: {value}"
226232
)
227-
self.segmenter = get_segmenter(language="english")
233+
if not self.sentence_segmenter:
234+
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
228235

229236
if type(self.beta) is not float:
230237
raise ValueError(
@@ -235,7 +242,11 @@ async def decompose_claims(
235242
self, response: str, callbacks: Callbacks
236243
) -> t.List[str]:
237244
assert self.llm is not None, "LLM must be set"
238-
sentences = self.segmenter.segment(response)
245+
assert (
246+
self.sentence_segmenter is not None
247+
), "Sentence segmenter is not initialized"
248+
249+
sentences = self.sentence_segmenter.segment(response)
239250
assert isinstance(sentences, list), "Segmenter must return a list of sentences"
240251
prompt_input = ClaimDecompositionInput(response=response, sentences=sentences)
241252
result = await self.claim_decomposition_prompt.generate(

0 commit comments

Comments
 (0)