Skip to content

Commit 6478a6e

Browse files
authored
fix: remove pysbd and sentence segmenting (#1826)
fixes: #1783 #1736
1 parent 2a96e6f commit 6478a6e

File tree

11 files changed

+107
-265
lines changed

11 files changed

+107
-265
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@ dependencies = [
1313
"appdirs",
1414
"pydantic>=2",
1515
"openai>1",
16-
"pysbd>=0.3.4",
1716
"diskcache>=5.6.3",
1817
]
1918
dynamic = ["version", "readme"]

src/ragas/metrics/_answer_correctness.py

Lines changed: 14 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -10,17 +10,16 @@
1010
from ragas.dataset_schema import SingleTurnSample
1111
from ragas.metrics._answer_similarity import AnswerSimilarity
1212
from ragas.metrics._faithfulness import (
13-
FaithfulnessStatements,
14-
HasSegmentMethod,
15-
LongFormAnswerPrompt,
13+
StatementGeneratorInput,
14+
StatementGeneratorOutput,
15+
StatementGeneratorPrompt,
1616
)
1717
from ragas.metrics.base import (
1818
MetricOutputType,
1919
MetricType,
2020
MetricWithEmbeddings,
2121
MetricWithLLM,
2222
SingleTurnMetric,
23-
get_segmenter,
2423
)
2524
from ragas.metrics.utils import fbeta_score
2625
from ragas.prompt import PydanticPrompt
@@ -29,9 +28,6 @@
2928
if t.TYPE_CHECKING:
3029
from langchain_core.callbacks import Callbacks
3130

32-
from ragas.metrics._faithfulness import SentencesSimplified
33-
34-
3531
logger = logging.getLogger(__name__)
3632

3733

@@ -166,13 +162,12 @@ class AnswerCorrectness(MetricWithLLM, MetricWithEmbeddings, SingleTurnMetric):
166162
)
167163
output_type = MetricOutputType.CONTINUOUS
168164
correctness_prompt: PydanticPrompt = field(default_factory=CorrectnessClassifier)
169-
long_form_answer_prompt: PydanticPrompt = field(
170-
default_factory=LongFormAnswerPrompt
165+
statement_generator_prompt: PydanticPrompt = field(
166+
default_factory=StatementGeneratorPrompt
171167
)
172168
weights: list[float] = field(default_factory=lambda: [0.75, 0.25])
173169
beta: float = 1.0
174170
answer_similarity: t.Optional[AnswerSimilarity] = None
175-
sentence_segmenter: t.Optional[HasSegmentMethod] = None
176171
max_retries: int = 1
177172

178173
def __post_init__(self):
@@ -185,10 +180,6 @@ def __post_init__(self):
185180
if not all([w >= 0 for w in self.weights]):
186181
raise ValueError("Weights must be non-negative")
187182

188-
if self.sentence_segmenter is None:
189-
language = self.long_form_answer_prompt.language
190-
self.sentence_segmenter = get_segmenter(language=language, clean=False)
191-
192183
if type(self.beta) is not float:
193184
raise ValueError(
194185
"Beta must be a float. A beta > 1 gives more weight to recall, while beta < 1 favors precision."
@@ -210,25 +201,17 @@ def _compute_statement_presence(
210201

211202
async def _create_simplified_statements(
212203
self, question: str, text: str, callbacks: Callbacks
213-
) -> SentencesSimplified:
214-
assert self.sentence_segmenter is not None, "sentence_segmenter is not set"
204+
) -> StatementGeneratorOutput:
215205
assert self.llm is not None, "llm is not set"
216206

217-
sentences = self.sentence_segmenter.segment(text)
218-
sentences_with_index = {
219-
i: sentence
220-
for i, sentence in enumerate(sentences)
221-
if sentence.strip().endswith(".")
222-
}
223-
224-
statements_simplified = await self.long_form_answer_prompt.generate(
207+
prompt_input = StatementGeneratorInput(question=question, answer=text)
208+
statements = await self.statement_generator_prompt.generate(
225209
llm=self.llm,
226-
data=FaithfulnessStatements(
227-
question=question, answer=text, sentences=sentences_with_index
228-
),
210+
data=prompt_input,
229211
callbacks=callbacks,
230212
)
231-
return statements_simplified
213+
214+
return statements
232215

233216
async def _single_turn_ascore(
234217
self, sample: SingleTurnSample, callbacks: Callbacks
@@ -244,13 +227,11 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
244227
question = row["user_input"]
245228
statements: t.Dict[str, t.List[str]] = {}
246229
for item in ["response", "reference"]:
247-
simplified_statements = await self._create_simplified_statements(
230+
statements_x = await self._create_simplified_statements(
248231
question, row[item], callbacks
249232
)
250-
_statements_unwrapped = []
251-
for component in simplified_statements.sentences:
252-
_statements_unwrapped.extend(component.simpler_statements)
253-
statements[item] = _statements_unwrapped
233+
statements_x = statements_x.statements
234+
statements[item] = statements_x
254235

255236
if not all([val == [] for val in statements.values()]):
256237
ground_truth = [statement for statement in statements["reference"]]

src/ragas/metrics/_bleu_score.py

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,7 @@
44
from langchain_core.callbacks import Callbacks
55

66
from ragas.dataset_schema import SingleTurnSample
7-
from ragas.metrics._faithfulness import HasSegmentMethod
8-
from ragas.metrics.base import MetricType, SingleTurnMetric, get_segmenter
7+
from ragas.metrics.base import MetricType, SingleTurnMetric
98
from ragas.run_config import RunConfig
109

1110

@@ -15,7 +14,6 @@ class BleuScore(SingleTurnMetric):
1514
_required_columns: t.Dict[MetricType, t.Set[str]] = field(
1615
default_factory=lambda: {MetricType.SINGLE_TURN: {"reference", "response"}}
1716
)
18-
sentence_segmenter: t.Optional[HasSegmentMethod] = None
1917
language: str = "english"
2018

2119
def __post_init__(self):
@@ -25,8 +23,6 @@ def __post_init__(self):
2523
raise ImportError(
2624
"sacrebleu is required for bleu score. Please install it using `pip install sacrebleu`"
2725
)
28-
if not self.sentence_segmenter:
29-
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
3026
self.corpus_bleu = corpus_bleu
3127

3228
def init(self, run_config: RunConfig):
@@ -35,12 +31,13 @@ def init(self, run_config: RunConfig):
3531
async def _single_turn_ascore(
3632
self, sample: SingleTurnSample, callbacks: Callbacks
3733
) -> float:
38-
assert (
39-
self.sentence_segmenter is not None
40-
), "Sentence segmenter is not initialized"
4134

42-
reference_sentences = self.sentence_segmenter.segment(sample.reference)
43-
response_sentences = self.sentence_segmenter.segment(sample.response)
35+
reference, response = sample.reference, sample.response
36+
assert isinstance(reference, str), "BleuScore expects a valid reference string"
37+
assert isinstance(response, str), "BleuScore expects a valid response string"
38+
39+
reference_sentences = reference.split(". ")
40+
response_sentences = response.split(". ")
4441

4542
reference = [[reference] for reference in reference_sentences]
4643
response = response_sentences

src/ragas/metrics/_factual_correctness.py

Lines changed: 31 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -9,17 +9,12 @@
99
from numpy.typing import NDArray
1010
from pydantic import BaseModel, Field
1111

12-
from ragas.metrics._faithfulness import (
13-
HasSegmentMethod,
14-
NLIStatementInput,
15-
NLIStatementPrompt,
16-
)
12+
from ragas.metrics._faithfulness import NLIStatementInput, NLIStatementPrompt
1713
from ragas.metrics.base import (
1814
MetricOutputType,
1915
MetricType,
2016
MetricWithLLM,
2117
SingleTurnMetric,
22-
get_segmenter,
2318
)
2419
from ragas.metrics.utils import fbeta_score
2520
from ragas.prompt import PydanticPrompt
@@ -35,11 +30,10 @@
3530

3631
class ClaimDecompositionInput(BaseModel):
3732
response: str = Field(..., title="Response")
38-
sentences: t.List[str] = Field(..., title="Sentences from response")
3933

4034

4135
class ClaimDecompositionOutput(BaseModel):
42-
decomposed_claims: t.List[t.List[str]] = Field(..., title="Decomposed Claims")
36+
claims: t.List[str] = Field(..., title="Decomposed Claims")
4337

4438

4539
# Define an enum for decomposition types
@@ -52,32 +46,25 @@ class DecompositionType(Enum):
5246

5347
# Example input data
5448
example1_input = ClaimDecompositionInput(
55-
response="Charles Babbage was a French mathematician, philosopher, and food critic.",
56-
sentences=[
57-
"Charles Babbage was a French mathematician, philosopher, and food critic."
58-
],
49+
response="Charles Babbage was a French mathematician, philosopher, and food critic."
5950
)
6051

61-
# Define the examples using the new structure
52+
# Define the examples using the Pydantic structure
6253
claim_decomposition_examples = {
6354
DecompositionType.LOW_ATOMICITY_LOW_COVERAGE: [
6455
(
6556
example1_input,
6657
ClaimDecompositionOutput(
67-
decomposed_claims=[
68-
["Charles Babbage was a mathematician and philosopher."]
69-
]
58+
claims=["Charles Babbage was a mathematician and philosopher."]
7059
),
7160
)
7261
],
7362
DecompositionType.LOW_ATOMICITY_HIGH_COVERAGE: [
7463
(
7564
example1_input,
7665
ClaimDecompositionOutput(
77-
decomposed_claims=[
78-
[
79-
"Charles Babbage was a French mathematician, philosopher, and food critic."
80-
]
66+
claims=[
67+
"Charles Babbage was a French mathematician, philosopher, and food critic."
8168
]
8269
),
8370
)
@@ -86,9 +73,9 @@ class DecompositionType(Enum):
8673
(
8774
example1_input,
8875
ClaimDecompositionOutput(
89-
decomposed_claims=[
90-
["Charles Babbage was a mathematician."],
91-
["Charles Babbage was a philosopher."],
76+
claims=[
77+
"Charles Babbage was a mathematician.",
78+
"Charles Babbage was a philosopher.",
9279
]
9380
),
9481
)
@@ -97,11 +84,11 @@ class DecompositionType(Enum):
9784
(
9885
example1_input,
9986
ClaimDecompositionOutput(
100-
decomposed_claims=[
101-
["Charles Babbage was a mathematician."],
102-
["Charles Babbage was a philosopher."],
103-
["Charles Babbage was a food critic."],
104-
["Charles Babbage was French."],
87+
claims=[
88+
"Charles Babbage was a mathematician.",
89+
"Charles Babbage was a philosopher.",
90+
"Charles Babbage was a food critic.",
91+
"Charles Babbage was French.",
10592
]
10693
),
10794
)
@@ -110,23 +97,17 @@ class DecompositionType(Enum):
11097

11198
# Example input data with two sentences
11299
example2_input = ClaimDecompositionInput(
113-
response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics.",
114-
sentences=[
115-
"Albert Einstein was a German theoretical physicist.",
116-
"He developed the theory of relativity and also contributed to the development of quantum mechanics.",
117-
],
100+
response="Albert Einstein was a German theoretical physicist. He developed the theory of relativity and also contributed to the development of quantum mechanics."
118101
)
119102

120103
# Adding examples to the dictionary with different decomposition types
121104
claim_decomposition_examples[DecompositionType.LOW_ATOMICITY_LOW_COVERAGE].append(
122105
(
123106
example2_input,
124107
ClaimDecompositionOutput(
125-
decomposed_claims=[
126-
["Albert Einstein was a German physicist."],
127-
[
128-
"Albert Einstein developed relativity and contributed to quantum mechanics."
129-
],
108+
claims=[
109+
"Albert Einstein was a German physicist.",
110+
"Albert Einstein developed relativity and contributed to quantum mechanics.",
130111
]
131112
),
132113
)
@@ -136,11 +117,9 @@ class DecompositionType(Enum):
136117
(
137118
example2_input,
138119
ClaimDecompositionOutput(
139-
decomposed_claims=[
140-
["Albert Einstein was a German theoretical physicist."],
141-
[
142-
"Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics."
143-
],
120+
claims=[
121+
"Albert Einstein was a German theoretical physicist.",
122+
"Albert Einstein developed the theory of relativity and also contributed to the development of quantum mechanics.",
144123
]
145124
),
146125
)
@@ -150,9 +129,9 @@ class DecompositionType(Enum):
150129
(
151130
example2_input,
152131
ClaimDecompositionOutput(
153-
decomposed_claims=[
154-
["Albert Einstein was a German theoretical physicist."],
155-
["Albert Einstein developed the theory of relativity."],
132+
claims=[
133+
"Albert Einstein was a German theoretical physicist.",
134+
"Albert Einstein developed the theory of relativity.",
156135
]
157136
),
158137
)
@@ -162,12 +141,10 @@ class DecompositionType(Enum):
162141
(
163142
example2_input,
164143
ClaimDecompositionOutput(
165-
decomposed_claims=[
166-
["Albert Einstein was a German theoretical physicist."],
167-
[
168-
"Albert Einstein developed the theory of relativity.",
169-
"Albert Einstein contributed to the development of quantum mechanics.",
170-
],
144+
claims=[
145+
"Albert Einstein was a German theoretical physicist.",
146+
"Albert Einstein developed the theory of relativity.",
147+
"Albert Einstein contributed to the development of quantum mechanics.",
171148
]
172149
),
173150
)
@@ -218,7 +195,6 @@ class FactualCorrectness(MetricWithLLM, SingleTurnMetric):
218195
coverage: t.Literal["low", "high"] = "low"
219196
claim_decomposition_prompt: PydanticPrompt = ClaimDecompositionPrompt()
220197
nli_prompt: PydanticPrompt = NLIStatementPrompt()
221-
sentence_segmenter: t.Optional[HasSegmentMethod] = None
222198
language: str = "english"
223199

224200
def __post_init__(self):
@@ -232,8 +208,6 @@ def __post_init__(self):
232208
logger.warning(
233209
f"No examples found for the atomicity and coverage level: {value}"
234210
)
235-
if not self.sentence_segmenter:
236-
self.sentence_segmenter = get_segmenter(language=self.language, clean=False)
237211

238212
if type(self.beta) is not float:
239213
raise ValueError(
@@ -244,20 +218,12 @@ async def decompose_claims(
244218
self, response: str, callbacks: Callbacks
245219
) -> t.List[str]:
246220
assert self.llm is not None, "LLM must be set"
247-
assert (
248-
self.sentence_segmenter is not None
249-
), "Sentence segmenter is not initialized"
250221

251-
sentences = self.sentence_segmenter.segment(response)
252-
assert isinstance(sentences, list), "Segmenter must return a list of sentences"
253-
prompt_input = ClaimDecompositionInput(response=response, sentences=sentences)
222+
prompt_input = ClaimDecompositionInput(response=response)
254223
result = await self.claim_decomposition_prompt.generate(
255224
data=prompt_input, llm=self.llm, callbacks=callbacks
256225
)
257-
claims_list = [
258-
claim for claim_list in result.decomposed_claims for claim in claim_list
259-
]
260-
return claims_list
226+
return result.claims
261227

262228
async def verify_claims(
263229
self, premise: str, hypothesis_list: t.List[str], callbacks: Callbacks

0 commit comments

Comments
 (0)