Skip to content

Commit 53b626e

Browse files
authored
Improve metrics (#131)
- Improve the faithfulness prompts. - Change the sentence tokenizer to a better one (pysbd) to improve context recall and context relevancy.
1 parent 5d3c459 commit 53b626e

File tree

3 files changed

+16
-6
lines changed

3 files changed

+16
-6
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ dependencies = [
88
"protobuf<=3.20.0",
99
"langchain>=0.0.218",
1010
"openai",
11-
"pydantic<2.0"
11+
"pydantic<2.0",
12+
"pysbd>=0.3.4",
1213
]
1314
dynamic = ["version", "readme"]
1415

src/ragas/metrics/context_relevance.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import List
77

88
import numpy as np
9+
import pysbd
910
from datasets import Dataset
1011
from langchain.callbacks.manager import CallbackManager, trace_as_chain_group
1112
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
@@ -24,8 +25,16 @@
2425
)
2526

2627

27-
def sent_tokenize(sent: str) -> List[str]:
28-
return [s[:-1] if s.endswith(".") else s for s in sent.strip().split(". ")]
28+
seg = pysbd.Segmenter(language="en", clean=False)
29+
30+
31+
def sent_tokenize(text: str) -> List[str]:
32+
"""
33+
Tokenize text into sentences.
34+
"""
35+
sentences = seg.segment(text)
36+
assert isinstance(sentences, list)
37+
return sentences
2938

3039

3140
class SentenceAgreement:
@@ -85,7 +94,7 @@ class ContextRelevancy(MetricWithLLM):
8594
Batch size for openai completion.
8695
strictness : int
8796
Controls the number of times sentence extraction is performed to quantify
88-
uncertainty from the LLM. Defaults to 2.
97+
uncertainty from the LLM. Defaults to 1.
8998
agreement_metric : str
9099
"bert_score" or "jaccard_score", used to measure agreement between multiple
91100
samples.
@@ -96,7 +105,7 @@ class ContextRelevancy(MetricWithLLM):
96105
name: str = "context_relevancy"
97106
evaluation_mode: EvaluationMode = EvaluationMode.qc
98107
batch_size: int = 15
99-
strictness: int = 2
108+
strictness: int = 1
100109
agreement_metric: str = "bert_score"
101110
model_name: str = "cross-encoder/stsb-TinyBERT-L-4"
102111

src/ragas/metrics/faithfulnes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#################
1818
LONG_FORM_ANSWER_PROMPT = HumanMessagePromptTemplate.from_template(
1919
"""\
20-
Given a question and answer, create one or more statements from answer.
20+
Given a question and answer, create one or more statements from each sentence in the given answer.
2121
question: Who was Albert Einstein and what is he best known for?
2222
answer: He was a German-born theoretical physicist, widely acknowledged to be one of the greatest and most influential physicists of all time. He was best known for developing the theory of relativity, he also made important contributions to the development of the theory of quantum mechanics.
2323
statements:\nAlbert Einstein was born in Germany.\nAlbert Einstein was best known for his theory of relativity.

0 commit comments

Comments
 (0)