
Commit 4e2d520

doc: added numpy style documentation to context_relavency (#62)
1 parent 2661290 · commit 4e2d520

2 files changed: +30 -9 lines changed

src/ragas/evaluation.py

Lines changed: 0 additions & 1 deletion
@@ -37,7 +37,6 @@ def evaluate(
     dataset : Dataset[question: list[str], contexts: list[list[str]], answer: list[str]]
         The dataset in the format of ragas which the metrics will use to score the RAG
         pipeline with
-
     metrics : list[Metric] , optional
         List of metrics to use for evaluation. If not provided then ragas will run the
         evaluation on the best set of metrics to give a complete view.
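
For reference, the evaluate() docstring touched here describes a dataset with question, contexts and answer columns plus an optional list of metrics. Below is a minimal sketch of such a call; it assumes evaluate is importable from the ragas package and that the dataset is a Hugging Face datasets.Dataset, neither of which is shown in this diff.

# Sketch only: import paths and Dataset type are assumptions, not part of this commit.
from datasets import Dataset

from ragas import evaluate

data = {
    "question": ["When were the first modern Olympics held?"],
    "contexts": [["The first modern Olympics were held in Athens in 1896."]],
    "answer": ["The first modern Olympic Games took place in 1896."],
}
dataset = Dataset.from_dict(data)

# metrics is optional; per the docstring, omitting it lets ragas choose a
# default set of metrics that gives a complete view of the RAG pipeline.
results = evaluate(dataset)
print(results)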

src/ragas/metrics/context_relevance.py

Lines changed: 30 additions & 8 deletions
@@ -81,30 +81,52 @@ def evaluate(self, answers: List[List[str]]) -> np.float_:
 
 @dataclass
 class ContextRelevancy(Metric):
-
     """
-    params
-    strictness: Integer, controls the number of times sentence extraction is
-        performed to quantify uncertainty from the LLM. Defaults to 2.
-    agreement_metric: bert_score or jaccard_score, used to measure agreement
-        between multiple samples.
-    model_name: any encoder model. Used for calculating bert_score.
+    Extracts sentences from the context that are relevant to the question with
+    self-consistancy checks. The number of relevant sentences and is used as the score.
+
+    Attributes
+    ----------
+    name : str
+    batch_size : int
+        Batch size for openai completion.
+    strictness : int
+        Controls the number of times sentence extraction is performed to quantify
+        uncertainty from the LLM. Defaults to 2.
+    agreement_metric : str
+        "bert_score" or "jaccard_score", used to measure agreement between multiple
+        samples.
+    model_name : str
+        any encoder model. Used for calculating bert_score.
     """
 
     name: str = "context_relavency"
     batch_size: int = 15
-    agreement_metric: str = "bert_score"
     strictness: int = 2
+    agreement_metric: str = "bert_score"
     model_name: str = "cross-encoder/stsb-TinyBERT-L-4"
 
+    def __post_init__(self: t.Self):
+        if self.agreement_metric == "bert_score" and self.model_name is None:
+            raise ValueError(
+                "model_name must be provided when agreement_metric is bert_score"
+            )
+
     def init_model(self: t.Self):
         self.sent_agreement = SentenceAgreement(
             model_name=self.model_name, metric=self.agreement_metric
         )
 
     def score(self: t.Self, dataset: Dataset) -> Dataset:
         """
+        Parameters
+        ----------
         dataset: Dataset[question: list[str], contexts: list[list[str]]]
+
+        Returns
+        -------
+        Dataset[question: list[str], contexts: list[list[str]], scores: list[float]]
+            Dataset with the scores for each row.
         """
         prompts = []
         questions, contexts = dataset["question"], dataset["contexts"]
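
The new numpy-style docstring spells out how ContextRelevancy is configured and what score() expects and returns. A usage sketch follows, under the assumptions noted in the comments: the input is a Hugging Face Dataset and init_model() is called manually before scoring, based only on the methods visible in this diff.

# Sketch only: the import path mirrors the file location in this diff; the
# Hugging Face Dataset type and the need to call init_model() first are assumptions.
from datasets import Dataset

from ragas.metrics.context_relevance import ContextRelevancy

metric = ContextRelevancy(
    strictness=2,                    # number of sentence-extraction samples
    agreement_metric="bert_score",   # or "jaccard_score"
    model_name="cross-encoder/stsb-TinyBERT-L-4",  # encoder used for bert_score
)
metric.init_model()  # builds the SentenceAgreement helper shown above

ds = Dataset.from_dict({
    "question": ["When were the first modern Olympics held?"],
    "contexts": [["The first modern Olympics were held in Athens in 1896."]],
})
scored = metric.score(ds)   # adds a "scores" column per the new docstring
print(scored["scores"])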

0 commit comments
