@@ -81,30 +81,52 @@ def evaluate(self, answers: List[List[str]]) -> np.float_:
8181
8282@dataclass
8383class ContextRelevancy (Metric ):
84-
8584 """
86- params
87- strictness: Integer, controls the number of times sentence extraction is
88- performed to quantify uncertainty from the LLM. Defaults to 2.
89- agreement_metric: bert_score or jaccard_score, used to measure agreement
90- between multiple samples.
91- model_name: any encoder model. Used for calculating bert_score.
85+    Extracts sentences from the context that are relevant to the question with
86+    self-consistency checks. The number of relevant sentences is used as the score.
87+
88+ Attributes
89+ ----------
90+ name : str
91+ batch_size : int
92+ Batch size for openai completion.
93+ strictness : int
94+ Controls the number of times sentence extraction is performed to quantify
95+ uncertainty from the LLM. Defaults to 2.
96+ agreement_metric : str
97+ "bert_score" or "jaccard_score", used to measure agreement between multiple
98+ samples.
99+ model_name : str
100+ any encoder model. Used for calculating bert_score.
92101 """
93102
94103 name : str = "context_relavency"
95104 batch_size : int = 15
96- agreement_metric : str = "bert_score"
97105 strictness : int = 2
106+ agreement_metric : str = "bert_score"
98107 model_name : str = "cross-encoder/stsb-TinyBERT-L-4"
99108
109+ def __post_init__ (self : t .Self ):
110+ if self .agreement_metric == "bert_score" and self .model_name is None :
111+ raise ValueError (
112+ "model_name must be provided when agreement_metric is bert_score"
113+ )
114+
100115 def init_model (self : t .Self ):
101116 self .sent_agreement = SentenceAgreement (
102117 model_name = self .model_name , metric = self .agreement_metric
103118 )
104119
105120 def score (self : t .Self , dataset : Dataset ) -> Dataset :
106121 """
122+ Parameters
123+ ----------
107124 dataset: Dataset[question: list[str], contexts: list[list[str]]]
125+
126+ Returns
127+ -------
128+ Dataset[question: list[str], contexts: list[list[str]], scores: list[float]]
129+ Dataset with the scores for each row.
108130 """
109131 prompts = []
110132 questions , contexts = dataset ["question" ], dataset ["contexts" ]
0 commit comments