|
62 | 62 | "source": [ |
63 | 63 | "from ragas.llms import llm_factory\n", |
64 | 64 | "\n", |
65 | | - "evaluator_llm = llm_factory('gpt-4o')" |
| 65 | + "evaluator_llm = llm_factory(\"gpt-4o\")" |
66 | 66 | ] |
67 | 67 | }, |
68 | 68 | { |
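For context on the cell being reformatted above: `llm_factory("gpt-4o")` builds the evaluator LLM used by every metric below. If you would rather bring your own client, ragas also accepts wrapped LangChain models; a minimal sketch, assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set:

```python
from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper

# Alternative to llm_factory: wrap an existing LangChain chat model
# so ragas metrics can drive it as the evaluator LLM.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))
```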
|
104 | 104 | "hallucinations_binary = AspectCritic(\n", |
105 | 105 | " name=\"hallucinations_binary\",\n", |
106 | 106 | " definition=\"Did the model hallucinate or add any information that was not present in the retrieved context?\",\n", |
107 | | - " llm=evaluator_llm\n", |
| 107 | + " llm=evaluator_llm,\n", |
108 | 108 | ")\n", |
109 | 109 | "\n", |
110 | 110 | "await hallucinations_binary.single_turn_ascore(eval_dataset[0])" |
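The `AspectCritic` above scores a sample as 1 when the critic LLM answers "yes" to the `definition` question, 0 otherwise. A quick smoke test does not need the notebook's `eval_dataset`; a sketch with a hand-built sample (field values here are illustrative, not from the original data):

```python
from ragas.dataset_schema import SingleTurnSample

# Hand-built sample: the response adds "Ulm, Germany", which is not
# in the retrieved context, so the critic should return 1.
sample = SingleTurnSample(
    user_input="When was Einstein born?",
    response="Einstein was born in Ulm, Germany, in 1879.",
    retrieved_contexts=["Albert Einstein was born on 14 March 1879."],
)

await hallucinations_binary.single_turn_ascore(sample)
```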
|
163 | 163 | "from ragas.metrics import RubricsScoreWithoutReference\n", |
164 | 164 | "\n", |
165 | 165 | "hallucinations_rubric = RubricsScoreWithoutReference(\n", |
166 | | - " name=\"hallucinations_rubric\",\n", |
167 | | - " llm=evaluator_llm,\n", |
168 | | - " rubrics=rubric\n", |
| 166 | + " name=\"hallucinations_rubric\", llm=evaluator_llm, rubrics=rubric\n", |
169 | 167 | ")\n", |
170 | 168 | "\n", |
171 | 169 | "await hallucinations_rubric.single_turn_ascore(eval_dataset[0])" |
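The `rubric` dict passed here is defined in a cell this diff does not touch. For orientation, ragas rubric metrics expect per-level score descriptions keyed `score1_description` through `score5_description`; the descriptions below are illustrative, not the notebook's:

```python
# Illustrative rubric shape only; the notebook's actual wording is not
# shown in this diff. Higher scores indicate heavier hallucination.
rubric = {
    "score1_description": "The response is fully supported by the retrieved context.",
    "score2_description": "Minor details are unsupported, but nothing contradicts the context.",
    "score3_description": "The response mixes supported and unsupported statements.",
    "score4_description": "Most of the response is not grounded in the context.",
    "score5_description": "The response contradicts or fabricates beyond the context.",
}
```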
|
215 | 213 | "from ragas.callbacks import Callbacks\n", |
216 | 214 | "from ragas.dataset_schema import SingleTurnSample\n", |
217 | 215 | "\n", |
| 216 | + "\n", |
218 | 217 | "@dataclass\n", |
219 | 218 | "class HallucinationsMetric(MetricWithLLM, SingleTurnMetric):\n", |
220 | 219 | " # name of the metric\n", |
221 | 220 | " name: str = \"hallucinations_metric\"\n", |
222 | 221 | " # we need to define the required columns for the metric\n", |
223 | | - " _required_columns: t.Dict[MetricType, t.Set[str]] = field(default_factory=lambda: {MetricType.SINGLE_TURN: {\"user_input\", \"response\", \"retrieved_contexts\"}})\n", |
| 222 | + " _required_columns: t.Dict[MetricType, t.Set[str]] = field(\n", |
| 223 | + " default_factory=lambda: {\n", |
| 224 | + " MetricType.SINGLE_TURN: {\"user_input\", \"response\", \"retrieved_contexts\"}\n", |
| 225 | + " }\n", |
| 226 | + " )\n", |
224 | 227 | "\n", |
225 | 228 | " def __post_init__(self):\n", |
226 | 229 | " # init the faithfulness metric\n", |
227 | 230 | " self.faithfulness_metric = Faithfulness(llm=self.llm)\n", |
228 | 231 | "\n", |
229 | | - " async def _single_turn_ascore(self, sample: SingleTurnSample, callbacks: Callbacks) -> float:\n", |
230 | | - " faithfulness_score = await self.faithfulness_metric.single_turn_ascore(sample, callbacks)\n", |
| 232 | + " async def _single_turn_ascore(\n", |
| 233 | + " self, sample: SingleTurnSample, callbacks: Callbacks\n", |
| 234 | + " ) -> float:\n", |
| 235 | + " faithfulness_score = await self.faithfulness_metric.single_turn_ascore(\n", |
| 236 | + " sample, callbacks\n", |
| 237 | + " )\n", |
231 | 238 | " return 1 - faithfulness_score" |
232 | 239 | ] |
233 | 240 | }, |
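Because `HallucinationsMetric` is a dataclass built on `MetricWithLLM`, instantiating it only requires the evaluator LLM; `__post_init__` then constructs the inner `Faithfulness` metric from that same LLM. A minimal usage sketch:

```python
# Hallucination score is defined above as the complement of
# faithfulness, so values near 1 flag heavily unfaithful responses.
hallucinations_metric = HallucinationsMetric(llm=evaluator_llm)

await hallucinations_metric.single_turn_ascore(eval_dataset[0])
```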
|
269 | 276 | "from ragas import evaluate\n", |
270 | 277 | "\n", |
271 | 278 | "results = evaluate(\n", |
272 | | - " eval_dataset, \n", |
273 | | - " metrics=[\n", |
274 | | - " hallucinations_metric,\n", |
275 | | - " hallucinations_rubric,\n", |
276 | | - " hallucinations_binary\n", |
277 | | - " ], \n", |
| 279 | + " eval_dataset,\n", |
| 280 | + " metrics=[hallucinations_metric, hallucinations_rubric, hallucinations_binary],\n", |
278 | 281 | ")" |
279 | 282 | ] |
280 | 283 | }, |
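To compare the three metrics side by side, the result object returned by `evaluate` can be flattened to a DataFrame; a short sketch (the column names follow each metric's `name` field set above):

```python
# to_pandas() yields one row per evaluated sample, with one column
# per metric, named after the metric's `name` attribute.
df = results.to_pandas()
df[["hallucinations_metric", "hallucinations_rubric", "hallucinations_binary"]].head()
```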
|