|
| 1 | +from typing import Any |
1 | 2 | import pandas as pd |
2 | 3 |
|
3 | | -from graphgen.bases import BaseLLMWrapper, BaseOperator |
| 4 | +from graphgen.bases import BaseLLMWrapper, BaseOperator, QAPair |
4 | 5 | from graphgen.common import init_llm |
| 6 | +from graphgen.utils import run_concurrent |
5 | 7 |
|
6 | 8 |
|
7 | 9 | class EvaluateService(BaseOperator): |
@@ -38,7 +40,49 @@ def process(self, batch: pd.DataFrame) -> pd.DataFrame: |
38 | 40 | items = batch.to_dict(orient="records") |
39 | 41 | return pd.DataFrame(self.evaluate(items)) |
40 | 42 |
|
41 | | - def evaluate(self, items: list[dict]) -> list[dict]: |
42 | | - print(items) |
43 | | - pass |
async def _process_single(self, item: dict[str, Any]) -> dict[str, Any]:
    """Score one question/answer record with every configured evaluator.

    Args:
        item: Record carrying ``"question"`` and ``"answer"`` entries. Both
            values are coerced with ``str``; a missing key counts as empty.

    Returns:
        ``item`` itself, updated in place with one entry per metric, or
        with ``"<metric>_<sub_metric>"`` entries when an evaluator returns
        a dict. An empty dict is returned when the question or answer is
        empty or the ``QAPair`` cannot be built, so that callers can drop
        the record with a plain truthiness check.
    """
    try:
        qa_pair = QAPair(
            question=str(item.get("question", "")),
            answer=str(item.get("answer", "")),
        )
        if not qa_pair.question or not qa_pair.answer:
            self.logger.error("Empty question or answer, skipping.")
            return {}
    except Exception as e:
        self.logger.error("Error in QAPair creation: %s", str(e))
        return {}

    for metric, evaluator in self.evaluators.items():
        try:
            score = evaluator.evaluate(qa_pair)
            if isinstance(score, dict):
                # Flatten multi-valued scores into one column per sub-metric.
                for sub_metric, sub_score in score.items():
                    item[f"{metric}_{sub_metric}"] = float(sub_score)
            else:
                item[metric] = float(score)
        except Exception as e:
            # Best effort: one failing evaluator must not discard the
            # scores already collected for this record.
            self.logger.error("Error in %s evaluation: %s", metric, str(e))
            item[metric] = None

    # Hand the scored record back. Without this the coroutine resolves to
    # None and the caller's truthiness filter throws every result away.
    return item
| 74 | + |
def evaluate(self, items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Evaluate a batch of records and keep only the usable results.

    Args:
        items: Records to score; each one is handed to
            ``_process_single`` through ``run_concurrent``.

    Returns:
        The truthy results produced for ``items``. Falsy results (such as
        the empty dict ``_process_single`` yields for skipped records) are
        dropped. An empty input gives an empty list without invoking
        ``run_concurrent``.
    """
    if not items:
        return []

    outcomes = run_concurrent(
        self._process_single,
        items,
        desc="Evaluating items",
        unit="item",
    )
    return [outcome for outcome in outcomes if outcome]
0 commit comments