
Commit 79b5720

Update 2.evaluation.md
1 parent 5a6ff48 commit 79b5720

File tree

1 file changed: +0 −61 lines changed


docs/content/8.observation/2.evaluation.md

Lines changed: 0 additions & 61 deletions
@@ -154,64 +154,3 @@ evaluator = DeepevalEvaluator(
     prometheus_config=prometheus_config,
 )
 ```
-
-## Complete Example
-
-The following is a complete example of using the DeepEval evaluator. It defines a [GEval](https://deepeval.com/docs/metrics-llm-evals) metric and a [ToolCorrectnessMetric](https://deepeval.com/docs/metrics-tool-correctness) metric, used to evaluate overall output quality and tool-call correctness respectively, and reports the evaluation results to Volcano Engine's VMP platform:
-
-```python
-import asyncio
-import os
-from builtin_tools.agent import agent
-
-from deepeval.metrics import GEval, ToolCorrectnessMetric
-from deepeval.test_case import LLMTestCaseParams
-from veadk.config import getenv
-from veadk.evaluation.deepeval_evaluator import DeepevalEvaluator
-from veadk.evaluation.utils.prometheus import PrometheusPushgatewayConfig
-from veadk.prompts.prompt_evaluator import eval_principle_prompt
-
-prometheus_config = PrometheusPushgatewayConfig()
-
-# 1. Rollout, and generate the eval set file
-# await agent.run(
-#     prompt,
-#     collect_runtime_data=True,
-#     eval_set_id=f"eval_demo_set_{get_current_time()}",
-# )
-# # get the expected output
-# dump_path = agent._dump_path
-# assert dump_path != "", "Dump eval set file failed! Please check runtime logs."
-
-# 2. Evaluate against the eval set file
-evaluator = DeepevalEvaluator(
-    agent=agent,
-    judge_model_name=getenv("MODEL_JUDGE_NAME"),
-    judge_model_api_base=getenv("MODEL_JUDGE_API_BASE"),
-    judge_model_api_key=getenv("MODEL_JUDGE_API_KEY"),
-    prometheus_config=prometheus_config,
-)
-
-# 3. Define evaluation metrics
-metrics = [
-    GEval(
-        threshold=0.8,
-        name="Base Evaluation",
-        criteria=eval_principle_prompt,
-        evaluation_params=[
-            LLMTestCaseParams.INPUT,
-            LLMTestCaseParams.ACTUAL_OUTPUT,
-            LLMTestCaseParams.EXPECTED_OUTPUT,
-        ],
-    ),
-    ToolCorrectnessMetric(
-        threshold=0.5
-    ),
-]
-
-# 4. Run the evaluation
-eval_set_file_path = os.path.join(
-    os.path.dirname(__file__), "builtin_tools", "evalsetf0aef1.evalset.json"
-)
-await evaluator.eval(eval_set_file_path=eval_set_file_path, metrics=metrics)
-```
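One detail worth noting about the removed snippet: it relies on top-level `await` (`await agent.run(...)`, `await evaluator.eval(...)`), which only works inside an already-running event loop such as a notebook or async REPL. In a plain script the call would need to be wrapped in a coroutine and driven with `asyncio.run`. A minimal sketch, assuming the `evaluator`, `metrics`, and `eval_set_file_path` variables are built exactly as in the example above (their construction is not repeated here):

```python
import asyncio


async def main() -> None:
    # Assumes `evaluator`, `metrics`, and `eval_set_file_path` are defined
    # in this module exactly as in the removed example above.
    await evaluator.eval(eval_set_file_path=eval_set_file_path, metrics=metrics)


if __name__ == "__main__":
    # asyncio.run drives the coroutine from a regular (non-async) script.
    asyncio.run(main())
```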
