@@ -154,64 +154,3 @@ evaluator = DeepevalEvaluator(
     prometheus_config=prometheus_config,
 )
 ```
-
-## Complete Example
-
-The following is a complete example of using the DeepEval evaluator. It defines a [GEval](https://deepeval.com/docs/metrics-llm-evals) metric and a [ToolCorrectnessMetric](https://deepeval.com/docs/metrics-tool-correctness) metric, which assess overall output quality and tool-call correctness respectively, and reports the evaluation results to the VMP platform on Volcano Engine:
-
-```python
-import asyncio
-import os
-from builtin_tools.agent import agent
-
-from deepeval.metrics import GEval, ToolCorrectnessMetric
-from deepeval.test_case import LLMTestCaseParams
-from veadk.config import getenv
-from veadk.evaluation.deepeval_evaluator import DeepevalEvaluator
-from veadk.evaluation.utils.prometheus import PrometheusPushgatewayConfig
-from veadk.prompts.prompt_evaluator import eval_principle_prompt
-
-prometheus_config = PrometheusPushgatewayConfig()
-
-# 1. Roll out the agent and generate an eval set file
-# await agent.run(
-#     prompt,
-#     collect_runtime_data=True,
-#     eval_set_id=f"eval_demo_set_{get_current_time()}",
-# )
-# # Get the expected output
-# dump_path = agent._dump_path
-# assert dump_path != "", "Failed to dump the eval set file! Please check the runtime logs."
-
-# 2. Evaluate against the eval set file
-evaluator = DeepevalEvaluator(
-    agent=agent,
-    judge_model_name=getenv("MODEL_JUDGE_NAME"),
-    judge_model_api_base=getenv("MODEL_JUDGE_API_BASE"),
-    judge_model_api_key=getenv("MODEL_JUDGE_API_KEY"),
-    prometheus_config=prometheus_config,
-)
-
-# 3. Define evaluation metrics
-metrics = [
-    GEval(
-        threshold=0.8,
-        name="Base Evaluation",
-        criteria=eval_principle_prompt,
-        evaluation_params=[
-            LLMTestCaseParams.INPUT,
-            LLMTestCaseParams.ACTUAL_OUTPUT,
-            LLMTestCaseParams.EXPECTED_OUTPUT,
-        ],
-    ),
-    ToolCorrectnessMetric(
-        threshold=0.5,
-    ),
-]
-
-# 4. Run evaluation
-eval_set_file_path = os.path.join(
-    os.path.dirname(__file__), "builtin_tools", "evalsetf0aef1.evalset.json"
-)
-await evaluator.eval(eval_set_file_path=eval_set_file_path, metrics=metrics)
-```
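
Note that the removed example imports `asyncio` and uses `await` at module level, which only works inside an already-running event loop (for example, a notebook). To run it as a plain script, the awaited calls need an async entry point. Below is a minimal sketch, assuming the `agent`, `evaluator`, and `metrics` objects defined in the example above; the prompt and `eval_set_id` values are purely illustrative:

```python
import asyncio
import os

# Assumes `agent`, `evaluator`, and `metrics` are already defined
# as in the example above.


async def main() -> None:
    # Optional rollout step, mirroring the commented-out section of the example
    # (the prompt and eval_set_id below are illustrative placeholders):
    # await agent.run("your prompt", collect_runtime_data=True, eval_set_id="eval_demo_set")

    # Evaluate against a previously dumped eval set file.
    eval_set_file_path = os.path.join(
        os.path.dirname(__file__), "builtin_tools", "evalsetf0aef1.evalset.json"
    )
    await evaluator.eval(eval_set_file_path=eval_set_file_path, metrics=metrics)


if __name__ == "__main__":
    asyncio.run(main())
```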