2828from veadk .utils .misc import formatted_timestamp
2929
3030
31- class InvocationTestData (BaseModel ):
31+ class ToolInvocation (BaseModel ):
32+ tool_name : str
33+ tool_args : dict [str , Any ] = {}
34+ tool_result : Any = None
35+
36+
37+ class Invocation (BaseModel ):
3238 invocation_id : str = ""
3339 input : str
3440 actual_output : str
@@ -38,8 +44,8 @@ class InvocationTestData(BaseModel):
3844 latency : str = "" # ms
3945
4046
41- class EvalCaseData (BaseModel ):
42- invocations : list [InvocationTestData ]
47+ class EvalTestCase (BaseModel ):
48+ invocations : list [Invocation ]
4349
4450
4551class MetricResult (BaseModel ):
@@ -78,23 +84,23 @@ def __init__(
7884 ):
7985 self .name = name
8086 self .agent = agent
81- self .invocation_list : list [EvalCaseData ] = []
87+ self .invocation_list : list [EvalTestCase ] = []
8288 self .result_list : list [EvalResultData ] = []
8389 self .agent_information_list : list [dict ] = []
8490
85- def _load_eval_set (self , eval_set_file : str ) -> EvalSet :
86- from .eval_set_file_loader import load_eval_set_from_file
91+ def _build_eval_set_from_eval_json (self , eval_json_path : str ) -> EvalSet :
92+ from veadk . evaluation .eval_set_file_loader import load_eval_set_from_file
8793
88- return load_eval_set_from_file (eval_set_file )
94+ return load_eval_set_from_file (eval_json_path )
8995
90- def _load_eval_set_from_tracing (self , tracing_file : str ) -> EvalSet :
96+ def _build_eval_set_from_tracing_json (self , tracing_json_path : str ) -> EvalSet :
9197 try :
92- with open (tracing_file , "r" ) as f :
98+ with open (tracing_json_path , "r" ) as f :
9399 tracing_data = json .load (f )
94100 except json .JSONDecodeError as e :
95- raise ValueError (f"Invalid JSON format in file { tracing_file } : { e } " )
101+ raise ValueError (f"Invalid JSON format in file { tracing_json_path } : { e } " )
96102 except Exception as e :
97- raise ValueError (f"Error reading file { tracing_file } : { e } " )
103+ raise ValueError (f"Error reading file { tracing_json_path } : { e } " )
98104
99105 # Group spans by trace_id
100106 trace_groups = {}
@@ -188,9 +194,9 @@ def _load_eval_set_from_tracing(self, tracing_file: str) -> EvalSet:
188194
189195 return evalset
190196
191- def generate_eval_data (self , file_path : str ):
197+ def build_eval_set (self , file_path : str ):
192198 """Generate evaluation data from a given file and assign it to the class attribute `invocation_list`."""
193- eval_case_data_list : list [EvalCaseData ] = []
199+ eval_case_data_list : list [EvalTestCase ] = []
194200
195201 try :
196202 with open (file_path , "r" ) as f :
@@ -201,22 +207,22 @@ def generate_eval_data(self, file_path: str):
201207 raise ValueError (f"Error reading file { file_path } : { e } " )
202208
203209 if isinstance (file_content , dict ) and "eval_cases" in file_content :
204- eval_cases = self ._load_eval_set (file_path ).eval_cases
210+ eval_cases = self ._build_eval_set_from_eval_json (file_path ).eval_cases
205211 elif (
206212 isinstance (file_content , list )
207213 and len (file_content ) > 0
208214 and all (
209215 isinstance (span , dict ) and "trace_id" in span for span in file_content
210216 )
211217 ):
212- eval_cases = self ._load_eval_set_from_tracing (file_path ).eval_cases
218+ eval_cases = self ._build_eval_set_from_tracing_json (file_path ).eval_cases
213219 else :
214220 raise ValueError (
215221 f"Unsupported file format in { file_path } . Please provide a valid file."
216222 )
217223
218224 for eval_case in eval_cases :
219- eval_case_data = EvalCaseData (invocations = [])
225+ eval_case_data = EvalTestCase (invocations = [])
220226 if eval_case .session_input :
221227 self .agent_information_list .append (
222228 {
@@ -247,7 +253,7 @@ def generate_eval_data(self, file_path: str):
247253 )
248254
249255 eval_case_data .invocations .append (
250- InvocationTestData (
256+ Invocation (
251257 invocation_id = invocation .invocation_id ,
252258 input = _input ,
253259 actual_output = "" ,
@@ -261,7 +267,7 @@ def generate_eval_data(self, file_path: str):
261267 eval_case_data_list .append (eval_case_data )
262268 self .invocation_list = eval_case_data_list
263269
264- async def _run_agent_for_actual_data (self ):
270+ async def generate_actual_outputs (self ):
265271 for eval_case_data , agent_information in zip (
266272 self .invocation_list , self .agent_information_list
267273 ):
@@ -333,7 +339,7 @@ async def _run_agent_for_actual_data(self):
333339 invocation .actual_tool = _actual_tool
334340 invocation .latency = _latency
335341
336- def get_data (self ) -> list [list [dict [str , Any ]]]:
342+ def get_eval_set_information (self ) -> list [list [dict [str , Any ]]]:
337343 """Merge the evaluation data and return it in the format of list[list[dict]]"""
338344 result = []
339345 for i , eval_case in enumerate (self .invocation_list ):
@@ -360,7 +366,7 @@ def get_data(self) -> list[list[dict[str, Any]]]:
360366 return result
361367
362368 @abstractmethod
363- async def eval (
369+ async def evaluate (
364370 self ,
365371 eval_set_file_path : str ,
366372 metrics : list [Any ],
0 commit comments