|
13 | 13 | # limitations under the License. |
14 | 14 |
|
15 | 15 |
|
| 16 | +import json |
16 | 17 | import time |
17 | 18 | import uuid |
18 | 19 | from abc import abstractmethod |
|
24 | 25 | from google.genai import types |
25 | 26 | from pydantic import BaseModel |
26 | 27 |
|
| 28 | +from veadk.utils.misc import formatted_timestamp |
| 29 | + |
27 | 30 |
|
28 | 31 | class InvocationTestData(BaseModel): |
29 | 32 | invocation_id: str = "" |
@@ -79,15 +82,139 @@ def __init__( |
79 | 82 | self.result_list: list[EvalResultData] = [] |
80 | 83 | self.agent_information_list: list[dict] = [] |
81 | 84 |
|
82 | | - def load_eval_set(self, eval_set_file: str) -> list[EvalSet]: |
def _load_eval_set(self, eval_set_file: str) -> EvalSet:
    """Read an eval set from ``eval_set_file`` via the package's file loader.

    Args:
        eval_set_file: Path of the eval set file to load.

    Returns:
        Whatever ``load_eval_set_from_file`` returns for that path.
    """
    # Imported at call time rather than at module import time.
    from .eval_set_file_loader import load_eval_set_from_file as read_eval_set

    loaded_eval_set = read_eval_set(eval_set_file)
    return loaded_eval_set
86 | 89 |
|
87 | | - def generate_eval_data(self, eval_set_file_path: str): |
def _load_eval_set_from_tracing(self, tracing_file: str) -> EvalSet:
    """Build an ``EvalSet`` from a JSON file containing tracing spans.

    The file is expected to hold a JSON list of span dicts. Every span is
    read for ``trace_id``, ``name`` and ``attributes``; spans whose name
    starts with ``"invocation"`` are additionally read for ``start_time``.

    For each trace, spans named ``execute_tool...`` become the ``tool_uses``
    of that trace, and each ``invocation...`` span becomes one conversation
    entry. All entries are collected into a single eval case.

    Args:
        tracing_file: Path to the JSON tracing dump.

    Returns:
        An ``EvalSet`` with ``eval_set_id`` and ``name`` set to ``"default"``
        that contains one eval case built from the spans.

    Raises:
        ValueError: If the file cannot be read or does not contain valid
            JSON. The underlying exception is attached as ``__cause__``.
    """
    try:
        with open(tracing_file, "r") as f:
            tracing_data = json.load(f)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format in file {tracing_file}: {e}") from e
    except Exception as e:
        # Kept broad on purpose: callers have always received ValueError for
        # any read failure, so narrowing here could let other types escape.
        raise ValueError(f"Error reading file {tracing_file}: {e}") from e

    # Group spans by trace_id (dict insertion order keeps first-seen order).
    trace_groups = {}
    for span in tracing_data:
        trace_groups.setdefault(span["trace_id"], []).append(span)

    # Convert to evalset format
    eval_cases, conversation = [], []
    app_name, user_id = "", ""
    creation_timestamp = 0
    for spans in trace_groups.values():
        # Tool calls come from spans with name starting with "execute_tool".
        # The same list object is attached to every invocation of this trace.
        tool_uses = [
            {
                "id": span["attributes"].get("gen_ai.tool.call.id", None),
                "args": json.loads(
                    span["attributes"].get("gcp.vertex.agent.tool_call_args", "{}")
                ),
                "name": span["attributes"].get("gen_ai.tool.name", None),
            }
            for span in spans
            if span["name"].startswith("execute_tool")
        ]

        # Conversation data comes from spans with name starting with "invocation".
        for span in spans:
            if not span["name"].startswith("invocation"):
                continue

            # input.value and output.value are JSON-encoded strings.
            input_value = json.loads(span["attributes"].get("input.value", "{}"))
            output_value = json.loads(span["attributes"].get("output.value", "{}"))

            # "new_message" is itself JSON-encoded. A missing key used to pass
            # a dict to json.loads, which raises TypeError; treat a missing or
            # already-decoded message as plain data instead.
            raw_message = input_value.get("new_message")
            if isinstance(raw_message, (str, bytes, bytearray)):
                user_content = json.loads(raw_message)
            else:
                user_content = raw_message or {}

            # Tolerate a null "content" or an empty "parts" list in the output.
            output_parts = (output_value.get("content") or {}).get("parts") or [{}]
            response_text = output_parts[0].get("text", None)

            # The final response is a deep copy of the user content (JSON
            # round trip) whose first part carries the model's text instead.
            final_response = json.loads(json.dumps(user_content))
            response_parts = final_response.get("parts")
            if not response_parts:
                response_parts = final_response["parts"] = [{}]
            response_parts[0]["text"] = response_text
            final_response["role"] = None

            # presumably start_time is in nanoseconds, making this seconds
            # -- TODO confirm against the trace exporter.
            started_at = span["start_time"] / 1e9

            conversation.append(
                {
                    "invocation_id": output_value.get(
                        "invocation_id", str(uuid.uuid4())
                    ),
                    "user_content": user_content,
                    "final_response": final_response,
                    "intermediate_data": {
                        "tool_uses": tool_uses,
                        "intermediate_responses": [],
                    },
                    "creation_timestamp": started_at,
                }
            )
            # The last invocation seen decides the session-level metadata.
            user_id = input_value.get("user_id", None)
            app_name = span["name"].replace("invocation", "").strip().strip("[]")
            creation_timestamp = started_at

    # NOTE(review): the diff view this was recovered from has no indentation;
    # the single eval case is assumed to be appended once, after all traces,
    # because `conversation` and the session fields are initialised before
    # the per-trace loop -- confirm against the original file.
    eval_cases.append(
        {
            "eval_id": f"veadk_eval_{formatted_timestamp()}",
            "conversation": conversation,
            "session_input": {
                "app_name": app_name,
                "user_id": user_id,
                "state": {},
            },
            "creation_timestamp": creation_timestamp,
        }
    )

    evalset = EvalSet(
        eval_set_id="default",
        name="default",
        description=None,
        eval_cases=eval_cases,
        creation_timestamp=creation_timestamp,
    )

    return evalset
| 190 | + |
| 191 | + def generate_eval_data(self, file_path: str): |
| 192 | + """Generate evaluation data from a given file and assign it to the class attribute `invocation_list`.""" |
88 | 193 | eval_case_data_list: list[EvalCaseData] = [] |
89 | 194 |
|
90 | | - eval_cases = self.load_eval_set(eval_set_file_path).eval_cases |
| 195 | + try: |
| 196 | + with open(file_path, "r") as f: |
| 197 | + file_content = json.load(f) |
| 198 | + except json.JSONDecodeError as e: |
| 199 | + raise ValueError(f"Invalid JSON format in file {file_path}: {e}") |
| 200 | + except Exception as e: |
| 201 | + raise ValueError(f"Error reading file {file_path}: {e}") |
| 202 | + |
| 203 | + if isinstance(file_content, dict) and "eval_cases" in file_content: |
| 204 | + eval_cases = self._load_eval_set(file_path).eval_cases |
| 205 | + elif ( |
| 206 | + isinstance(file_content, list) |
| 207 | + and len(file_content) > 0 |
| 208 | + and all( |
| 209 | + isinstance(span, dict) and "trace_id" in span for span in file_content |
| 210 | + ) |
| 211 | + ): |
| 212 | + eval_cases = self._load_eval_set_from_tracing(file_path).eval_cases |
| 213 | + else: |
| 214 | + raise ValueError( |
| 215 | + f"Unsupported file format in {file_path}. Please provide a valid file." |
| 216 | + ) |
| 217 | + |
91 | 218 | for eval_case in eval_cases: |
92 | 219 | eval_case_data = EvalCaseData(invocations=[]) |
93 | 220 | self.agent_information_list.append( |
|
0 commit comments