import os
import asyncio
import logging
from typing import Any, List, Dict

import httpx
from pydantic import BaseModel, Field, ConfigDict
from crewai import Agent, Task, Crew
from crewai_tools import tool

# Assuming LLMSpec is defined elsewhere; placeholder import
from agentic_security.http_spec import LLMSpec

LLM_SPECS = []  # Populate with LLM spec strings if needed
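# Illustrative only: agentic_security LLM specs are raw HTTP request templates with a
# prompt placeholder. The exact syntax below is an assumption; consult the
# agentic_security documentation for the real format.
# LLM_SPECS = [
#     'POST https://api.deepseek.com/chat/completions\n'
#     'Authorization: Bearer $DEEPSEEK_API_KEY\n'
#     'Content-Type: application/json\n'
#     '\n'
#     '{"model": "deepseek-chat", "messages": [{"role": "user", "content": "<<PROMPT>>"}]}',
# ]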

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Define AgentSpecification model
class AgentSpecification(BaseModel):
    name: str | None = Field(None, description="Name of the LLM/agent")
    version: str | None = Field(None, description="Version of the LLM/agent")
    description: str | None = Field(None, description="Description of the LLM/agent")
    capabilities: List[str] | None = Field(None, description="List of capabilities")
    configuration: Dict[str, Any] | None = Field(
        None, description="Configuration settings"
    )
    endpoint: str | None = Field(None, description="Endpoint URL of the deployed agent")

    model_config = ConfigDict(arbitrary_types_allowed=True)


# Define OperatorToolBox class (unchanged from original)
class OperatorToolBox:
    def __init__(self, spec: AgentSpecification, datasets: List[Dict[str, Any]]):
        self.spec = spec
        self.datasets = datasets
        self.failures = []
        self.llm_specs = [LLMSpec.from_string(spec_str) for spec_str in LLM_SPECS]

    def get_spec(self) -> AgentSpecification:
        return self.spec

    def get_datasets(self) -> List[Dict[str, Any]]:
        return self.datasets

    def validate(self) -> bool:
        if not self.spec.name or not self.spec.version:
            self.failures.append("Invalid specification: Name or version is missing.")
            return False
        if not self.datasets:
            self.failures.append("No datasets provided.")
            return False
        return True

    def stop(self) -> None:
        logger.info("Stopping the toolbox...")

    def run(self) -> None:
        logger.info("Running the toolbox...")

    def get_results(self) -> List[Dict[str, Any]]:
        return self.datasets

    def get_failures(self) -> List[str]:
        return self.failures

    def run_operation(self, operation: str) -> str:
        if operation not in ["dataset1", "dataset2", "dataset3"]:
            self.failures.append(f"Operation '{operation}' failed: Dataset not found.")
            return f"Operation '{operation}' failed: Dataset not found."
        return f"Operation '{operation}' executed successfully."

    async def test_llm_spec(self, llm_spec: LLMSpec, user_prompt: str) -> str:
        try:
            response = await llm_spec.verify()
            response.raise_for_status()
            logger.info(f"Verification succeeded for {llm_spec.url}")

            test_response = await llm_spec.probe(user_prompt)
            test_response.raise_for_status()
            response_data = test_response.json()
            return f"Test succeeded for {llm_spec.url}: {response_data}"
        except httpx.HTTPStatusError as e:
            self.failures.append(f"HTTP error occurred: {e}")
            logger.error(f"Test failed for {llm_spec.url}: {e}")
            return f"Test failed for {llm_spec.url}: {e}"
        except Exception as e:
            self.failures.append(f"An error occurred: {e}")
            logger.error(f"Test failed for {llm_spec.url}: {e}")
            return f"Test failed for {llm_spec.url}: {e}"

    async def test_with_prompt(self, spec_index: int, user_prompt: str) -> str:
        if not 0 <= spec_index < len(self.llm_specs):
            return f"Invalid spec index: {spec_index}. Valid range is 0 to {len(self.llm_specs) - 1}"
        llm_spec = self.llm_specs[spec_index]
        return await self.test_llm_spec(llm_spec, user_prompt)

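# Agent-free usage sketch for quick debugging (assumes LLM_SPECS above has at least
# one entry; `some_spec` is a hypothetical AgentSpecification instance):
#   tb = OperatorToolBox(spec=some_spec, datasets=[{"id": "dataset1"}])
#   print(asyncio.run(tb.test_with_prompt(0, "Hello")))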

# Define CrewAI Tools
# The tool functions close over the module-level `toolbox` instance created below:
# agent-invoked tools only receive JSON-serializable arguments, so the toolbox
# itself cannot be passed in as a tool parameter.
@tool("validate_toolbox")
def validate_toolbox() -> str:
    """Validate the toolbox configuration."""
    is_valid = toolbox.validate()
    return (
        "ToolBox validation successful." if is_valid else "ToolBox validation failed."
    )


@tool("execute_operation")
def execute_operation(operation: str) -> str:
    """Execute a dataset operation."""
    return toolbox.run_operation(operation)


@tool("retrieve_results")
def retrieve_results() -> str:
    """Retrieve the results of operations."""
    results = toolbox.get_results()
    return (
        f"Operation Results:\n{results}"
        if results
        else "No operations have been executed yet."
    )


@tool("retrieve_failures")
def retrieve_failures() -> str:
    """Retrieve recorded failures."""
    failures = toolbox.get_failures()
    return f"Failures:\n{failures}" if failures else "No failures recorded."


@tool("list_llm_specs")
def list_llm_specs() -> str:
    """List available LLM specifications."""
    spec_list = "\n".join(
        f"{i}: {spec.url}" for i, spec in enumerate(toolbox.llm_specs)
    )
    return f"Available LLM Specs:\n{spec_list}" if spec_list else "No LLM specs configured."


@tool("test_llm_with_prompt")
async def test_llm_with_prompt(spec_index: int, user_prompt: str) -> str:
    """Test an LLM spec with a user prompt."""
    return await toolbox.test_with_prompt(spec_index, user_prompt)


# Setup OperatorToolBox
spec = AgentSpecification(
    name="DeepSeek Chat",
    version="1.0",
    description="A powerful language model",
    capabilities=["text-generation", "question-answering"],
    configuration={"max_tokens": 100},
)
toolbox = OperatorToolBox(
    spec=spec, datasets=[{"id": "dataset1"}, {"id": "dataset2"}, {"id": "dataset3"}]
)
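# Quick sanity check outside the agent loop (assumption: tools built with CrewAI's
# @tool decorator can be invoked directly via .run()):
#   print(validate_toolbox.run())
#   print(execute_operation.run(operation="dataset1"))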

# Define CrewAI Agent
dataset_manager_agent = Agent(
    role="Dataset Manager",
    goal="Manage and operate the OperatorToolBox to validate configurations, run operations, and test LLMs.",
    backstory="An expert in dataset management and LLM testing, designed to assist with toolbox operations.",
    verbose=True,
    # No explicit `llm` is set: the model comes from the OPENAI_* environment
    # variables configured below (DeepSeek via its OpenAI-compatible API).
    tools=[
        validate_toolbox,
        execute_operation,
        retrieve_results,
        retrieve_failures,
        list_llm_specs,
        test_llm_with_prompt,
    ],
    allow_delegation=False,  # Single agent, no delegation needed
)

# Define Tasks
tasks = [
    Task(
        description="Validate the toolbox configuration.",
        agent=dataset_manager_agent,
        expected_output="A string indicating whether validation succeeded or failed.",
    ),
    Task(
        description="List available LLM specifications.",
        agent=dataset_manager_agent,
        expected_output="A string listing available LLM specs.",
    ),
    Task(
        description="Guide the user to test an LLM with the prompt: 'Tell me a short story about a robot'. Suggest listing specs first.",
        agent=dataset_manager_agent,
        expected_output="A string suggesting the user list specs and proceed with testing.",
    ),
]

# Define Crew
crew = Crew(
    agents=[dataset_manager_agent],
    tasks=tasks,
    verbose=True,  # Detailed logging (recent CrewAI expects a boolean here)
)


# Async wrapper so the async tool runs inside an event loop
async def run_crew():
    # crew.kickoff() itself is synchronous; async tools are awaited internally by CrewAI
    result = crew.kickoff()
    print("\nCrew Results:")
    # Recent CrewAI versions return a CrewOutput; per-task results live in tasks_output
    for task_result in result.tasks_output:
        print(f"Task: {task_result.description}")
        print(f"Output: {task_result.raw}\n")

    # Handle user interaction for LLM testing
    print("Please select a spec index from the listed specs and confirm to proceed.")
    user_input = (
        input("Enter spec index and 'yes' to confirm (e.g., '0 yes'): ").strip().split()
    )
    if len(user_input) == 2 and user_input[1].lower() == "yes":
        try:
            spec_index = int(user_input[0])
            user_prompt = "Tell me a short story about a robot"
            # Create a new task for testing
            test_task = Task(
                description=f"Test LLM at index {spec_index} with prompt: '{user_prompt}'",
                agent=dataset_manager_agent,
                expected_output="A string with the test result from the LLM.",
            )
            test_crew = Crew(
                agents=[dataset_manager_agent], tasks=[test_task], verbose=True
            )
            test_result = test_crew.kickoff()
            print(f"Test Output: {test_result.raw}\n")
        except ValueError:
            print("Invalid spec index provided.\n")
    else:
        print("Test canceled. Please provide a valid index and confirmation.\n")


# Route CrewAI's default OpenAI-compatible client to DeepSeek
os.environ["OPENAI_API_KEY"] = os.environ.get(
    "DEEPSEEK_API_KEY", ""
)  # CrewAI reads OPENAI_API_KEY
os.environ["OPENAI_API_BASE"] = "https://api.deepseek.com"  # DeepSeek's OpenAI-compatible endpoint
os.environ["OPENAI_MODEL_NAME"] = "deepseek-chat"  # DeepSeek chat model (adjust if needed)
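# Example invocation (hypothetical filename):
#   export DEEPSEEK_API_KEY=sk-...
#   python crew_toolbox_demo.py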

if __name__ == "__main__":
    asyncio.run(run_crew())