diff --git a/agents-core/vision_agents/core/agents/agents.py b/agents-core/vision_agents/core/agents/agents.py index 73726cc1..545840bd 100644 --- a/agents-core/vision_agents/core/agents/agents.py +++ b/agents-core/vision_agents/core/agents/agents.py @@ -5,12 +5,11 @@ import time import uuid from dataclasses import asdict -from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeGuard, Coroutine +from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeGuard from uuid import uuid4 import getstream.models from aiortc import VideoStreamTrack -from getstream.video.async_call import Call from getstream.video.rtc import Call from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import TrackType @@ -697,7 +696,7 @@ async def create_user(self) -> None: async def create_call(self, call_type: str, call_id: str) -> Call: """Shortcut for creating a call/room etc.""" call = self.edge.client.video.call(call_type, call_id) - response = await call.get_or_create(data={"created_by_id": self.agent_user.id}) + await call.get_or_create(data={"created_by_id": self.agent_user.id}) return call diff --git a/examples/01_simple_agent_example/simple_agent_example.py b/examples/01_simple_agent_example/simple_agent_example.py index b81cdb76..a063b765 100644 --- a/examples/01_simple_agent_example/simple_agent_example.py +++ b/examples/01_simple_agent_example/simple_agent_example.py @@ -1,15 +1,18 @@ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core import User, Agent +from vision_agents.core import User, Agent, cli +from vision_agents.core.agents import AgentLauncher from vision_agents.plugins import deepgram, getstream, gemini, vogent, elevenlabs # from vision_agents.core.profiling import Profiler +logger = logging.getLogger(__name__) + load_dotenv() -async def start_agent() -> None: +async def create_agent(**kwargs) -> Agent: llm = gemini.LLM("gemini-2.0-flash") # create an agent to run with Stream's edge, openAI llm agent = Agent( @@ -30,9 +33,14 @@ async def start_agent() -> None: # realtime version (vad, tts and stt not needed) # llm=openai.Realtime() ) + return agent + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() # Create a call - call = agent.edge.client.video.call("default", str(uuid4())) + call = await agent.create_call(call_type, call_id) # Have the agent join the call/room with await agent.join(call): @@ -89,4 +97,4 @@ def _flush_and_shutdown(): if __name__ == "__main__": # setup_telemetry() - asyncio.run(start_agent()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/02_golf_coach_example/golf_coach_example.py b/examples/02_golf_coach_example/golf_coach_example.py index e9ce7b2b..a186bd4d 100644 --- a/examples/02_golf_coach_example/golf_coach_example.py +++ b/examples/02_golf_coach_example/golf_coach_example.py @@ -1,15 +1,17 @@ -import asyncio -from uuid import uuid4 +import logging from dotenv import load_dotenv -from vision_agents.core import User, Agent +from vision_agents.core import User, Agent, cli +from vision_agents.core.agents import AgentLauncher from vision_agents.plugins import getstream, ultralytics, gemini +logger = logging.getLogger(__name__) + load_dotenv() -async def start_agent() -> None: +async def create_agent(**kwargs) -> Agent: agent = Agent( edge=getstream.Edge(), # use stream for edge video transport agent_user=User(name="AI golf coach"), @@ -20,9 +22,14 @@ async def start_agent() -> None: ultralytics.YOLOPoseProcessor(model_path="yolo11n-pose.pt") ], # realtime pose detection with yolo ) + return agent + - # create a call, some other video networks call this a room - call = agent.edge.client.video.call("default", str(uuid4())) +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # join the call and open a demo env with await agent.join(call): @@ -37,4 +44,4 @@ async def start_agent() -> None: if __name__ == "__main__": - asyncio.run(start_agent()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/09_github_mcp_demo/gemini_realtime_github_mcp_demo.py b/examples/other_examples/09_github_mcp_demo/gemini_realtime_github_mcp_demo.py index 8f1d285a..69eec9ec 100644 --- a/examples/other_examples/09_github_mcp_demo/gemini_realtime_github_mcp_demo.py +++ b/examples/other_examples/09_github_mcp_demo/gemini_realtime_github_mcp_demo.py @@ -5,13 +5,13 @@ using voice commands through the Gemini Live API. """ -import asyncio import logging import os -from uuid import uuid4 + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.mcp import MCPServerRemote from vision_agents.plugins.gemini.gemini_realtime import Realtime from vision_agents.plugins import getstream @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) -async def start_agent(): +async def create_agent(**kwargs) -> Agent: """Demonstrate Gemini Realtime with GitHub MCP server integration.""" # Get GitHub PAT from environment @@ -34,14 +34,14 @@ async def start_agent(): if not github_pat: logger.error("GITHUB_PAT environment variable not found!") logger.error("Please set GITHUB_PAT in your .env file or environment") - return + raise ValueError("GITHUB_PAT environment variable not found") # Get Google API key from environment google_api_key = os.getenv("GOOGLE_API_KEY") if not google_api_key: logger.error("GOOGLE_API_KEY environment variable not found!") logger.error("Please set GOOGLE_API_KEY in your .env file or environment") - return + raise ValueError("GOOGLE_API_KEY environment variable not found") # Create GitHub MCP server github_server = MCPServerRemote( @@ -69,6 +69,10 @@ async def start_agent(): logger.info("Agent created with Gemini Realtime and GitHub MCP server") logger.info(f"GitHub server: {github_server}") + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: try: # Set up event handler for when participants join @agent.subscribe @@ -85,8 +89,10 @@ async def on_participant_joined(event: CallSessionParticipantJoinedEvent): f"Hello {event.participant.user.name}! I'm your GitHub AI assistant powered by Gemini Live. I have access to {len(mcp_functions)} GitHub tools and can help you with repositories, issues, pull requests, and more through voice commands!" ) + # ensure the agent user is created + await agent.create_user() # Create a call - call = agent.edge.client.video.call("default", str(uuid4())) + call = await agent.create_call(call_type, call_id) # Have the agent join the call/room logger.info("šŸŽ¤ Agent joining call...") @@ -123,4 +129,4 @@ async def on_participant_joined(event: CallSessionParticipantJoinedEvent): if __name__ == "__main__": - asyncio.run(start_agent()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/09_github_mcp_demo/github_mcp_demo.py b/examples/other_examples/09_github_mcp_demo/github_mcp_demo.py index e39e7eac..d57f95ec 100644 --- a/examples/other_examples/09_github_mcp_demo/github_mcp_demo.py +++ b/examples/other_examples/09_github_mcp_demo/github_mcp_demo.py @@ -5,13 +5,13 @@ by the LLM without any manual registration required. """ -import asyncio import logging import os -from uuid import uuid4 + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.mcp import MCPServerRemote from vision_agents.plugins.openai.openai_llm import OpenAILLM from vision_agents.plugins import elevenlabs, deepgram, silero, getstream @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) -async def start_agent(): +async def create_agent(**kwargs) -> Agent: """Demonstrate GitHub MCP server integration.""" # Get GitHub PAT from environment @@ -34,7 +34,7 @@ async def start_agent(): if not github_pat: logger.error("GITHUB_PAT environment variable not found!") logger.error("Please set GITHUB_PAT in your .env file or environment") - return + raise ValueError("GITHUB_PAT environment variable not found") # Create GitHub MCP server github_server = MCPServerRemote( @@ -49,7 +49,7 @@ async def start_agent(): if not openai_api_key: logger.error("OPENAI_API_KEY environment variable not found!") logger.error("Please set OPENAI_API_KEY in your .env file or environment") - return + raise ValueError("OPENAI_API_KEY environment variable not found") # Create OpenAI LLM llm = OpenAILLM(model="gpt-4o", api_key=openai_api_key) @@ -74,6 +74,10 @@ async def start_agent(): logger.info("Agent created with GitHub MCP server") logger.info(f"GitHub server: {github_server}") + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: try: # Connect to GitHub MCP server with timeout logger.info("Connecting to GitHub MCP server...") @@ -95,8 +99,10 @@ async def on_participant_joined(event: CallSessionParticipantJoinedEvent): f"Hello {event.participant.user.name}! I'm your GitHub AI assistant with access to {len(mcp_functions)} GitHub tools. I can help you with repositories, issues, pull requests, and more!" ) + # ensure the agent user is created + await agent.create_user() # Create a call - call = agent.edge.client.video.call("default", str(uuid4())) + call = await agent.create_call(call_type, call_id) # Have the agent join the call/room logger.info("šŸŽ¤ Agent joining call...") @@ -126,4 +132,4 @@ async def on_participant_joined(event: CallSessionParticipantJoinedEvent): if __name__ == "__main__": - asyncio.run(start_agent()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/09_github_mcp_demo/openai_realtime_github_mcp_demo.py b/examples/other_examples/09_github_mcp_demo/openai_realtime_github_mcp_demo.py index ef88c62d..7d703239 100644 --- a/examples/other_examples/09_github_mcp_demo/openai_realtime_github_mcp_demo.py +++ b/examples/other_examples/09_github_mcp_demo/openai_realtime_github_mcp_demo.py @@ -5,13 +5,13 @@ using voice commands through the OpenAI Realtime API. """ -import asyncio import logging import os -from uuid import uuid4 + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.mcp import MCPServerRemote from vision_agents.plugins.openai.openai_realtime import Realtime from vision_agents.plugins import getstream @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) -async def start_agent(): +async def create_agent(**kwargs) -> Agent: """Demonstrate OpenAI Realtime with GitHub MCP server integration.""" # Get GitHub PAT from environment @@ -34,14 +34,14 @@ async def start_agent(): if not github_pat: logger.error("GITHUB_PAT environment variable not found!") logger.error("Please set GITHUB_PAT in your .env file or environment") - return + raise ValueError("GITHUB_PAT environment variable not found") # Check OpenAI API key from environment openai_api_key = os.getenv("OPENAI_API_KEY") if not openai_api_key: logger.error("OPENAI_API_KEY environment variable not found!") logger.error("Please set OPENAI_API_KEY in your .env file or environment") - return + raise ValueError("OPENAI_API_KEY environment variable not found") # Create GitHub MCP server github_server = MCPServerRemote( @@ -71,6 +71,10 @@ async def start_agent(): logger.info("Agent created with OpenAI Realtime and GitHub MCP server") logger.info(f"GitHub server: {github_server}") + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: try: # Set up event handler for when participants join @agent.subscribe @@ -87,8 +91,10 @@ async def on_participant_joined(event: CallSessionParticipantJoinedEvent): f"Hello {event.participant.user.name}! I'm your GitHub AI assistant powered by OpenAI Realtime. I have access to {len(mcp_functions)} GitHub tools and can help you with repositories, issues, pull requests, and more through voice commands!" ) + # ensure the agent user is created + await agent.create_user() # Create a call - call = agent.edge.client.video.call("default", str(uuid4())) + call = await agent.create_call(call_type, call_id) # Have the agent join the call/room logger.info("šŸŽ¤ Agent joining call...") @@ -125,4 +131,4 @@ async def on_participant_joined(event: CallSessionParticipantJoinedEvent): if __name__ == "__main__": - asyncio.run(start_agent()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/gemini_live_realtime/gemini_live_example.py b/examples/other_examples/gemini_live_realtime/gemini_live_example.py index 75b29c69..71ec0d49 100644 --- a/examples/other_examples/gemini_live_realtime/gemini_live_example.py +++ b/examples/other_examples/gemini_live_realtime/gemini_live_example.py @@ -1,12 +1,10 @@ -import asyncio import logging -from uuid import uuid4 from dotenv import load_dotenv -from getstream import AsyncStream from vision_agents.core.edge.types import User -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.plugins import gemini, getstream load_dotenv() @@ -18,9 +16,7 @@ logger = logging.getLogger(__name__) -async def start_agent() -> None: - client = AsyncStream() - +async def create_agent(**kwargs) -> Agent: agent = Agent( edge=getstream.Edge(), agent_user=User( @@ -30,17 +26,20 @@ async def start_agent() -> None: llm=gemini.Realtime(), processors=[], # processors can fetch extra data, check images/audio data or transform video ) - - call = client.video.call("default", str(uuid4())) + return agent +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) with await agent.join(call): - await asyncio.sleep(5) await agent.edge.open_demo(call) await agent.llm.simple_response(text="Describe what you see and say hi") await agent.finish() # run till the call ends if __name__ == "__main__": - asyncio.run(start_agent()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/audio_moderation/main.py b/examples/other_examples/plugins_examples/audio_moderation/main.py index e970f9f1..fca397ac 100644 --- a/examples/other_examples/plugins_examples/audio_moderation/main.py +++ b/examples/other_examples/plugins_examples/audio_moderation/main.py @@ -24,16 +24,17 @@ import uuid from dotenv import load_dotenv -from uuid import uuid4 from getstream.stream import Stream from getstream.models import CheckResponse, ModerationPayload -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import deepgram, getstream, openai from vision_agents.core.stt.events import STTTranscriptEvent, STTErrorEvent logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +logger = logging.getLogger(__name__) # Suppress dataclasses_json missing value RuntimeWarnings warnings.filterwarnings( @@ -58,8 +59,13 @@ def moderate(client: Stream, text: str, user_name: str) -> CheckResponse: ).data -async def main(): - load_dotenv() +load_dotenv() + +# Global client for moderation +client = Stream.from_env() + + +async def create_agent(**kwargs) -> Agent: print("\nšŸ¤– Starting moderation bot...") print("The bot will join the call and moderate all audio it receives.") print( @@ -106,12 +112,18 @@ async def handle_stt_error(event: STTErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and start conversation with await agent.join(call): + await agent.edge.open_demo(call) print("šŸŽ§ Listening for audio... (Press Ctrl+C to stop)") await agent.finish() @@ -147,7 +159,6 @@ def setup_moderation_config(client: Stream): args = parse_args() if args.setup: - client = Stream.from_env() setup_moderation_config(client) - - asyncio.run(main()) + else: + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/mcp/main.py b/examples/other_examples/plugins_examples/mcp/main.py index f348bdbe..b60f765e 100644 --- a/examples/other_examples/plugins_examples/mcp/main.py +++ b/examples/other_examples/plugins_examples/mcp/main.py @@ -15,15 +15,18 @@ - Install dependencies: pip install -e . """ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import deepgram, elevenlabs, openai, getstream from vision_agents.core.mcp import MCPBaseServer +logger = logging.getLogger(__name__) + # Example MCP server for demonstration class ExampleMCPServer(MCPBaseServer): """Example MCP server that provides weather information.""" @@ -56,7 +59,8 @@ async def call_tool(self, name: str, arguments: dict): load_dotenv() -async def main(): + +async def create_agent(**kwargs) -> Agent: # Create agent with MCP servers agent = Agent( edge=getstream.Edge(), @@ -67,15 +71,21 @@ async def main(): tts=elevenlabs.TTS(), mcp_servers=[ExampleMCPServer()], ) + return agent - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and start MCP-enabled conversation with await agent.join(call): + await agent.edge.open_demo(call) await agent.say("Hello! I have access to MCP tools including weather information. How can I help you?") await agent.finish() + if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) \ No newline at end of file diff --git a/examples/other_examples/plugins_examples/stt_deepgram_transcription/main.py b/examples/other_examples/plugins_examples/stt_deepgram_transcription/main.py index 830199fe..db090a0c 100644 --- a/examples/other_examples/plugins_examples/stt_deepgram_transcription/main.py +++ b/examples/other_examples/plugins_examples/stt_deepgram_transcription/main.py @@ -17,19 +17,23 @@ - Install dependencies: uv sync """ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.core.stt.events import STTTranscriptEvent, STTErrorEvent from vision_agents.core.llm.events import LLMTextResponseCompletedEvent from vision_agents.plugins import deepgram, openai, getstream, elevenlabs +logger = logging.getLogger(__name__) + load_dotenv() -async def main(): + +async def create_agent(**kwargs) -> Agent: # Create agent with STT + LLM + TTS for conversation agent = Agent( edge=getstream.Edge(), @@ -73,15 +77,21 @@ async def handle_stt_error(event: STTErrorEvent): if event.context: agent.logger.error(f" └─ context: {event.context}") + return agent - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and start conversation with await agent.join(call): + await agent.edge.open_demo(call) await agent.say("Hello! I'm your transcription bot. I'll listen to what you say, transcribe it, and respond to you. Try saying something!") await agent.finish() + if __name__ == "__main__": - asyncio.run(main()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/stt_moonshine_transcription/main.py b/examples/other_examples/plugins_examples/stt_moonshine_transcription/main.py index eee72776..53546893 100644 --- a/examples/other_examples/plugins_examples/stt_moonshine_transcription/main.py +++ b/examples/other_examples/plugins_examples/stt_moonshine_transcription/main.py @@ -16,18 +16,22 @@ - Install dependencies: pip install -e . """ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import moonshine, openai, getstream from vision_agents.core.stt.events import STTTranscriptEvent, STTErrorEvent +logger = logging.getLogger(__name__) + load_dotenv() -async def main(): + +async def create_agent(**kwargs) -> Agent: # Create agent with STT + LLM for conversation agent = Agent( edge=getstream.Edge(), @@ -59,14 +63,21 @@ async def handle_stt_error(event: STTErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and start conversation with await agent.join(call): + await agent.edge.open_demo(call) await agent.simple_response("Hello! I can transcribe your speech and respond to you.") await agent.finish() + if __name__ == "__main__": - asyncio.run(main()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/tts_cartesia/main.py b/examples/other_examples/plugins_examples/tts_cartesia/main.py index 9da31467..af83c583 100644 --- a/examples/other_examples/plugins_examples/tts_cartesia/main.py +++ b/examples/other_examples/plugins_examples/tts_cartesia/main.py @@ -17,19 +17,23 @@ CARTESIA_API_KEY """ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import cartesia, getstream, openai from vision_agents.core.events import CallSessionParticipantJoinedEvent from vision_agents.core.tts.events import TTSAudioEvent, TTSErrorEvent +logger = logging.getLogger(__name__) + load_dotenv() -async def main(): + +async def create_agent(**kwargs) -> Agent: # Create agent with TTS agent = Agent( edge=getstream.Edge(), @@ -56,13 +60,20 @@ async def handle_tts_error(event: TTSErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and wait with await agent.join(call): + await agent.edge.open_demo(call) await agent.finish() + if __name__ == "__main__": - asyncio.run(main()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/tts_elevenlabs/main.py b/examples/other_examples/plugins_examples/tts_elevenlabs/main.py index 58290892..585fac70 100644 --- a/examples/other_examples/plugins_examples/tts_elevenlabs/main.py +++ b/examples/other_examples/plugins_examples/tts_elevenlabs/main.py @@ -17,19 +17,23 @@ ELEVENLABS_API_KEY """ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import elevenlabs, getstream, openai from vision_agents.core.events import CallSessionParticipantJoinedEvent from vision_agents.core.tts.events import TTSAudioEvent, TTSErrorEvent +logger = logging.getLogger(__name__) + load_dotenv() -async def main(): + +async def create_agent(**kwargs) -> Agent: # Create agent with TTS agent = Agent( edge=getstream.Edge(), @@ -56,13 +60,20 @@ async def handle_tts_error(event: TTSErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and wait with await agent.join(call): + await agent.edge.open_demo(call) await agent.finish() + if __name__ == "__main__": - asyncio.run(main()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/tts_kokoro/main.py b/examples/other_examples/plugins_examples/tts_kokoro/main.py index b36ad008..a7f1652e 100644 --- a/examples/other_examples/plugins_examples/tts_kokoro/main.py +++ b/examples/other_examples/plugins_examples/tts_kokoro/main.py @@ -17,19 +17,23 @@ KOKORO_API_KEY """ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import kokoro, getstream, openai from vision_agents.core.events import CallSessionParticipantJoinedEvent from vision_agents.core.tts.events import TTSAudioEvent, TTSErrorEvent +logger = logging.getLogger(__name__) + load_dotenv() -async def main(): + +async def create_agent(**kwargs) -> Agent: # Create agent with TTS agent = Agent( edge=getstream.Edge(), @@ -56,13 +60,20 @@ async def handle_tts_error(event: TTSErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and wait with await agent.join(call): + await agent.edge.open_demo(call) await agent.finish() + if __name__ == "__main__": - asyncio.run(main()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/vad_silero/main.py b/examples/other_examples/plugins_examples/vad_silero/main.py index 67bbab76..b7cfd52b 100644 --- a/examples/other_examples/plugins_examples/vad_silero/main.py +++ b/examples/other_examples/plugins_examples/vad_silero/main.py @@ -13,18 +13,22 @@ `STREAM_API_KEY`, `STREAM_API_SECRET` (and optionally `STREAM_BASE_URL`). """ -import asyncio -from uuid import uuid4 +import logging + from dotenv import load_dotenv -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.core.vad.events import VADAudioEvent, VADErrorEvent from vision_agents.plugins import silero, openai, getstream +logger = logging.getLogger(__name__) + load_dotenv() -async def main(): + +async def create_agent(**kwargs) -> Agent: # Create agent with VAD + LLM for conversation agent = Agent( edge=getstream.Edge(), @@ -52,14 +56,21 @@ async def handle_vad_error(event: VADErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", str(uuid4())) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) # Join call and start conversation with await agent.join(call): + await agent.edge.open_demo(call) await agent.simple_response("Hello! I can detect when you speak and respond to you.") await agent.finish() + if __name__ == "__main__": - asyncio.run(main()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/video_moderation/main.py b/examples/other_examples/plugins_examples/video_moderation/main.py index b39c3c77..3458885c 100644 --- a/examples/other_examples/plugins_examples/video_moderation/main.py +++ b/examples/other_examples/plugins_examples/video_moderation/main.py @@ -31,12 +31,14 @@ from getstream.models import UserRequest from getstream.stream import Stream from getstream.models import CheckResponse, ModerationPayload -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import deepgram, getstream, openai from vision_agents.core.stt.events import STTTranscriptEvent, STTErrorEvent logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +logger = logging.getLogger(__name__) # Suppress dataclasses_json missing value RuntimeWarnings warnings.filterwarnings( @@ -102,27 +104,13 @@ def moderate(client: Stream, text: str, user_name: str) -> CheckResponse: ).data -async def main(): - # Load environment variables - load_dotenv() - - # Initialize Stream client from ENV - client = Stream.from_env() - - # Create a unique call ID for this session - call_id = str(uuid.uuid4()) - print(f"šŸ“ž Call ID: {call_id}") - - user_id = f"user-{uuid.uuid4()}" - create_user(client, user_id, "My User") - logging.info("šŸ‘¤ Created user: %s", user_id) +load_dotenv() - user_token = client.create_token(user_id, expiration=3600) - logging.info("šŸ”‘ Created token for user: %s", user_id) +# Global client for moderation +client = Stream.from_env() - # Open browser for users to join with the user token - open_browser(client.api_key, user_token, call_id) +async def create_agent(**kwargs) -> Agent: print("\nšŸ¤– Starting moderation bot...") print("The bot will join the call and moderate all audio it receives.") print( @@ -169,14 +157,30 @@ async def handle_stt_error(event: STTErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", call_id) - call.get_or_create(data={"created_by_id": "moderation-bot"}) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # Create a demo user for browser testing + user_id = f"user-{uuid.uuid4()}" + create_user(client, user_id, "My User") + logging.info("šŸ‘¤ Created user: %s", user_id) + + user_token = client.create_token(user_id, expiration=3600) + logging.info("šŸ”‘ Created token for user: %s", user_id) + + # Open browser for users to join with the user token + open_browser(client.api_key, user_token, call_id) try: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) + # Join call and start moderation with await agent.join(call): + await agent.edge.open_demo(call) print("šŸŽ§ Listening for audio... (Press Ctrl+C to stop)") await agent.finish() except asyncio.CancelledError: @@ -221,7 +225,6 @@ def setup_moderation_config(client: Stream): args = parse_args() if args.setup: - client = Stream.from_env() setup_moderation_config(client) - - asyncio.run(main()) + else: + cli(AgentLauncher(create_agent=create_agent, join_call=join_call)) diff --git a/examples/other_examples/plugins_examples/wizper_stt_translate/main.py b/examples/other_examples/plugins_examples/wizper_stt_translate/main.py index d85c6b2b..82435fc1 100644 --- a/examples/other_examples/plugins_examples/wizper_stt_translate/main.py +++ b/examples/other_examples/plugins_examples/wizper_stt_translate/main.py @@ -28,13 +28,15 @@ from getstream.models import UserRequest from getstream.stream import Stream -from vision_agents.core.agents import Agent +from vision_agents.core.agents import Agent, AgentLauncher +from vision_agents.core import cli from vision_agents.core.edge.types import User from vision_agents.plugins import wizper, silero, getstream, openai from vision_agents.core.stt.events import STTTranscriptEvent, STTErrorEvent from vision_agents.core.vad.events import VADAudioEvent, VADErrorEvent logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +logger = logging.getLogger(__name__) def create_user(client: Stream, id: str, name: str) -> None: @@ -78,31 +80,16 @@ def open_browser(api_key: str, token: str, call_id: str) -> str: return url -async def main(): - """Main example function.""" - print("šŸŽ™ļø Stream + Fal Real-time Transcription Example") - print("=" * 55) - - # Load environment variables - load_dotenv() +load_dotenv() - # Initialize Stream client from ENV - client = Stream.from_env() +# Global client +client = Stream.from_env() - # Create a unique call ID for this session - call_id = str(uuid.uuid4()) - print(f"šŸ“ž Call ID: {call_id}") - - user_id = f"user-{uuid.uuid4()}" - create_user(client, user_id, "My User") - logging.info("šŸ‘¤ Created user: %s", user_id) - - user_token = client.create_token(user_id, expiration=3600) - logging.info("šŸ”‘ Created token for user: %s", user_id) - - # Open browser for users to join with the user token - open_browser(client.api_key, user_token, call_id) +async def create_agent(**kwargs) -> Agent: + """Main example function.""" + print("šŸŽ™ļø Stream + Fal Real-time Transcription Example") + print("=" * 55) print("\nšŸ¤– Starting transcription bot...") print( "The bot will join the call and transcribe all audio it receives, optionally translating it to French." @@ -158,14 +145,30 @@ async def handle_vad_error(event: VADErrorEvent): if event.context: print(f" └─ context: {event.context}") - # Create call and open demo - call = agent.edge.client.video.call("default", call_id) - call.get_or_create(data={"created_by_id": "transcription-bot"}) - agent.edge.open_demo(call) + return agent + + +async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None: + # Create a demo user for browser testing + user_id = f"user-{uuid.uuid4()}" + create_user(client, user_id, "My User") + logging.info("šŸ‘¤ Created user: %s", user_id) + + user_token = client.create_token(user_id, expiration=3600) + logging.info("šŸ”‘ Created token for user: %s", user_id) + + # Open browser for users to join with the user token + open_browser(client.api_key, user_token, call_id) try: + # ensure the agent user is created + await agent.create_user() + # Create a call + call = await agent.create_call(call_type, call_id) + # Join call and start transcription with await agent.join(call): + await agent.edge.open_demo(call) print("šŸŽ§ Listening for audio... (Press Ctrl+C to stop)") await agent.finish() except asyncio.CancelledError: @@ -180,4 +183,4 @@ async def handle_vad_error(event: VADErrorEvent): if __name__ == "__main__": - asyncio.run(main()) + cli(AgentLauncher(create_agent=create_agent, join_call=join_call))