Skip to content

Commit ef4d050

Browse files
committed
Add workflow and voice-agent types
1 parent 8876305 commit ef4d050

18 files changed

+562
-197
lines changed

llmstack/apps/apis.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -606,15 +606,9 @@ def post(self, request):
606606
app_owner_profile = get_object_or_404(Profile, user=owner)
607607
app_type_slug = request.data["type_slug"] if "type_slug" in request.data else None
608608
app_type = (
609-
get_object_or_404(
610-
AppType,
611-
id=request.data["app_type"],
612-
)
609+
AppType.objects.filter(id=request.data["app_type"]).first()
613610
if "app_type" in request.data
614-
else get_object_or_404(
615-
AppType,
616-
slug=app_type_slug,
617-
)
611+
else AppType.objects.filter(slug=app_type_slug).first()
618612
)
619613
app_name = request.data["name"]
620614
app_description = request.data["description"] if "description" in request.data else ""
@@ -670,6 +664,7 @@ def post(self, request):
670664
owner=owner,
671665
description=app_description,
672666
type=app_type,
667+
type_slug=app_type.slug if app_type else app_type_slug,
673668
template_slug=template_slug,
674669
web_integration_config=web_integration_config,
675670
slack_integration_config=slack_integration_config,
@@ -679,7 +674,7 @@ def post(self, request):
679674
app_data = {
680675
"name": app_name,
681676
"description": app_description,
682-
"type_slug": app_type.slug,
677+
"type_slug": app_type.slug if app_type else app_type_slug,
683678
"description": app_description,
684679
"config": app_config,
685680
"input_fields": app_input_fields,

llmstack/apps/app_types.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
import uuid
22

3-
from llmstack.apps.models import App, AppType
3+
from llmstack.apps.models import App
44

55
from .types.agent import Agent # noqa: F401
66
from .types.app_type_interface import AppTypeInterface
77
from .types.chat import ChatApp # noqa: F401
88
from .types.discord import DiscordApp # noqa: F401
99
from .types.slack import SlackApp # noqa: F401
1010
from .types.twilio import TwilioApp # noqa: F401
11+
from .types.voice_agent import VoiceAgent # noqa: F401
1112
from .types.web import WebApp # noqa: F401
13+
from .types.workflow import Workflow # noqa: F401
1214

1315

1416
class AppTypeFactory:
@@ -18,7 +20,7 @@ class AppTypeFactory:
1820

1921
@staticmethod
2022
def get_app_type_handler(
21-
app_type: AppType,
23+
app_type_slug: str,
2224
platform: str = None,
2325
) -> AppTypeInterface:
2426
subclasses = AppTypeInterface.__subclasses__()
@@ -31,7 +33,7 @@ def get_app_type_handler(
3133

3234
# Match with slug
3335
for subclass in subclasses:
34-
if subclass.slug() == app_type.slug.lower():
36+
if subclass.slug() == app_type_slug.lower():
3537
return subclass
3638

3739
return None
@@ -40,7 +42,7 @@ def get_app_type_handler(
4042
def get_app_type_signature_verifier(app_id: str, platform: str = "web"):
4143
app = App.objects.get(uuid=uuid.UUID(app_id))
4244
app_type_handler = AppTypeFactory.get_app_type_handler(
43-
app.type,
45+
app.type.slug if app.type else app.type_slug,
4446
platform,
4547
)
4648

llmstack/apps/runner/agent_actor.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
)
1717
from llmstack.apps.runner.output_actor import OutputActor
1818
from llmstack.common.utils.liquid import render_template
19-
from llmstack.common.utils.provider_config import get_matched_provider_config
2019
from llmstack.play.actor import BookKeepingData
2120
from llmstack.play.messages import ContentData, Error, Message, MessageType
2221
from llmstack.play.output_stream import stitch_model_objects
@@ -34,6 +33,7 @@ def __init__(
3433
dependencies: list = [],
3534
templates: Dict[str, str] = {},
3635
agent_config: Dict[str, Any] = {},
36+
is_voice_agent: bool = False,
3737
metadata: Dict[str, Any] = {},
3838
provider_configs: Dict[str, Any] = {},
3939
tools: List[Dict] = [],
@@ -42,25 +42,13 @@ def __init__(
4242
self._process_output_task = None
4343
self._config = agent_config
4444
self._provider_configs = provider_configs
45-
self._provider_slug = self._config.get("provider_slug", "openai")
46-
self._model_slug = self._config.get("model", "gpt-4o-mini")
47-
self._provider_config = get_matched_provider_config(
48-
provider_configs=self._provider_configs,
49-
provider_slug=self._provider_slug,
50-
model_slug=self._model_slug,
51-
)
52-
self._realtime = self._config.get("realtime", False)
45+
self._is_voice_agent = is_voice_agent
5346

5447
self._controller_config = AgentControllerConfig(
5548
provider_configs=self._provider_configs,
56-
provider_config=self._provider_config,
57-
provider_slug=self._provider_slug,
58-
model_slug=self._model_slug,
59-
system_message=self._config.get("system_message", "You are a helpful assistant."),
49+
agent_config=self._config,
50+
is_voice_agent=self._is_voice_agent,
6051
tools=tools,
61-
stream=True if self._config.get("stream") is None else self._config.get("stream"),
62-
realtime=self._realtime,
63-
max_steps=min(self._config.get("max_steps", 30), 100),
6452
metadata=metadata,
6553
)
6654

@@ -245,18 +233,18 @@ async def _process_output(self):
245233
"usage_metrics": [
246234
("promptly/*/*/*", MetricType.INVOCATION, (ProviderConfigSource.PLATFORM_DEFAULT, 1)),
247235
(
248-
f"{self._provider_slug}/*/{self._model_slug}/*",
236+
controller_output.data.provider,
249237
MetricType.INPUT_TOKENS,
250238
(
251-
self._provider_config.provider_config_source,
239+
controller_output.data.source,
252240
controller_output.data.prompt_tokens,
253241
),
254242
),
255243
(
256-
f"{self._provider_slug}/*/{self._model_slug}/*",
244+
controller_output.data.provider,
257245
MetricType.OUTPUT_TOKENS,
258246
(
259-
self._provider_config.provider_config_source,
247+
controller_output.data.source,
260248
controller_output.data.completion_tokens,
261249
),
262250
),
@@ -278,8 +266,8 @@ async def _process_output(self):
278266
def on_receive(self, message: Message) -> None:
279267
if message.type == MessageType.CONTENT:
280268
if message.sender == "_inputs0":
281-
if self._realtime:
282-
# For realtime, we send both text and audio streams if available
269+
if self._is_voice_agent:
270+
# For voice agents, we send both text and audio streams if available
283271
content = []
284272
if message.data.content.get("text", None):
285273
content.append(

llmstack/apps/runner/agent_controller.py

Lines changed: 75 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,37 +5,51 @@
55
import queue
66
import ssl
77
import threading
8-
from typing import Any, Dict, List, Optional, Union
8+
from typing import Any, Dict, List, Literal, Optional, Union
99

1010
import websockets
1111
from asgiref.sync import sync_to_async
1212
from pydantic import BaseModel, ConfigDict
1313

14+
from llmstack.apps.types.agent import AgentConfigSchema
15+
from llmstack.apps.types.voice_agent import VoiceAgentConfigSchema
1416
from llmstack.common.blocks.base.schema import StrEnum
1517
from llmstack.common.utils.liquid import render_template
1618
from llmstack.common.utils.provider_config import get_matched_provider_config
1719
from llmstack.common.utils.sslr.types.chat.chat_completion import ChatCompletion
1820
from llmstack.common.utils.sslr.types.chat.chat_completion_chunk import (
1921
ChatCompletionChunk,
2022
)
21-
from llmstack.processors.providers.config import ProviderConfig
2223
from llmstack.processors.providers.promptly import get_llm_client_from_provider_config
2324

2425
logger = logging.getLogger(__name__)
2526

2627

2728
class AgentControllerConfig(BaseModel):
2829
provider_configs: Dict[str, Any]
29-
provider_config: ProviderConfig
30-
provider_slug: str
31-
model_slug: str
32-
system_message: str
30+
agent_config: Union[AgentConfigSchema, VoiceAgentConfigSchema]
31+
is_voice_agent: bool = False
3332
tools: List[Dict]
34-
stream: bool = False
35-
realtime: bool = False
36-
max_steps: int = 30
3733
metadata: Dict[str, Any]
38-
model_config = ConfigDict(protected_namespaces=())
34+
35+
model_config = ConfigDict(arbitrary_types_allowed=True)
36+
37+
def __init__(self, **data):
38+
# Convert agent_config to correct type if needed
39+
if "agent_config" in data:
40+
config = data["agent_config"]
41+
if isinstance(config, dict):
42+
if data.get("is_voice_agent", False):
43+
data["agent_config"] = VoiceAgentConfigSchema(**config)
44+
else:
45+
data["agent_config"] = AgentConfigSchema(**config)
46+
47+
super().__init__(**data)
48+
49+
if self.is_voice_agent and not isinstance(self.agent_config, VoiceAgentConfigSchema):
50+
raise ValueError("agent_config must be VoiceAgentConfigSchema when is_voice_agent is True")
51+
elif not self.is_voice_agent and not isinstance(self.agent_config, AgentConfigSchema):
52+
raise ValueError("agent_config must be AgentConfigSchema when is_voice_agent is False")
3953

4054

4155
class AgentControllerDataType(StrEnum):
@@ -54,6 +68,8 @@ class AgentUsageData(BaseModel):
5468
prompt_tokens: int = 0
5569
completion_tokens: int = 0
5670
total_tokens: int = 0
71+
provider: str = ""
72+
source: str = ""
5773

5874

5975
class AgentMessageRole(StrEnum):
@@ -116,19 +132,10 @@ class AgentController:
116132
def __init__(self, output_queue: asyncio.Queue, config: AgentControllerConfig):
117133
self._output_queue = output_queue
118134
self._config = config
119-
self._messages: List[AgentMessage] = [
120-
AgentSystemMessage(
121-
role=AgentMessageRole.SYSTEM,
122-
content=[
123-
AgentMessageContent(
124-
type=AgentMessageContentType.TEXT,
125-
data=render_template(self._config.system_message, {}),
126-
)
127-
],
128-
)
129-
]
135+
self._messages: List[AgentMessage] = []
130136
self._llm_client = None
131137
self._websocket = None
138+
self._provider_config = None
132139

133140
self._input_text_stream = None
134141
self._input_audio_stream = None
@@ -154,11 +161,16 @@ async def _handle_websocket_messages(self):
154161
if event["type"] == "session.created":
155162
logger.info(f"Session created: {event['session']['id']}")
156163
session = {}
157-
session["instructions"] = self._config.system_message
164+
session["instructions"] = self._config.agent_config.system_message
158165
session["tools"] = [
159166
{"type": "function", **t["function"]} for t in self._config.tools if t["type"] == "function"
160167
]
161168

169+
if self._config.agent_config.input_audio_format:
170+
session["input_audio_format"] = self._config.agent_config.input_audio_format
171+
if self._config.agent_config.output_audio_format:
172+
session["output_audio_format"] = self._config.agent_config.output_audio_format
173+
162174
updated_session = {
163175
"type": "session.update",
164176
"session": session,
@@ -173,6 +185,12 @@ async def _init_websocket_connection(self):
173185
from llmstack.apps.models import AppSessionFiles
174186
from llmstack.assets.stream import AssetStream
175187

188+
self._provider_config = get_matched_provider_config(
189+
provider_configs=self._config.provider_configs,
190+
provider_slug=self._config.agent_config.backend.provider,
191+
model_slug=self._config.agent_config.backend.model,
192+
)
193+
176194
# Create the output streams
177195
self._output_audio_stream = AssetStream(
178196
await sync_to_async(AppSessionFiles.create_streaming_asset)(
@@ -191,9 +209,9 @@ async def _init_websocket_connection(self):
191209
ssl_context.check_hostname = False
192210
ssl_context.verify_mode = ssl.CERT_NONE
193211

194-
websocket_url = f"wss://api.openai.com/v1/realtime?model={self._config.model_slug}"
212+
websocket_url = f"wss://api.openai.com/v1/realtime?model={self._config.agent_config.backend.model}"
195213
headers = {
196-
"Authorization": f"Bearer {self._config.provider_config.api_key}",
214+
"Authorization": f"Bearer {self._provider_config.api_key}",
197215
"OpenAI-Beta": "realtime=v1",
198216
}
199217

@@ -208,16 +226,34 @@ async def _init_websocket_connection(self):
208226
self._loop.create_task(self._handle_websocket_messages())
209227

210228
def _init_llm_client(self):
229+
self._provider_config = get_matched_provider_config(
230+
provider_configs=self._config.provider_configs,
231+
provider_slug=self._config.agent_config.provider,
232+
model_slug=self._config.agent_config.model,
233+
)
234+
211235
self._llm_client = get_llm_client_from_provider_config(
212-
self._config.provider_slug,
213-
self._config.model_slug,
236+
self._config.agent_config.provider,
237+
self._config.agent_config.model,
214238
lambda provider_slug, model_slug: get_matched_provider_config(
215239
provider_configs=self._config.provider_configs,
216240
provider_slug=provider_slug,
217241
model_slug=model_slug,
218242
),
219243
)
220244

245+
self._messages.append(
246+
AgentSystemMessage(
247+
role=AgentMessageRole.SYSTEM,
248+
content=[
249+
AgentMessageContent(
250+
type=AgentMessageContentType.TEXT,
251+
data=render_template(self._config.agent_config.system_message, {}),
252+
)
253+
],
254+
)
255+
)
256+
221257
async def _process_input_audio_stream(self):
222258
if self._input_audio_stream:
223259
async for chunk in self._input_audio_stream.read_async():
@@ -317,8 +353,8 @@ def process(self, data: AgentControllerData):
317353
self._messages.append(data.data)
318354

319355
try:
320-
if len(self._messages) > self._config.max_steps:
321-
raise Exception(f"Max steps ({self._config.max_steps}) exceeded: {len(self._messages)}")
356+
if len(self._messages) > self._config.agent_config.max_steps:
357+
raise Exception(f"Max steps ({self._config.agent_config.max_steps}) exceeded: {len(self._messages)}")
322358

323359
if data.type != AgentControllerDataType.AGENT_OUTPUT:
324360
self._input_messages_queue.put(data)
@@ -334,7 +370,7 @@ def process(self, data: AgentControllerData):
334370
)
335371

336372
async def process_messages(self, data: AgentControllerData):
337-
if self._config.realtime:
373+
if self._config.is_voice_agent and self._config.agent_config.backend.backend_type == Literal["multi_modal"]:
338374
if not self._websocket:
339375
await self._init_websocket_connection()
340376

@@ -391,14 +427,15 @@ async def process_messages(self, data: AgentControllerData):
391427
self._init_llm_client()
392428

393429
client_messages = self._convert_messages_to_llm_client_format()
430+
stream = True if self._config.agent_config.stream is None else self._config.agent_config.stream
394431
response = self._llm_client.chat.completions.create(
395-
model=self._config.model_slug,
432+
model=self._config.agent_config.model,
396433
messages=client_messages,
397-
stream=self._config.stream,
434+
stream=stream,
398435
tools=self._config.tools,
399436
)
400437

401-
if self._config.stream:
438+
if stream:
402439
for chunk in response:
403440
self.add_llm_client_response_to_output_queue(chunk)
404441
else:
@@ -419,6 +456,8 @@ def add_llm_client_response_to_output_queue(self, response: Any):
419456
prompt_tokens=response.usage.input_tokens,
420457
completion_tokens=response.usage.output_tokens,
421458
total_tokens=response.usage.total_tokens,
459+
source=self._provider_config.provider_config_source,
460+
provider=str(self._provider_config),
422461
),
423462
)
424463
)
@@ -621,6 +660,10 @@ async def add_ws_event_to_output_queue(self, event: Any):
621660
type=AgentControllerDataType.INPUT_STREAM,
622661
)
623662
)
663+
elif event_type == "input_audio_buffer.speech_stopped":
664+
pass
665+
elif event_type == "conversation.item.input_audio_transcription.completed":
666+
pass
624667
elif event_type == "error":
625668
logger.error(f"WebSocket error: {event}")
626669

0 commit comments

Comments
 (0)