Skip to content

Commit d6d6059

Browse files
committed
voice_assistant entity and voice stream handler
1 parent 141fbe5 commit d6d6059

File tree

12 files changed

+810
-62
lines changed

12 files changed

+810
-62
lines changed

docs/setup.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,28 @@ Local installation:
1616
```shell
1717
pip3 install --force-reinstall dist/ucapi-$VERSION-py3-none-any.whl
1818
```
19+
20+
## Protobuf
21+
22+
1. Optional, but recommended: install the Python protobuf toolchain so that generated code is consistent across machines:
23+
```bash
24+
python3 -m pip install --upgrade grpcio-tools protobuf
25+
```
26+
2. From the project root, run:
27+
```bash
28+
python3 scripts/compile_protos.py
29+
```
30+
- This will generate `ucapi/proto/ucr_integration_voice_pb2.py` (and `.pyi` if supported).
31+
3. Add and commit the generated files to Git:
32+
```bash
33+
git add ucapi/proto/ucr_integration_voice_pb2.py ucapi/proto/ucr_integration_voice_pb2.pyi || true
34+
git commit -m "Generate protobuf Python modules for voice integration"
35+
```
36+
37+
Notes:
38+
- The library does not re-generate at build time; we ship the generated code with the package.
39+
- If you prefer using system `protoc`, ensure it’s on `PATH`; the script will fall back to it automatically.
40+
- Imports at runtime (if/when needed) will look like:
41+
```python
42+
from ucapi.proto import ucr_integration_voice_pb2 as voice_pb2
43+
```

examples/setup_flow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ async def handle_driver_setup(
3838
"""
3939
Start driver setup.
4040
41-
Initiated by Remote Two to set up the driver.
41+
Initiated by Remote Two/3 to set up the driver.
4242
4343
:param msg: value(s) of input fields in the first setup screen.
4444
:return: the setup action on how to continue

examples/voice.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"driver_id": "voice_test",
3+
"version": "0.0.1",
4+
"min_core_api": "0.20.0",
5+
"name": { "en": "Voice test" },
6+
"icon": "uc:integration",
7+
"description": {
8+
"en": "Minimal Python integration driver example for voice commands."
9+
},
10+
"port": 9084,
11+
"developer": {
12+
"name": "Unfolded Circle ApS",
13+
"email": "hello@unfoldedcircle.com",
14+
"url": "https://www.unfoldedcircle.com"
15+
},
16+
"home_page": "https://www.unfoldedcircle.com",
17+
"release_date": "2025-12-11"
18+
}

examples/voice.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/usr/bin/env python3
2+
"""Voice assistant entity integration example. Bare minimum of an integration driver."""
3+
import asyncio
4+
import logging
5+
from asyncio import sleep
6+
from typing import Any
7+
8+
import ucapi
9+
from ucapi import AssistantEvent, AssistantEventType, VoiceAssistant
10+
from ucapi.api_definitions import AssistantTextResponse, AssistantSttResponse
11+
from ucapi.voice_assistant import (
12+
Commands as VACommands,
13+
Features as VAFeatures,
14+
Attributes as VAAttr,
15+
VoiceAssistantEntityOptions,
16+
AudioConfiguration,
17+
SampleFormat,
18+
)
19+
20+
# Dedicated event loop: handed to the IntegrationAPI below and run by the __main__ entry point.
loop = asyncio.new_event_loop()
21+
# Integration API instance shared by all handlers in this example.
api = ucapi.IntegrationAPI(loop)
22+
23+
# Session id taken from the latest voice command; on_voice_session reads it back
# when building its events (flagged there as a HACK until core is fixed).
session_id = 0
24+
25+
26+
@api.listens_to(ucapi.Events.CONNECT)
async def on_connect() -> None:
    """Report the driver as connected whenever the remote connects."""
    # This demo is always ready, so the connected state is reported unconditionally.
    connected_state = ucapi.DeviceStates.CONNECTED
    await api.set_device_state(connected_state)
30+
31+
32+
@api.listens_to(ucapi.Events.SUBSCRIBE_ENTITIES)
async def on_subscribe_entities(entity_ids: list[str]) -> None:
    """
    Set the state attribute of every subscribed entity to "ON".

    :param entity_ids: identifiers of the entities being subscribed to
    """
    for subscribed_id in entity_ids:
        state_update = {VAAttr.STATE: "ON"}
        api.configured_entities.update_attributes(subscribed_id, state_update)
36+
37+
38+
async def on_voice_cmd(
    entity: ucapi.VoiceAssistant, cmd_id: str, params: dict[str, Any] | None
) -> ucapi.StatusCodes:
    """
    Voice assistant command handler.

    Called by the integration-API if a command is sent to a configured voice_assistant-entity.

    The request must carry a positive integer ``session_id``. Invalid requests are
    rejected without touching the module-level ``session_id``, so a malformed
    command cannot overwrite the id of a session that is already in progress.

    :param entity: voice assistant entity
    :param cmd_id: command
    :param params: optional command parameters; must contain a positive integer ``session_id``
    :return: ``BAD_REQUEST`` if ``params`` is missing or ``session_id`` is not a positive
             integer, ``OK`` once the ready event for ``VACommands.VOICE_START`` has been
             broadcast, ``NOT_IMPLEMENTED`` for every other command
    """
    # HACK until core is fixed
    global session_id

    print(f"Got {entity.id} command request: {cmd_id}")
    if params is None:
        return ucapi.StatusCodes.BAD_REQUEST

    # Validate into a local first: the shared id must only change for a valid request,
    # and a non-integer value must yield BAD_REQUEST rather than a TypeError on `<=`.
    requested_id = params.get("session_id", 0)
    if not isinstance(requested_id, int) or requested_id <= 0:
        return ucapi.StatusCodes.BAD_REQUEST
    session_id = requested_id

    if cmd_id == VACommands.VOICE_START:
        ready_evt = AssistantEvent(
            type=AssistantEventType.READY,
            entity_id=entity.id,
            session_id=session_id,
        )
        await api.broadcast_assistant_event(ready_evt)

        # Acknowledge start; binary audio will arrive on the WS binary channel
        return ucapi.StatusCodes.OK
    return ucapi.StatusCodes.NOT_IMPLEMENTED
73+
74+
75+
async def on_voice_session(session):
    """
    Consume one incoming voice stream and reply with canned demo events.

    Every audio frame is counted and logged. Once the stream ends, three
    assistant events are broadcast one second apart: an STT response, a text
    response reporting the number of received bytes, and a finished event.

    :param session: voice stream session; iterated asynchronously to obtain the audio frames (bytes)
    """
    stream_id = session.session_id
    audio = session.config
    print(
        f"Voice stream started: session={stream_id}, {audio.channels}ch @ {audio.sample_rate} Hz"
    )

    # HACK until core is fixed: the events below use the module-level session_id
    # (set by the command handler) instead of the session's own id.
    received = 0
    async for chunk in session:  # each chunk is a bytes object
        chunk_size = len(chunk)
        received += chunk_size
        # feed frame into your voice assistant / LLM here
        print(f"Got {chunk_size} bytes of audio data")
    print(f"Voice stream ended: session={session.session_id}, bytes={received}")

    # 1) Transcription result.
    transcript = AssistantSttResponse(
        text="I'm just a demo and I don't know what you said."
    )
    stt_event = AssistantEvent(
        type=AssistantEventType.STT_RESPONSE,
        entity_id="va_main",
        session_id=session_id,
        data=transcript,
    )
    await api.broadcast_assistant_event(stt_event)

    # 2) Textual answer, one second later.
    await asyncio.sleep(1)
    answer = AssistantTextResponse(
        success=True, text=f"You have sent {received} bytes of audio data"
    )
    text_event = AssistantEvent(
        type=AssistantEventType.TEXT_RESPONSE,
        entity_id="va_main",
        session_id=session_id,
        data=answer,
    )
    await api.broadcast_assistant_event(text_event)

    # 3) Close the interaction after another second.
    await asyncio.sleep(1)
    finished_event = AssistantEvent(
        type=AssistantEventType.FINISHED,
        entity_id="va_main",
        session_id=session_id,
    )
    await api.broadcast_assistant_event(finished_event)
118+
119+
120+
if __name__ == "__main__":
    logging.basicConfig()

    # Audio format requested for the voice stream: one channel at 16000 Hz, I16 samples.
    audio_cfg = AudioConfiguration(
        channels=1, sample_rate=16000, sample_format=SampleFormat.I16
    )
    entity_options = VoiceAssistantEntityOptions(audio_cfg=audio_cfg)

    # The single demo entity; its commands are routed to on_voice_cmd.
    voice_entity = VoiceAssistant(
        identifier="va_main",
        name={"en": "Demo Voice Assistant"},
        features=[VAFeatures.TRANSCRIPTION, VAFeatures.RESPONSE_TEXT],
        attributes={VAAttr.STATE.value: "ON"},
        options=entity_options,
        cmd_handler=on_voice_cmd,
    )
    api.available_entities.add(voice_entity)

    # Incoming voice streams are handed to on_voice_session.
    api.set_voice_stream_handler(on_voice_session)

    # Initialise the API from the driver metadata file, then serve until interrupted.
    loop.run_until_complete(api.init("voice.json"))
    loop.run_forever()

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Waiting for: https://github.com/pypa/pip/issues/11440
33
# Workaround: use a pre-commit hook with https://github.com/scikit-image/scikit-image/blob/main/tools/generate_requirements.py
44

5+
protobuf~=6.33.2
56
pyee>=9.0
67
websockets>=14.0
78
zeroconf>=0.120.0

ucapi/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22
"""
3-
Integration driver library for Remote Two.
3+
Integration driver library for Remote Two/3.
44
55
:copyright: (c) 2023 by Unfolded Circle ApS.
66
:license: MPL-2.0, see LICENSE for more details.
@@ -11,6 +11,10 @@
1111

1212
from .api_definitions import ( # isort:skip # noqa: F401
1313
AbortDriverSetup,
14+
AssistantError,
15+
AssistantErrorCode,
16+
AssistantEvent,
17+
AssistantEventType,
1418
DeviceStates,
1519
DriverSetupRequest,
1620
Events,
@@ -28,6 +32,11 @@
2832
from .entity import Entity, EntityTypes # isort:skip # noqa: F401
2933
from .entities import Entities # isort:skip # noqa: F401
3034
from .api import IntegrationAPI # isort:skip # noqa: F401
35+
from .voice_stream import ( # isort:skip # noqa: F401
36+
AudioConfig,
37+
VoiceSession,
38+
VoiceStreamHandler,
39+
)
3140

3241
# Entity types
3342
from .button import Button # noqa: F401
@@ -38,6 +47,7 @@
3847
from .remote import Remote # noqa: F401
3948
from .sensor import Sensor # noqa: F401
4049
from .switch import Switch # noqa: F401
50+
from .voice_assistant import VoiceAssistant # noqa: F401
4151

4252
try:
4353
from ._version import version as __version__

0 commit comments

Comments
 (0)