Skip to content

Commit d746b90

Browse files
committed
NVIDIA Pipecat SDK 0.2.0 release changes
1 parent f272bf7 commit d746b90

33 files changed

+2693
-1401
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,6 @@ output/
2525

2626
# Ignore docs
2727
docs/build/
28+
29+
# Ignore .DS_Store
30+
.DS_Store

CHANGELOG.md

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,39 @@
1-
# NVIDIA Pipecat 0.1.0 (23 April 2025)
1+
# Changelog
2+
All notable changes to this project will be documented in this file.
3+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
4+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
5+
6+
## [0.2.0] - 2025-06-17
7+
8+
### Added
9+
- Support for deepseek, mistral-ai, and llama-nemotron models in NVIDIA LLM Service
10+
- Support for BotSpeakingFrame in animation graph service
11+
12+
### Changed
13+
- Upgraded Riva Client version to 2.20.0
14+
- Upgraded to pipecat 0.0.68
15+
- Improved animation graph stream handling
16+
- Improved task cancellation support in NVIDIA LLM and NVIDIA RAG Service
17+
18+
### Fixed
19+
- Fixed transcription synchronization for multiple final ASR transcripts
20+
- Fixed edge case where mouth of avatar would not close
21+
- Fixed animation stream handling for broken streams
22+
- Fixed ElevenLabs edge case issues with multilingual use cases
23+
- Fixed chunk truncation issues in RAG Service
24+
- Fixed dangling tasks and pipeline cleanup issues
25+
26+
## [0.1.1] - 2025-04-30
27+
28+
### Fixed
29+
30+
- Fixed `RivaTTSService` not working with `nvidia-riva-client` version `2.19.1` due to breaking changes; `pyproject.toml` now pins `nvidia-riva-client` to version `2.19.0` only.
31+
32+
33+
## [0.1.0] - 2025-04-23
234
The NVIDIA Pipecat library augments the Pipecat framework by adding additional frame processors and services, as well as new multimodal frames to enhance avatar interactions. This is the first release of the NVIDIA Pipecat library.
335

4-
## New Features
36+
### Added
537

638
- Added Pipecat services for [Riva ASR (Automatic Speech Recognition)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/asr/asr-overview.html#), [Riva TTS (Text to Speech)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html), and [Riva NMT (Neural Machine Translation)](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/translation/translation-overview.html) models.
739
- Added Pipecat frames, processors, and services to support multimodal avatar interactions and use cases. This includes `Audio2Face3DService`, `AnimationGraphService`, `FacialGestureProviderProcessor`, and `PostureProviderProcessor`.
@@ -13,7 +45,6 @@ The NVIDIA Pipecat library augments the Pipecat framework by adding additional f
1345
- Released source code for the voice assistant example using `nvidia-pipecat`, along with the `pipecat-ai` library service, to showcase NVIDIA services with `ACETransport`.
1446

1547

16-
## Improvements
48+
### Changed
1749

1850
- Added `ElevenLabsTTSServiceWithEndOfSpeech`, an extended version of the ElevenLabs TTS service with end-of-speech events for usage in avatar interactions.
19-

examples/speech-to-speech/bot.py

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,10 @@
1111
from fastapi.staticfiles import StaticFiles
1212
from pipecat.audio.vad.silero import SileroVADAnalyzer
1313
from pipecat.frames.frames import LLMMessagesFrame
14-
15-
# Uncomment the following line if you want to use ElevenLabsTTS
16-
# from pipecat.services.elevenlabs import ElevenLabsTTSService
1714
from pipecat.pipeline.pipeline import Pipeline
1815
from pipecat.pipeline.task import PipelineParams, PipelineTask
1916
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
17+
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
2018

2119
from nvidia_pipecat.pipeline.ace_pipeline_runner import ACEPipelineRunner, PipelineMetadata
2220

@@ -29,12 +27,17 @@
2927
BotTranscriptSynchronization,
3028
UserTranscriptSynchronization,
3129
)
30+
from nvidia_pipecat.services.elevenlabs import ElevenLabsTTSServiceWithEndOfSpeech
3231
from nvidia_pipecat.services.nvidia_llm import NvidiaLLMService
33-
from nvidia_pipecat.services.riva_speech import RivaASRService, RivaTTSService
32+
from nvidia_pipecat.services.riva_speech import RivaASRService
3433
from nvidia_pipecat.transports.network.ace_fastapi_websocket import ACETransport, ACETransportParams
3534
from nvidia_pipecat.transports.services.ace_controller.routers.websocket_router import router as websocket_router
35+
36+
# from nvidia_pipecat.services.riva_speech import RivaTTSService
3637
from nvidia_pipecat.utils.logging import setup_default_ace_logging
3738

39+
# from nvidia_pipecat.serializers.ace_websocket import ACEWebSocketSerializer
40+
3841
load_dotenv(override=True)
3942

4043
setup_default_ace_logging(level="DEBUG")
@@ -52,16 +55,14 @@ async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
5255
transport = ACETransport(
5356
websocket=pipeline_metadata.websocket,
5457
params=ACETransportParams(
55-
vad_enabled=True,
5658
vad_analyzer=SileroVADAnalyzer(),
57-
vad_audio_passthrough=True,
59+
# serializer=ACEWebSocketSerializer(),
5860
),
5961
)
6062

6163
llm = NvidiaLLMService(
6264
api_key=os.getenv("NVIDIA_API_KEY"),
6365
model="meta/llama-3.1-8b-instruct",
64-
base_url=None,
6566
)
6667

6768
stt = RivaASRService(
@@ -71,27 +72,32 @@ async def create_pipeline_task(pipeline_metadata: PipelineMetadata):
7172
sample_rate=16000,
7273
model="parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble",
7374
)
74-
tts = RivaTTSService(
75-
server="localhost:50051",
76-
api_key=os.getenv("NVIDIA_API_KEY"),
77-
voice_id="English-US.Female-1",
78-
language="en-US",
79-
quality=20,
75+
# Uncomment the following if you want to use Riva TTS (make sure to comment out ElevenLabsTTS below)
76+
# tts = RivaTTSService(
77+
# server="localhost:50051",
78+
# api_key=os.getenv("NVIDIA_API_KEY"),
79+
# voice_id="English-US.Female-1",
80+
# model="fastpitch-hifigan-tts",
81+
# language="en-US",
82+
# zero_shot_quality=20,
83+
# )
84+
85+
tts = ElevenLabsTTSServiceWithEndOfSpeech(
86+
api_key=os.getenv("ELEVENLABS_API_KEY"),
87+
voice_id=os.getenv("ELEVENLABS_VOICE_ID", "cgSgspJ2msm6clMCkdW9"),
8088
sample_rate=16000,
81-
model="fastpitch-hifigan-tts",
89+
model="eleven_flash_v2_5",
90+
param=ElevenLabsTTSService.InputParams(
91+
stability=0.3,
92+
speed=0.97,
93+
similarity_boost=0.85,
94+
),
8295
)
96+
8397
# Used to synchronize the user and bot transcripts in the UI
8498
stt_transcript_synchronization = UserTranscriptSynchronization()
8599
tts_transcript_synchronization = BotTranscriptSynchronization()
86100

87-
# Uncomment the following if you want to use ElevenLabsTTS (make sure to comment out Riva TTS below)
88-
# tts = ElevenLabsTTSService(
89-
# api_key=os.getenv("ELEVENLABS_API_KEY"),
90-
# voice_id=os.getenv("ELEVENLABS_VOICE_ID", "EXAVITQu4vr4xnSDxMaL"),
91-
# sample_rate=16000,
92-
# model = "eleven_flash_v2_5",
93-
# )
94-
95101
messages = [
96102
{
97103
"role": "system",
@@ -156,8 +162,8 @@ async def on_client_connected(transport, client):
156162

157163
app = FastAPI()
158164
app.include_router(websocket_router)
159-
runner = ACEPipelineRunner(pipeline_callback=create_pipeline_task)
165+
runner = ACEPipelineRunner.create_instance(pipeline_callback=create_pipeline_task)
160166
app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "../static")), name="static")
161167

162168
if __name__ == "__main__":
163-
uvicorn.run(app, host="0.0.0.0", port=8100)
169+
uvicorn.run("bot:app", host="0.0.0.0", port=8100, workers=1)

flake.lock

Lines changed: 13 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flake.nix

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
description = "NVIDIA ACE Pipecat SDK";
33

44
inputs = {
5-
nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-24.11";
5+
nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
66
flake-utils.url = "github:numtide/flake-utils";
77
pyproject-nix = {
88
url = "github:pyproject-nix/pyproject.nix";
@@ -49,6 +49,49 @@
4949
numba = prev.numba.overrideAttrs (old: {
5050
buildInputs = (old.buildInputs or []) ++ [pkgs.tbb_2021_11];
5151
});
52+
semantic-version = prev.semantic-version.overrideAttrs (old: {
53+
nativeBuildInputs =
54+
old.nativeBuildInputs
55+
++ final.resolveBuildSystem {
56+
setuptools = [];
57+
wheel = [];
58+
};
59+
});
60+
61+
setuptools-scm = prev.setuptools-scm.overrideAttrs (old: {
62+
nativeBuildInputs =
63+
old.nativeBuildInputs
64+
++ final.resolveBuildSystem {
65+
setuptools = [];
66+
wheel = [];
67+
};
68+
});
69+
70+
setuptools-rust = prev.setuptools-rust.overrideAttrs (old: {
71+
nativeBuildInputs =
72+
old.nativeBuildInputs
73+
++ final.resolveBuildSystem {
74+
setuptools = [];
75+
wheel = [];
76+
};
77+
});
78+
79+
libcst = prev.libcst.overrideAttrs (old: {
80+
nativeBuildInputs =
81+
old.nativeBuildInputs
82+
++ final.resolveBuildSystem {
83+
setuptools = [];
84+
wheel = [];
85+
};
86+
});
87+
nvidia-pipecat = prev.nvidia-pipecat.overrideAttrs (old: {
88+
nativeBuildInputs =
89+
old.nativeBuildInputs
90+
++ final.resolveBuildSystem {
91+
hatchling = [];
92+
editables = [];
93+
};
94+
});
5295
};
5396

5497
# Use Python 3.12 from nixpkgs

pyproject.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "nvidia-pipecat"
3-
version = "0.1.0"
3+
version = "0.2.0"
44
description = "NVIDIA ACE Pipecat SDK"
55
readme = "NVIDIA_PIPECAT.md"
66
license = { file = "LICENSE" }
@@ -12,10 +12,8 @@ dependencies = [
1212
"av>=13.0.0",
1313
"fastapi>=0.115.7",
1414
"hatchling>=1.27.0",
15-
"nvidia-riva-client>=2.18.0",
1615
"onnxruntime>=1.20.1",
1716
"openai>=1.58.1",
18-
"pipecat-ai==0.0.57",
1917
"sentence-transformers>=3.3.1",
2018
"torch>=2.5.1",
2119
"python-dotenv>=1.0.1",
@@ -30,6 +28,8 @@ dependencies = [
3028
"nvidia-animation-graph==1.1.0",
3129
"opentelemetry-sdk>=1.31.0",
3230
"opentelemetry-instrumentation-grpc>=0.52b0",
31+
"nvidia-riva-client==2.20.0",
32+
"pipecat-ai==0.0.68",
3333
]
3434

3535
[build-system]
@@ -40,7 +40,7 @@ build-backend = "hatchling.build"
4040
packages = ["src/nvidia_pipecat"]
4141

4242
[tool.uv.sources]
43-
torch = { index = "pytorch" }
43+
torch = { index = "pytorch", marker = "sys_platform != 'darwin'" }
4444

4545
[[tool.uv.index]]
4646
name = "pytorch"

0 commit comments

Comments
 (0)