Skip to content

Commit ebde28d

Browse files
authored
fix: bake punkt_tab file into docker images (#234)
1 parent 1a7eb08 commit ebde28d

1 file changed

Lines changed: 5 additions & 0 deletions

File tree

api/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ RUN pip install --user --no-cache-dir -r requirements.txt && \
2626
# Copy and install pipecat from local submodule
2727
COPY pipecat /tmp/pipecat
2828
RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter,camb]' && \
29+
# Pre-download NLTK punkt_tab tokenizer data (required by pipecat at runtime)
30+
# raise_on_error=True makes a failed download abort the build (nltk.download otherwise only returns False and exits 0); download_dir pins the path the final stage copies from
python -c "import nltk; nltk.download('punkt_tab', download_dir='/root/nltk_data', quiet=True, raise_on_error=True)" && \
2931
# Clean up pip cache and temporary pipecat directory
3032
rm -rf /root/.cache/pip /tmp/pipecat
3133

@@ -48,6 +50,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
4850
# Copy Python packages from builder stage
4951
COPY --from=builder /root/.local /root/.local
5052

53+
# Copy NLTK data (punkt_tab tokenizer) from builder stage
54+
COPY --from=builder /root/nltk_data /root/nltk_data
55+
5156
# Make sure scripts in .local are available
5257
ENV PATH=/root/.local/bin:$PATH
5358

0 commit comments

Comments
 (0)