-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompose-integrated.yml
More file actions
137 lines (133 loc) · 3.97 KB
/
compose-integrated.yml
File metadata and controls
137 lines (133 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Integrated stack: TTS (tts-indic-server) and LLM (vllm-server) in compose; ASR can be host.
# Copy .env.example to .env. UI at http://localhost:80 — proxies /v1 to backend.
services:
talk:
image: dwani/talk-server:latest
expose:
- "8000"
depends_on:
- tts-indic-server
- vllm-server
- postgres
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 10s
timeout: 5s
retries: 3
start_period: 5s
environment:
DWANI_API_BASE_URL_TTS: http://tts-indic-server:10804
DWANI_API_BASE_URL_ASR: http://asr-indic-server:10803
DWANI_API_BASE_URL_LLM: http://vllm-server:10802
DWANI_LLM_MODEL: ${DWANI_LLM_MODEL:-gemma3}
DWANI_AGENT_BASE_URL: http://agents:8081
DWANI_DATABASE_URL: ${DWANI_DATABASE_URL:-postgresql+psycopg://talk:talk@postgres:5432/talk}
DWANI_AUTH_COOKIE_NAME: ${DWANI_AUTH_COOKIE_NAME:-dwani_auth_session}
DWANI_AUTH_SESSION_TTL_SECONDS: ${DWANI_AUTH_SESSION_TTL_SECONDS:-86400}
DWANI_AUTH_COOKIE_SAMESITE: ${DWANI_AUTH_COOKIE_SAMESITE:-lax}
DWANI_AUTH_COOKIE_SECURE: ${DWANI_AUTH_COOKIE_SECURE:-0}
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
talk-ui:
image: dwani/talk-ux:latest
ports:
- "80:80"
depends_on:
- talk
restart: unless-stopped
asr-indic-server:
image: dwani/asr-indic-server:latest
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
ports:
- 10803:10803
environment:
- HF_TOKEN
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0']
capabilities: [gpu]
tts-indic-server:
image: dwani/tts-indic-server:latest
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
ports:
- "10804:10804"
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
environment:
TTS_NFE_STEPS: 8
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0']
capabilities: [gpu]
vllm-server:
image: vllm/vllm-openai:latest
command: [
"--model", "RedHatAI/gemma-3-12b-it-FP8-dynamic",
"--served-model-name", "gemma3",
"--host", "0.0.0.0",
"--port", "10802",
"--gpu-memory-utilization", "0.9",
"--tensor-parallel-size", "1",
"--max-model-len", "8192",
"--disable-log-requests",
"--dtype", "bfloat16",
"--enable-chunked-prefill",
"--enable-prefix-caching",
"--max-num-batched-tokens", "8192",
"--chat-template-content-format", "openai"
]
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: unless-stopped
ports:
- "10802:10802"
agents:
image: dwani/talk-agents:latest
expose:
- "8081"
depends_on:
- vllm-server
environment:
LITELLM_MODEL_NAME: ${LITELLM_MODEL_NAME:-openai/gemma3}
LITELLM_API_BASE: http://vllm-server:10802/v1
LITELLM_API_KEY: ${LITELLM_API_KEY}
volumes:
- fix_my_city_data:/app/fix-my-city/data
restart: unless-stopped
postgres:
image: postgres:16-alpine
environment:
POSTGRES_USER: ${DWANI_POSTGRES_USER:-talk}
POSTGRES_PASSWORD: ${DWANI_POSTGRES_PASSWORD:-talk}
POSTGRES_DB: ${DWANI_POSTGRES_DB:-talk}
expose:
- "5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${DWANI_POSTGRES_USER:-talk} -d ${DWANI_POSTGRES_DB:-talk}"]
interval: 10s
timeout: 5s
retries: 5
volumes:
- talk_postgres_data:/var/lib/postgresql/data
restart: unless-stopped
# Named volumes (default driver): agent data and Postgres contents survive
# container recreation. `{}` is the explicit empty mapping — same meaning
# to compose as a bare key, but unambiguous YAML.
volumes:
  fix_my_city_data: {}
  talk_postgres_data: {}