-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompose-integrated.yml
More file actions
137 lines (133 loc) · 3.97 KB
/
compose-integrated.yml
File metadata and controls
137 lines (133 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Integrated stack: TTS (tts-indic-server) and LLM (vllm-server) in compose; ASR can be host.
# Copy .env.example to .env. UI at http://localhost:80 — proxies /v1 to backend.
services:
talk:
image: dwani/talk-server:latest
expose:
- "8000"
depends_on:
- tts-indic-server
- vllm-server
- postgres
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]
interval: 10s
timeout: 5s
retries: 3
start_period: 5s
environment:
DWANI_API_BASE_URL_TTS: http://tts-indic-server:10804
DWANI_API_BASE_URL_ASR: http://asr-indic-server:10803
DWANI_API_BASE_URL_LLM: http://vllm-server:10802
DWANI_LLM_MODEL: ${DWANI_LLM_MODEL:-gemma3}
DWANI_AGENT_BASE_URL: http://agents:8081
DWANI_DATABASE_URL: ${DWANI_DATABASE_URL:-postgresql+psycopg://talk:talk@postgres:5432/talk}
DWANI_AUTH_COOKIE_NAME: ${DWANI_AUTH_COOKIE_NAME:-dwani_auth_session}
DWANI_AUTH_SESSION_TTL_SECONDS: ${DWANI_AUTH_SESSION_TTL_SECONDS:-86400}
DWANI_AUTH_COOKIE_SAMESITE: ${DWANI_AUTH_COOKIE_SAMESITE:-lax}
DWANI_AUTH_COOKIE_SECURE: ${DWANI_AUTH_COOKIE_SECURE:-0}
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
talk-ui:
image: dwani/talk-ux:latest
ports:
- "80:80"
depends_on:
- talk
restart: unless-stopped
asr-indic-server:
image: dwani/asr-indic-server:latest
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
ports:
- 10803:10803
environment:
- HF_TOKEN
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0']
capabilities: [gpu]
tts-indic-server:
image: dwani/tts-indic-server:latest
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
ports:
- "10804:10804"
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
environment:
TTS_NFE_STEPS: 8
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ['0']
capabilities: [gpu]
vllm-server:
image: vllm/vllm-openai:latest
command: [
"--model", "RedHatAI/gemma-3-12b-it-FP8-dynamic",
"--served-model-name", "gemma3",
"--host", "0.0.0.0",
"--port", "10802",
"--gpu-memory-utilization", "0.9",
"--tensor-parallel-size", "1",
"--max-model-len", "8192",
"--disable-log-requests",
"--dtype", "bfloat16",
"--enable-chunked-prefill",
"--enable-prefix-caching",
"--max-num-batched-tokens", "8192",
"--chat-template-content-format", "openai"
]
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
restart: unless-stopped
ports:
- "10802:10802"
agents:
image: dwani/talk-agents:latest
expose:
- "8081"
depends_on:
- vllm-server
environment:
LITELLM_MODEL_NAME: ${LITELLM_MODEL_NAME:-openai/gemma3}
LITELLM_API_BASE: http://vllm-server:10802/v1
LITELLM_API_KEY: ${LITELLM_API_KEY}
volumes:
- fix_my_city_data:/app/fix-my-city/data
restart: unless-stopped
postgres:
image: postgres:16-alpine
environment:
POSTGRES_USER: ${DWANI_POSTGRES_USER:-talk}
POSTGRES_PASSWORD: ${DWANI_POSTGRES_PASSWORD:-talk}
POSTGRES_DB: ${DWANI_POSTGRES_DB:-talk}
expose:
- "5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${DWANI_POSTGRES_USER:-talk} -d ${DWANI_POSTGRES_DB:-talk}"]
interval: 10s
timeout: 5s
retries: 5
volumes:
- talk_postgres_data:/var/lib/postgresql/data
restart: unless-stopped
# Named volumes (default driver): agent data and Postgres contents survive
# container recreation. `{}` is the explicit empty mapping — same meaning
# to compose as a bare key, but unambiguous YAML.
volumes:
  fix_my_city_data: {}
  talk_postgres_data: {}