-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.prod.yml
More file actions
134 lines (127 loc) · 3.51 KB
/
docker-compose.prod.yml
File metadata and controls
134 lines (127 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Production stack: React frontend, Node.js+SQLite API, n8n workflow
# automation, and an optional local-LLM profile (llama.cpp CUDA server
# plus a RAG wrapper). Enable the LLM services with:
#   docker compose --profile local-llm up -d
version: '3.8'  # NOTE(review): `version` is obsolete in Compose v2 and ignored — safe to drop

services:
  # Frontend - React App (nginx serves the built bundle on container port 80)
  frontend:
    build:
      context: ./virtual-counselor
      dockerfile: Dockerfile
    container_name: virtual-counselor-frontend
    ports:
      - "3007:80"
    environment:
      - NODE_ENV=production
      # NOTE(review): localhost here means the *browser's* host, not the
      # container network — confirm this matches the deployment topology.
      - REACT_APP_API_URL=http://localhost:3008
    restart: unless-stopped
    depends_on:
      - api
    networks:
      - vc-network

  # Backend API - Node.js + SQLite
  api:
    build:
      context: .
      dockerfile: Dockerfile.api
    container_name: virtual-counselor-api
    ports:
      - "3008:3008"
    env_file:
      - .env.production
      - prompt-search/.env
    environment:
      - NODE_ENV=production
      - API_PORT=3008
      # WEBHOOK_SECRET is provided via env_file (.env.production)
      # ANTHROPIC_API_KEY and NVIDIA_API_KEY are provided via prompt-search/.env
    volumes:
      # SQLite data lives on the host so it survives container rebuilds.
      - ./data:/app/data
      # NOTE(review): "archieved" looks like a typo for "archived", but it must
      # match the host directory name — only rename together with the directory.
      - ./pdf-archieved-catalog:/app/pdf-archieved-catalog:ro
    restart: unless-stopped
    networks:
      - vc-network
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:3008/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # n8n - Workflow Automation
  n8n:
    image: n8nio/n8n:latest
    container_name: virtual-counselor-n8n
    ports:
      - "3009:5678"
    env_file:
      - .env.production
    environment:
      - N8N_BASIC_AUTH_ACTIVE=true
      # Provide N8N basic auth credentials via the env_file (.env.production)
      # Add these keys to your .env.production on the host:
      #   N8N_BASIC_AUTH_USER=admin
      #   N8N_BASIC_AUTH_PASSWORD=<strong-password>
      - N8N_HOST=localhost
      - N8N_PORT=5678
      - N8N_PROTOCOL=http
      # NOTE(review): WEBHOOK_URL normally points at n8n's own public URL;
      # here it targets the api service — confirm this is intentional.
      - WEBHOOK_URL=http://api:3008
      - GENERIC_TIMEZONE=America/Los_Angeles
      # WEBHOOK_SECRET is provided via env_file (.env.production)
    volumes:
      # Named volume keeps n8n credentials/workflows across restarts.
      - n8n_data:/home/node/.n8n
    restart: unless-stopped
    depends_on:
      - api
    networks:
      - vc-network

  # llama.cpp inference server — official pre-built CUDA image, no compilation
  # GTX 950 (2GB VRAM): runs Llama 3.2 3B fully on GPU (sm_52 supported)
  llamacpp:
    image: ghcr.io/ggml-org/llama.cpp:server-cuda
    container_name: virtual-counselor-llamacpp
    runtime: nvidia
    environment:
      - LLAMA_ARG_MODEL=/models/Llama-3.2-3B-Instruct-Q4_K_M.gguf
      - LLAMA_ARG_N_GPU_LAYERS=20
      - LLAMA_ARG_CTX_SIZE=1024
      - LLAMA_ARG_N_PARALLEL=1
      - LLAMA_ARG_UBATCH_SIZE=128
      - LLAMA_ARG_HOST=0.0.0.0
      - LLAMA_ARG_PORT=8081
    volumes:
      - ./prompt-search/models:/models:ro
    restart: unless-stopped
    networks:
      - vc-network
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Only started when the local-llm profile is explicitly enabled.
    profiles:
      - local-llm

  # RAG wrapper — does retrieval then calls llamacpp for inference
  llm:
    build:
      context: .
      dockerfile: Dockerfile.llm
    container_name: virtual-counselor-llm
    ports:
      - "8080:8080"
    environment:
      - LLAMACPP_URL=http://llamacpp:8081
      - INDEX_DIR=/app/data/domain
      - PORT=8080
    volumes:
      - ./prompt-search/data/domain:/app/data/domain:ro
    depends_on:
      - llamacpp
    restart: unless-stopped
    networks:
      - vc-network
    profiles:
      - local-llm

networks:
  vc-network:
    driver: bridge

volumes:
  n8n_data: