This repository was archived by the owner on Mar 3, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdocker-compose.yaml
More file actions
102 lines (95 loc) · 2.82 KB
/
docker-compose.yaml
File metadata and controls
102 lines (95 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Open CTF Environment - Docker Compose
#
# Single Dockerfile with multi-stage build targets:
# docker/Dockerfile → targets: base, sft, grpo
#
# Usage:
# docker compose run --rm sft # Stage 1: SFT (TRL)
# docker compose run --rm merge # Merge LoRA adapter
# docker compose run --rm grpo # Stage 2: GRPO (SkyRL)
# docker compose run --rm validate # Validate pipeline
# docker compose run --rm export # Export to GGUF
#
# Override model/data paths via environment or .env file.
# Default target model: Qwen3.5-27B (requires vLLM nightly, transformers>=5.2.0)
# Reusable fragment: reserve all NVIDIA GPUs for the service and give it a
# large /dev/shm (training DataLoader workers need shared memory well beyond
# Docker's 64 MB default). Merged into services via `<<: *gpu`.
x-gpu: &gpu
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: all
            capabilities: [gpu]
  shm_size: "64g"
# Reusable fragment: host bind mounts shared by every service
# (datasets in, checkpoints/exports out, training configs read-only by convention).
x-volumes: &volumes
  volumes:
    - ./data:/workspace/data
    - ./outputs:/workspace/outputs
    - ./configs:/workspace/configs
services:
  # Stage 1: supervised fine-tuning via TRL. Trains a LoRA adapter on ${MODEL}
  # and writes it to ./outputs/sft on the host (via the shared volumes fragment).
  sft:
    build:
      context: .
      dockerfile: docker/Dockerfile
      target: sft
    # NOTE: merge-key semantics are shallow; earlier anchors win on key conflict
    # (none here — *gpu and *volumes contribute disjoint keys).
    <<: [*gpu, *volumes]
    # Folded scalar: the lines below collapse into a single command string.
    command: >
      open-ctf-train sft
      --model ${MODEL:-Qwen/Qwen3.5-27B}
      --data /workspace/data/${SFT_DATA:-sft.jsonl}
      --output /workspace/outputs/sft
      --config /workspace/configs/training/${TRAINING_CONFIG:-training_qwen35_27b.yaml}
    environment:
      # Both optional: empty default keeps compose from warning when unset.
      - WANDB_API_KEY=${WANDB_API_KEY:-}
      - HF_TOKEN=${HF_TOKEN:-}
# Merge LoRA adapter into base (uses SFT image)
merge:
build:
context: .
dockerfile: docker/Dockerfile
target: sft
<<: [*gpu, *volumes]
command: >
open-ctf-train merge
--adapter ${ADAPTER:-/workspace/outputs/sft}
--base-model ${MODEL:-Nanbeige/Nanbeige4.1-3B}
--output /workspace/outputs/sft-merged
environment:
- HF_TOKEN=${HF_TOKEN:-}
  # Stage 2: GRPO via SkyRL. Consumes the merged full model from the `merge`
  # service (run `sft` then `merge` first, or point GRPO_MODEL elsewhere).
  grpo:
    build:
      context: .
      dockerfile: docker/Dockerfile
      target: grpo
    <<: [*gpu, *volumes]
    command: >
      open-ctf-train grpo
      --model ${GRPO_MODEL:-/workspace/outputs/sft-merged}
      --data /workspace/data/${GRPO_DATA:-grpo_offline_683.jsonl}
      --output /workspace/outputs/grpo
    environment:
      # Both optional: empty default keeps compose from warning when unset.
      - WANDB_API_KEY=${WANDB_API_KEY:-}
      - HF_TOKEN=${HF_TOKEN:-}
  # Pipeline validation. Built from the lightweight `base` target (no heavy ML
  # deps) and deliberately merges only the volumes fragment — no GPU needed.
  validate:
    build:
      context: .
      dockerfile: docker/Dockerfile
      target: base
    <<: *volumes
    command: open-ctf-validate
# Export to GGUF (uses SFT image for merge + export tools)
export:
build:
context: .
dockerfile: docker/Dockerfile
target: sft
<<: [*gpu, *volumes]
command: >
open-ctf-export
--adapter ${ADAPTER:-/workspace/outputs/sft}
--base-model ${MODEL:-Nanbeige/Nanbeige4.1-3B}
--output /workspace/outputs/model.gguf
--quant ${QUANT:-Q4_K_M}