-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain_3b_orpo_1gpu.sh
More file actions
executable file
·107 lines (90 loc) · 3.23 KB
/
train_3b_orpo_1gpu.sh
File metadata and controls
executable file
·107 lines (90 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env bash
# ==========================================================================
# EVAFRILL-Mo 3B ORPO Training — H100 MIG 3g.40gb (Single GPU)
#
# Runs ORPO (Odds Ratio Preference Optimization) with LoRA.
# Single stage: SFT + preference alignment simultaneously.
#
# Base: checkpoints/3b_final/checkpoint-0319772 (pretrained)
# Data: data/preference/combined_preference.jsonl
# ==========================================================================
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Activate the Python virtualenv when it is present; otherwise fall back to
# whatever python3 is already on PATH.
if [[ -f /root/ai-env/bin/activate ]]; then
  # shellcheck disable=SC1091
  . /root/ai-env/bin/activate
fi

# Work from the directory containing this script so that every relative
# path used below resolves against it, regardless of the caller's cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(pwd)"

# Inputs handed to the ORPO trainer (all relative to SCRIPT_DIR).
PRETRAINED_CKPT="checkpoints/3b_final/checkpoint-0319772"
ORPO_DATA="data/preference/combined_preference.jsonl"
TOKENIZER="tokenizer/korean_sp/tokenizer.json"
# ==========================================
# ORPO Training
# ==========================================
# Runs train/orpo_native.py, restarting it up to MAX_RETRIES times if it
# exits non-zero. Before every attempt the newest checkpoint under ORPO_DIR
# that contains a model.pt is looked up and, if found, passed via --resume.
#
# Reads:  PRETRAINED_CKPT, ORPO_DATA, TOKENIZER (set earlier in this script)
# Writes: ORPO_DIR, ORPO_LOG, LATEST, RETRIES (ORPO_DIR is reused by the
#         merge stage below)
# Exits:  1 when every attempt failed
echo "=========================================="
echo "ORPO: Single-stage SFT + Preference Align"
echo "=========================================="
ORPO_DIR="checkpoints/3b_orpo"
ORPO_LOG="$ORPO_DIR/train.log"
MAX_RETRIES=5
RETRY_DELAY=30
RETRIES=0

while (( RETRIES < MAX_RETRIES )); do
  # Find the newest resumable checkpoint (by mtime). This is done with a
  # glob loop instead of `ls -t | head -1`: under `set -e -o pipefail` the
  # `ls` pipeline returns non-zero when no checkpoint exists yet, which
  # would abort the whole script instead of letting the retry proceed.
  # An unmatched glob stays literal and is rejected by the -f test.
  LATEST=""
  for ckpt in "$ORPO_DIR"/checkpoint-[0-9]*; do
    # Skip checkpoints without model.pt (e.g. a crash while saving) so an
    # older complete checkpoint is used rather than restarting from scratch.
    [[ -f "$ckpt/model.pt" ]] || continue
    if [[ -z "$LATEST" || "$ckpt" -nt "$LATEST" ]]; then
      LATEST="$ckpt"
    fi
  done

  # Optional arguments are kept in an array so the path is passed as a
  # single word even if it contains whitespace.
  RESUME_ARGS=()
  if [[ -n "$LATEST" ]]; then
    RESUME_ARGS=(--resume "$LATEST")
    echo "Resuming from: $LATEST"
  fi

  # The ${arr[@]+...} form keeps an empty array from tripping `set -u`
  # on bash versions older than 4.4.
  if python3 train/orpo_native.py \
    --pretrained_checkpoint "$PRETRAINED_CKPT" \
    --orpo_data "$ORPO_DATA" \
    --tokenizer "$TOKENIZER" \
    --checkpoint_dir "$ORPO_DIR" \
    --config configs/h100_mig/orpo_3b_1gpu.yaml \
    --device cuda:0 \
    --log_file "$ORPO_LOG" \
    ${RESUME_ARGS[@]+"${RESUME_ARGS[@]}"}; then
    break
  fi

  RETRIES=$((RETRIES + 1))
  # Only announce and wait when another attempt will actually follow.
  if (( RETRIES < MAX_RETRIES )); then
    echo "[WARN] ORPO crashed (attempt $RETRIES/$MAX_RETRIES). Restarting in ${RETRY_DELAY}s..."
    sleep "$RETRY_DELAY"
  fi
done

if (( RETRIES >= MAX_RETRIES )); then
  echo "[ERROR] ORPO failed after $MAX_RETRIES attempts"
  exit 1
fi
echo "ORPO training complete."
# ==========================================
# SLERP Merge: SFT + ORPO
# ==========================================
# Merges the SFT checkpoint with the ORPO result via
# scripts/merge_checkpoints.py (alpha 0.5). Uses $ORPO_DIR/checkpoint-merged
# when that directory exists, otherwise the most recently modified numbered
# checkpoint under $ORPO_DIR.
#
# Reads: ORPO_DIR (set by the training stage above)
# Exits: 1 when no ORPO checkpoint can be found
echo ""
echo "=========================================="
echo "SLERP Merge: SFT ↔ ORPO"
echo "=========================================="
SFT_CKPT="checkpoints/3b_sft_v2/checkpoint-best"
ORPO_MERGED="$ORPO_DIR/checkpoint-merged"
SLERP_OUT="$ORPO_DIR/checkpoint-slerp"

if [[ ! -d "$ORPO_MERGED" ]]; then
  echo "[WARN] ORPO merged checkpoint not found, using final checkpoint"
  # Pick the newest numbered checkpoint by mtime. A glob loop is used
  # instead of `ls -t | head -1`: under `set -e -o pipefail` a failing `ls`
  # (no match) would terminate the script before the error message below
  # could be printed.
  LATEST=""
  for ckpt in "$ORPO_DIR"/checkpoint-[0-9]*; do
    [[ -e "$ckpt" ]] || continue  # unmatched glob stays literal; skip it
    if [[ -z "$LATEST" || "$ckpt" -nt "$LATEST" ]]; then
      LATEST="$ckpt"
    fi
  done

  if [[ -n "$LATEST" ]]; then
    ORPO_MERGED="$LATEST"
  else
    echo "[ERROR] No ORPO checkpoint found"
    exit 1
  fi
fi

python3 scripts/merge_checkpoints.py \
  --ckpt_a "$SFT_CKPT" \
  --ckpt_b "$ORPO_MERGED" \
  --output "$SLERP_OUT" \
  --alpha 0.5

echo ""
echo "=========================================="
echo "ORPO Pipeline Complete!"
echo " ORPO Training: $ORPO_DIR"
echo " SLERP Merge: $SLERP_OUT"
echo "=========================================="