-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmonitor_training.sh
More file actions
executable file
·57 lines (47 loc) · 1.69 KB
/
monitor_training.sh
File metadata and controls
executable file
·57 lines (47 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/bash
# PokerAI Training Monitor Script
# Provides periodic progress updates for training runs
#
# Usage: monitor_training.sh [interval_seconds]

# Seconds to wait between two reports: first positional argument, or
# 300 (5 minutes) when it is missing or empty.
MONITOR_INTERVAL=${1:-300}

# Files inspected for each report. Both are relative paths, so they are
# resolved against the directory the script is started from.
LOG_FILE="training_output.log"
METRICS_FILE="training_metrics.csv"

# Start-up banner, one argument per output line; the trailing empty
# argument produces the blank separator line.
printf '%s\n' \
  "🎯 PokerAI Training Monitor Started" \
  "====================================" \
  "Monitoring interval: ${MONITOR_INTERVAL} seconds" \
  "Press Ctrl+C to stop monitoring" \
  ""
#######################################
# Print a one-shot status report for the training run.
# Globals:
#   METRICS_FILE (read) - CSV file whose first line is treated as a header
#   LOG_FILE     (read) - training log file
# Arguments:
#   None
# Outputs:
#   Timestamp, process status, GPU usage, metrics progress and recent log
#   lines, written to stdout.
# Returns:
#   0 when a matching training process was found; 1 otherwise, in which
#   case the GPU, metrics and log sections are skipped.
#######################################
get_training_status() {
  local procs row_count latest_row

  echo "=== $(date '+%Y-%m-%d %H:%M:%S') ==="

  # Take a single snapshot of the process table so the RUNNING verdict and
  # the details printed below always describe the same set of processes.
  # `grep -v grep` drops the grep process that is itself part of the pipeline.
  procs=$(ps aux | grep "python.*train.py" | grep -v grep)
  if [[ -z "$procs" ]]; then
    echo "❌ Training: NOT RUNNING"
    return 1
  fi
  echo "✅ Training: RUNNING"
  # Columns 2, 3, 4 and 10 of `ps aux` output: PID, %CPU, %MEM and TIME.
  awk '{print " PID:", $2, "CPU:", $3"%", "MEM:", $4"%", "TIME:", $10}' <<<"$procs"

  # GPU status
  echo "🎮 GPU Status:"
  if command -v rocm-smi > /dev/null 2>&1; then
    rocm-smi --showuse | grep "GPU use" | sed 's/^/ /'
  else
    # Keep the report readable instead of emitting a "command not found"
    # shell error on every cycle.
    echo " rocm-smi not available"
  fi

  # Training progress
  if [[ -f "$METRICS_FILE" ]]; then
    # wc -l counts newline-terminated lines only, so row_count is the
    # number of completely written lines (header included).
    row_count=$(wc -l < "$METRICS_FILE") || row_count=0
    if (( row_count > 1 )); then
      echo "📊 Training Progress: $((row_count - 1)) data points logged"
      if (( row_count > 2 )); then
        # Report the line before the final one.
        # NOTE(review): presumably this skips a row the trainer may still
        # be writing - confirm.
        latest_row=$(tail -n 2 "$METRICS_FILE" | head -n 1)
      else
        # Header plus a single data row: line 2 is that row. Taking the
        # second-to-last line here would print the header as if it were data.
        latest_row=$(head -n 2 "$METRICS_FILE" | tail -n 1)
      fi
      awk -F',' '{print " Latest hand:", $1, "Avg reward:", $2}' <<<"$latest_row"
    else
      echo "📊 Training Progress: Still initializing (header only)"
    fi
  fi

  # Recent log activity: of the last 5 log lines, show at most 3 that
  # mention one of the keywords below.
  if [[ -f "$LOG_FILE" ]]; then
    echo "📝 Recent Activity:"
    tail -n 5 "$LOG_FILE" \
      | grep -E "(INFO|Simulation time|CFR|Phase|Training Progress)" \
      | tail -n 3 \
      | sed 's/^/ /'
  fi

  echo
}
# Main monitoring loop: print a status report, then wait MONITOR_INTERVAL
# before the next one. Runs until the script is interrupted (e.g. Ctrl+C).
while true; do
  # The report's exit status is not checked, so monitoring continues even
  # when get_training_status returns non-zero.
  get_training_status

  # If sleep rejects the interval (e.g. a non-numeric argument) it returns
  # immediately; stop here rather than looping at full speed and flooding
  # the terminal with reports.
  if ! sleep "${MONITOR_INTERVAL}"; then
    echo "monitor: sleep failed for interval '${MONITOR_INTERVAL}'; stopping" >&2
    exit 1
  fi
done