Skip to content

Commit f6a02de

Browse files
author
Qi Li (leeli4)
committed
Change to centralized controller design
1 parent e2c77f0 commit f6a02de

File tree

3 files changed

+57
-40
lines changed

3 files changed

+57
-40
lines changed

collect.sh

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@ OUTPUT_FILE=$2
77
DURATION=$3
88
VERBOSE=${4:-0}
99
SIGNAL_FILE=$5
10+
SIGNALBACK_FILE="/tmp/signalback/nso_collect_start_signalback_$$"
11+
1012

1113
if [ -z "$PID" ] || [ -z "$OUTPUT_FILE" ] || [ -z "$DURATION" ]; then
1214
echo "Usage: $0 <pid> <output_file> <duration> [verbose_flag] [signal_file]"
@@ -30,21 +32,30 @@ OUTPUT_DIR=$(dirname "$OUTPUT_FILE")
3032
mkdir -p "$OUTPUT_DIR"
3133

3234

33-
# Wait for all collection processes to be ready before starting
34-
if [ ! -z "$SIGNAL_FILE" ]; then
35-
log_verbose "Waiting for start signal..."
36-
37-
while [ ! -f "$SIGNAL_FILE" ]; do
38-
sleep 0.1
39-
done
40-
41-
log_verbose "Start signal received. Beginning data collection for PID $PID..."
42-
fi
43-
44-
# for (( i=0;i<=$DURATION;i++ ))
45-
# do
46-
# START_TIME=$(date +%s%N)
4735

36+
37+
for (( i=0;i<=$DURATION;i++ ))
38+
do
39+
40+
# Wait for centralized controller signal
41+
if [ ! -z "$SIGNAL_FILE" ]; then
42+
log_verbose "Waiting for start signal..."
43+
#echo "Waiting for start signal..."
44+
while true; do
45+
if [ ! -f "$SIGNAL_FILE" ]; then
46+
sleep 0.1
47+
elif [[ $(cat $SIGNAL_FILE) -ne $i ]];then
48+
sleep 0.1
49+
else
50+
break
51+
fi
52+
done
53+
54+
55+
log_verbose "Start signal received. Beginning data collection for PID $PID..."
56+
fi
57+
rm -f $SIGNALBACK_FILE
58+
# Tick!
4859
ALO_TOTAL=$(cat /proc/meminfo | grep 'Committed_AS' | awk -F' ' '{print $2}')
4960
Limit=$(cat /proc/meminfo | grep 'CommitLimit' | awk -F' ' '{print $2}')
5061

@@ -58,15 +69,9 @@ fi
5869
echo $TIME" "$PHY" "$ALO_PID" "$ALO_TOTAL" "$Limit >> "$OUTPUT_FILE"
5970
log_verbose "$i second is collected to $OUTPUT_FILE"
6071
fi
72+
touch $SIGNALBACK_FILE
6173

62-
# END_TIME=$(date +%s%N)
63-
# ELAPSED=$(($END_TIME - $START_TIME))
64-
# SLEEP_TIME=$(($NS - $ELAPSED))
6574

66-
# if (( SLEEP_TIME > 0 )); then
67-
# SLEEP_SECONDS=$(awk "BEGIN {printf \"%.9f\", $SLEEP_TIME/$NS}")
68-
# sleep $SLEEP_SECONDS
69-
# fi
70-
# done
75+
done
7176

72-
#echo "Collection for PID $PID done"
77+
log_verbose "Collection for PID $PID done"

compare_mem_alloc.plt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,8 @@ set format x '%H:%M:%S'
5656

5757
show style line
5858

59-
plot "data/python3/mem_ncs.smp.log" using 1:5 with lines axes x1y1 lc 'red' lw 2 title "CommitLimit", \
60-
"data/python3/mem_ncs.smp.log" using 1:4 with lines axes x1y1 title "Commited_AS", \
59+
plot "data/ncs.smp/mem_ncs.smp.log" using 1:5 with lines axes x1y1 lc 'red' lw 2 title "CommitLimit", \
60+
"data/ncs.smp/mem_ncs.smp.log" using 1:4 with lines axes x1y1 title "Commited_AS", \
6161
"data/ncs.smp/mem_ncs.smp.log" using 1:3 with lines axes x1y1 title "ncs.smp", \
6262
"data/python3/mem_total.log" using 1:3 with lines axes x1y1 title "PythonVM(Total)", \
6363
"data/NcsJVMLauncher/mem_NcsJVMLauncher.log" using 1:3 with lines axes x1y1 title "JavaVM"

plot.sh

Lines changed: 27 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -76,33 +76,39 @@ echo "====================================== Collection for for all process ====
7676
SIGNAL_FILE="/tmp/nso_collect_start_signal_$$"
7777
rm -f "$SIGNAL_FILE"
7878

79+
rm -rf "/tmp/signalback"
80+
mkdir "/tmp/signalback"
81+
7982
# Find and collect for each process type
8083
echo "Starting collection processes..."
8184

8285
NS=1000000000
86+
counter=0
8387

8488
mkdir -p data/python3
8589
for (( i=0;i<=$DURATION;i++ ))
8690
do
8791
START_TIME=$(date +%s%N)
8892
PYTHON_PIDS=$(pgrep -f "python.* .*startup\.py")
89-
JVM_PID=$(pgrep -f NcsJVMLauncher)
93+
JVM_PID=$(pgrep -f com.tailf.ncs.NcsJVMLauncher)
9094
NCS_PID=$(pgrep -f "\.smp.*-ncs true")
9195

9296
# Collect ncs.smp or beam.smp NSO process
9397
if [ ! -z "$NCS_PID" ]; then
94-
#echo "Starting collection for ncs.smp PID $NCS_PID"
9598
COLLECT_PIDS=$(pgrep -f ".*collect.sh.* $NCS_PID")
9699
if [ -z "$COLLECT_PIDS" ]; then
100+
echo "New ncs.smp process PID $NCS_PID: ncs.smp. Start Collection"
101+
counter=$((counter+1))
97102
bash collect.sh $NCS_PID "data/ncs.smp/mem_ncs.smp.log" $DURATION $VERBOSE "$SIGNAL_FILE" &
98103
fi
99104
fi
100105

101106
# Collect NcsJVMLauncher process
102107
if [ ! -z "$JVM_PID" ]; then
103-
#echo "Starting collection for NcsJVMLauncher PID $JVM_PID"
104108
COLLECT_PIDS=$(pgrep -f ".*collect.sh.* $JVM_PID")
105109
if [ -z "$COLLECT_PIDS" ]; then
110+
echo "New JVM process PID $JVM_PID: NcsJVMLauncher. Start Collection"
111+
counter=$((counter+1))
106112
bash collect.sh $JVM_PID "data/NcsJVMLauncher/mem_NcsJVMLauncher.log" $DURATION $VERBOSE "$SIGNAL_FILE" &
107113
fi
108114
fi
@@ -112,19 +118,16 @@ do
112118
for pid in $PYTHON_PIDS; do
113119
COLLECT_PIDS=$(pgrep -f ".*collect.sh.* $pid")
114120
if [ -z "$COLLECT_PIDS" ]; then
115-
#echo "Not Found Collection Process for Python process PID $pid. Spwaning new Collection Process."
116121
PYTHON_SCRIPT=$(ps -p $pid -o command | tail -n 1 | awk -F' ' '{print $9}')
117122
SCRIPT_NAME=$(basename "$PYTHON_SCRIPT" .py 2>/dev/null || echo "python_$pid")
118123
if [ ! -z "$PYTHON_SCRIPT" ]; then
119-
#echo "Starting collection for Python process PID $pid: $SCRIPT_NAME"
124+
echo "New Python process PID $pid: $SCRIPT_NAME. Start Collection"
125+
counter=$((counter+1))
120126
bash collect.sh $pid "data/python3/mem_$SCRIPT_NAME.log" $DURATION-$i $VERBOSE "$SIGNAL_FILE" &
121127
fi
122-
# else
123-
# echo "Collection Process already running for Python process PID $pid"
128+
124129
fi
125130
done
126-
# else
127-
# echo "No Python processes found to collect. for second $i"
128131
fi
129132

130133

@@ -139,17 +142,26 @@ do
139142
fi
140143

141144
# Signal all processes to start collecting
142-
touch "$SIGNAL_FILE"
143-
wait
144-
echo -ne "Data Collection - $i second out of $DURATION second"\\r
145+
echo "$i" > $SIGNAL_FILE
145146

147+
while [[ $(ps -aux | grep "collect.sh" | wc -l) -gt $(($(ls "/tmp/signalback" | wc -l)+1)) ]]; do
148+
149+
sleep 0.1
150+
done
146151
# Clean up signal file
147152
rm -f "$SIGNAL_FILE"
148-
pkill -f collect.sh
149-
# # Give a moment for all processes to register
150-
# sleep 1
153+
154+
rm -rf "/tmp/signalback"
155+
mkdir "/tmp/signalback"
156+
157+
echo -ne "Data Collection - $i second out of $DURATION second"\\r
158+
159+
151160
done
152161

162+
wait
163+
pkill -f collect.sh
164+
rm -rf "/tmp/signalback"
153165
echo ""
154166
echo "Data Collection - OK!"
155167

0 commit comments

Comments
 (0)