Skip to content

Commit 98c100a

Browse files
Merge pull request #5 from MikaelMollberg/one-collect-per-process
Start one process for each collection. This greatly decreases the collection variance for the Python VM.
2 parents 4bbe6af + f4cfb1b commit 98c100a

File tree

3 files changed

+140
-103
lines changed

3 files changed

+140
-103
lines changed

collect.sh

Lines changed: 30 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
NS=1000000000
44

5-
PROCESS=$1
6-
DURATION=$2
7-
VERBOSE=${3:-0}
8-
9-
if [ -z "$PROCESS" ] || [ -z "$DURATION" ]; then
10-
echo "Usage: $0 <process_name> <duration> [verbose_flag]"
5+
PID=$1
6+
OUTPUT_FILE=$2
7+
DURATION=$3
8+
VERBOSE=${4:-0}
9+
SIGNAL_FILE=$5
10+
11+
if [ -z "$PID" ] || [ -z "$OUTPUT_FILE" ] || [ -z "$DURATION" ]; then
12+
echo "Usage: $0 <pid> <output_file> <duration> [verbose_flag] [signal_file]"
1113
exit 1
1214
fi
1315

@@ -17,104 +19,38 @@ log_verbose() {
1719
fi
1820
}
1921

20-
rm -rf data/$PROCESS
21-
mkdir data/$PROCESS
22-
23-
PY_CHECK=0
24-
25-
case "$PROCESS" in
26-
"python3")
27-
PY_CHECK=1
28-
;;
29-
"python")
30-
PY_CHECK=1
31-
;;
32-
*)
33-
PY_CHECK=0
34-
;;
35-
esac
36-
37-
NCS_CHECK=0
38-
case "$PROCESS" in
39-
"ncs.smp")
40-
NCS_CHECK=1
41-
;;
42-
*)
43-
NCS_CHECK=0
44-
;;
45-
esac
22+
# Make sure the directory that will receive the output log exists.
OUTPUT_DIR=$(dirname "$OUTPUT_FILE")
mkdir -p "$OUTPUT_DIR"

# Optional start barrier: when a signal file path was passed in, poll until
# that file appears, so this collector does not start sampling before the
# caller gives the go-ahead.
if [ -n "$SIGNAL_FILE" ]; then
  log_verbose "Waiting for start signal..."

  until [ -f "$SIGNAL_FILE" ]; do
    sleep 0.1
  done

  log_verbose "Start signal received. Beginning data collection for PID $PID..."
fi
4736

4837
for (( i=0;i<=$DURATION;i++ ))
4938
do
5039
START_TIME=$(date +%s%N)
51-
#echo $i" second is collected"
52-
PID=$(pgrep -f $PROCESS)
53-
#PYfiles=$(ls data/python3)
54-
#UPDATEfiles=$(ps -o command -p $data | awk -F' ' '{print $9}')
55-
#Diff=$(comm <(echo $PYfiles) <(echo $UPDATEfiles))
56-
#if [ $PY_CHECK -eq 0 ]; then
57-
# PID=$(echo $PID | awk -F' ' '{print $1}')
58-
#fi
5940

6041
ALO_TOTAL=$(cat /proc/meminfo | grep 'Committed_AS' | awk -F' ' '{print $2}')
6142
Limit=$(cat /proc/meminfo | grep 'CommitLimit' | awk -F' ' '{print $2}')
6243

63-
SUM_ALO_PID=0
64-
SUM_PHY=0
65-
ALO_PID=0
66-
PHY=0
6744
TIME=$(date +%T)
68-
counter=$(wc -w <<< "$PID")
69-
70-
for pid in $PID ; do
71-
name=""
72-
com=""
73-
if [ $PY_CHECK -eq 1 ]; then
74-
name=$(ps -p $pid -o command | awk -F' ' '{print $9}')
75-
com=name
76-
else
77-
name=$PROCESS
78-
com=$(ps -p $pid -o command | awk -F' ' '{print $5}')
79-
#echo $pid" "$name " " $com " "$(ps -p $pid -o command)
80-
fi
81-
if [ ! -z "${name}" ] && [ ! -z "${com}" ] ; then
82-
name=$(echo $name)
83-
log_verbose "Monitoring PID: $pid $name"
84-
ALO_PID=$(pmap -d $pid | grep "writeable/private" | awk -F' ' '{print $4}' | egrep -o '[0-9.]+' )
85-
PHY=$(cat /proc/$pid/status | grep VmRSS | awk -F' ' '{print $2}')
86-
87-
if [ $PY_CHECK -eq 1 ] || [ $NCS_CHECK -eq 1 ] ; then
88-
re='^[0-9]+$'
89-
if [[ $ALO_PID =~ $re && $PHY =~ $re ]] ; then
90-
SUM_ALO_PID=$(($SUM_ALO_PID+$ALO_PID))
91-
SUM_PHY=$(($SUM_PHY+$PHY))
92-
fi
93-
fi
94-
fi
95-
if [ $counter -gt 1 ] ; then
96-
if [ ! -z "${name}" ] && [ ! -z "${com}" ] ; then
97-
if [ $NCS_CHECK -eq 1 ] ; then
98-
echo $TIME" "$SUM_PHY" "$SUM_ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_"$name".log"
99-
log_verbose "$i second is collected towards data/$PROCESS/mem_$name.log"
100-
else
101-
echo $TIME" "$PHY" "$ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_"$name".log"
102-
log_verbose "$i second is collected towards data/$PROCESS/mem_$name.log"
103-
fi
104-
fi
105-
else
106-
if [ ! -z "${name}" ] ; then
107-
echo $TIME" "$PHY" "$ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_"$name".log"
108-
log_verbose "$i second is collected towards data/$PROCESS/mem_$name.log"
109-
fi
110-
fi
111-
done
112-
113-
if [ $PY_CHECK -eq 1 ]; then
114-
echo $TIME" "$SUM_PHY" "$SUM_ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_total.log"
115-
fi
11645

117-
#echo $TIME" 0 0 0 0" >> "data/ref.log"
46+
log_verbose "Monitoring PID: $PID"
47+
ALO_PID=$(pmap -d $PID | grep "writeable/private" | awk -F' ' '{print $4}' | egrep -o '[0-9.]+' )
48+
PHY=$(cat /proc/$PID/status | grep VmRSS | awk -F' ' '{print $2}')
49+
50+
if [ ! -z "$ALO_PID" ] && [ ! -z "$PHY" ]; then
51+
echo $TIME" "$PHY" "$ALO_PID" "$ALO_TOTAL" "$Limit >> "$OUTPUT_FILE"
52+
log_verbose "$i second is collected to $OUTPUT_FILE"
53+
fi
11854

11955
END_TIME=$(date +%s%N)
12056
ELAPSED=$(($END_TIME - $START_TIME))
@@ -126,4 +62,4 @@ do
12662
fi
12763
done
12864

129-
echo "Collection for $PROCESS done"
65+
echo "Collection for PID $PID done"

graphs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ mkdir graphs/$1
2828

2929
for filename in data/$1/*.log; do
3030
if [ $PY_CHECK -eq 1 ]; then
31-
name=$(echo $filename | awk -F'_' '{print $2}'| awk -F'.' '{print $1}')
31+
name=$(echo $filename | awk -F'/' '{print $NF}' | awk -F'mem_|\.log' '{print $2}')
3232
else
3333
name=$1
3434
fi

plot.sh

Lines changed: 109 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,49 @@
33
VERBOSE=0
44
DURATION=""
55

6+
#######################################
# Build data/python3/mem_total.log by summing the per-process Python logs.
#
# Every data/python3/mem_*.log except mem_total.log is treated as one
# process. Each input row is "<time> <rss> <alloc> <system_total> <limit>".
# For every timestamp that is present in ALL process logs, one row is written
# with rss and alloc summed across processes; system_total and limit are
# copied from the last log read for that timestamp.
#
# Globals:   none
# Arguments: none
# Outputs:   progress messages on stdout; writes data/python3/mem_total.log
# Returns:   0, including when there is nothing to combine
#######################################
create_combined_python_log() {
  local log_dir="data/python3"
  local total_log="$log_dir/mem_total.log"
  local -a logs=()
  local log

  echo "Creating combined Python total log..."

  # Collect the per-process logs. mem_total.log is this function's own
  # output and must never be fed back in as an input.
  while IFS= read -r -d '' log; do
    logs+=("$log")
  done < <(find "$log_dir" -name "mem_*.log" ! -name "mem_total.log" -print0 2>/dev/null)

  if [ "${#logs[@]}" -eq 0 ]; then
    echo "No Python process logs found to combine"
    return
  fi

  # awk reads the logs directly so FILENAME identifies the process a row
  # belongs to. Completeness is judged per file, not per row: a process that
  # logged twice within one second must not stand in for a process that has
  # no sample for that second.
  awk -v num_files="${#logs[@]}" '
    NF == 0 { next }
    {
      ts = $1

      # Only the first sample of a process for a given timestamp is used.
      if ((FILENAME, ts) in sampled) {
        next
      }
      sampled[FILENAME, ts] = 1

      rss_sum[ts] += $2
      alloc_sum[ts] += $3
      sys_total[ts] = $4
      commit_limit[ts] = $5

      if (++file_count[ts] == 1) {
        order[++ts_count] = ts
      }
    }
    END {
      # Rows are emitted in first-seen order. Any timestamp that is emitted
      # exists in every log, hence in the first log read, so this is that
      # log'"'"'s chronological order. A lexical sort on HH:MM:SS would move
      # samples taken after midnight in front of those taken before it.
      for (i = 1; i <= ts_count; i++) {
        ts = order[i]
        if (file_count[ts] == num_files) {
          print ts, rss_sum[ts], alloc_sum[ts], sys_total[ts], commit_limit[ts]
        }
      }
    }
  ' "${logs[@]}" > "$total_log"

  echo "Combined Python total log created"
}
48+
649
# Parse arguments
750
while [[ $# -gt 0 ]]; do
851
case $1 in
@@ -24,12 +67,65 @@ if [ -z "$DURATION" ]; then
2467
exit 1
2568
fi
2669

70+
# Clean up old data
rm -rf data/ncs.smp data/NcsJVMLauncher data/python3

echo "====================================== Collection for for all process ====================================================="

# Start barrier: each collector polls for SIGNAL_FILE and only begins sampling
# once it exists. The file lives inside a private mktemp directory so its path
# cannot be predicted or pre-created by another user of a shared /tmp.
SIGNAL_DIR=$(mktemp -d "${TMPDIR:-/tmp}/nso_collect_start_signal.XXXXXX") || {
  echo "Unable to create a directory for the start signal file" >&2
  exit 1
}
SIGNAL_FILE="$SIGNAL_DIR/start"

# Find and collect for each process type
echo "Starting collection processes..."

# collect.sh takes exactly one PID as its first argument. pgrep can print
# several PIDs, which would shift every following argument, so -o restricts
# the match to a single (the oldest) process. All arguments are quoted so an
# empty value can never shift the signal file path into another slot.

# Collect ncs.smp or beam.smp NSO process
NCS_PID=$(pgrep -o -f "\.smp.*-ncs true")
if [ -n "$NCS_PID" ]; then
  echo "Starting collection for ncs.smp PID $NCS_PID"
  bash collect.sh "$NCS_PID" "data/ncs.smp/mem_ncs.smp.log" "$DURATION" "$VERBOSE" "$SIGNAL_FILE" &
fi

# Collect NcsJVMLauncher process
JVM_PID=$(pgrep -o -f NcsJVMLauncher)
if [ -n "$JVM_PID" ]; then
  echo "Starting collection for NcsJVMLauncher PID $JVM_PID"
  bash collect.sh "$JVM_PID" "data/NcsJVMLauncher/mem_NcsJVMLauncher.log" "$DURATION" "$VERBOSE" "$SIGNAL_FILE" &
fi

# Collect Python processes: one collector per matching process
PYTHON_PIDS=$(pgrep -f "python.* .*startup\.py")
if [ -n "$PYTHON_PIDS" ]; then
  mkdir -p data/python3
  # Word splitting is intentional here: pgrep prints one PID per line.
  for pid in $PYTHON_PIDS; do
    # The 9th word of the process command line is used to name the log file.
    PYTHON_SCRIPT=$(ps -p "$pid" -o command | tail -n 1 | awk -F' ' '{print $9}')
    # Processes without a 9th word are not collected.
    if [ -z "$PYTHON_SCRIPT" ]; then
      continue
    fi
    # Fall back to a PID-based name if basename rejects the value.
    SCRIPT_NAME=$(basename "$PYTHON_SCRIPT" .py 2>/dev/null || echo "python_$pid")
    echo "Starting collection for Python process PID $pid: $SCRIPT_NAME"
    bash collect.sh "$pid" "data/python3/mem_$SCRIPT_NAME.log" "$DURATION" "$VERBOSE" "$SIGNAL_FILE" &
  done
else
  echo "No Python processes found to collect"
fi

# Give a moment for all processes to register
sleep 1

# Signal all processes to start collecting
echo "All collection processes started. Signaling to begin data collection..."
touch "$SIGNAL_FILE"

# Block until every background collector has finished
wait

# Clean up signal file together with its private directory
rm -rf -- "$SIGNAL_DIR"

if [ -n "$PYTHON_PIDS" ]; then
  create_combined_python_log
fi

echo "===================================== Collection for for all process done ================================================="
33129

34130

35131
echo "====================================== Ploting graph to all process ========================================================"
@@ -38,10 +134,15 @@ bash graphs.sh ncs.smp $VERBOSE
38134
echo -e "===================================== Ploting graph for ncs.smp process done =================================================\n"
39135
echo "====================================== Ploting graph for NcsJVMLauncher process ========================================================"
40136
bash graphs.sh NcsJVMLauncher $VERBOSE
41-
echo -e "===================================== Ploting graph for NcsJVMLauncher process done =================================================\n"
42-
echo "====================================== Ploting graph for python3 process ========================================================"
43-
bash graphs.sh python3 $VERBOSE
44-
echo -e "===================================== Ploting graph for python3 process done =================================================\n"
137+
# These banners end with a blank line. bash's echo only expands "\n" when
# given -e, so a plain echo would print a literal backslash-n; printf emits the
# intended newline regardless of echo settings.
printf '%s\n\n' "===================================== Ploting graph for NcsJVMLauncher process done ================================================="
printf '%s\n\n' "====================================== Ploting graph for python3 processes ========================================================"
# Only plot when a python3 data directory exists.
if [ -d "data/python3" ]; then
  echo "Plotting combined graph for python3 processes"
  bash graphs.sh python3 $VERBOSE
else
  echo "No python3 data directory found"
fi
printf '%s\n\n' "===================================== Ploting graph for python3 processes done ================================================="
45146
echo "====================================== Ploting graph to compare between process ========================================================"
46147
bash graphs_compare.sh $VERBOSE
47148
echo -e "====================================== Ploting graph to compare between process done ========================================================\n"

0 commit comments

Comments
 (0)