Skip to content

Commit f4cfb1b

Browse files
MikaelMollberg (Mikael Mollberg)
authored and committed
Start one process for each collection
Change the Python collection so that each monitored process is collected by its own collector process, instead of all of them being collected inside a single process. This improves collection on systems with many Python packages, where a single collection iteration could otherwise take more than one second.
1 parent 865a49b commit f4cfb1b

File tree

2 files changed

+139
-102
lines changed

2 files changed

+139
-102
lines changed

collect.sh

Lines changed: 30 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22

33
NS=1000000000
44

5-
PROCESS=$1
6-
DURATION=$2
7-
VERBOSE=${3:-0}
8-
9-
if [ -z "$PROCESS" ] || [ -z "$DURATION" ]; then
10-
echo "Usage: $0 <process_name> <duration> [verbose_flag]"
5+
PID=$1
6+
OUTPUT_FILE=$2
7+
DURATION=$3
8+
VERBOSE=${4:-0}
9+
SIGNAL_FILE=$5
10+
11+
if [ -z "$PID" ] || [ -z "$OUTPUT_FILE" ] || [ -z "$DURATION" ]; then
12+
echo "Usage: $0 <pid> <output_file> <duration> [verbose_flag] [signal_file]"
1113
exit 1
1214
fi
1315

@@ -17,104 +19,38 @@ log_verbose() {
1719
fi
1820
}
1921

20-
rm -rf data/$PROCESS
21-
mkdir data/$PROCESS
22-
23-
PY_CHECK=0
24-
25-
case "$PROCESS" in
26-
"python3")
27-
PY_CHECK=1
28-
;;
29-
"python")
30-
PY_CHECK=1
31-
;;
32-
*)
33-
PY_CHECK=0
34-
;;
35-
esac
36-
37-
NCS_CHECK=0
38-
case "$PROCESS" in
39-
"ncs.smp")
40-
NCS_CHECK=1
41-
;;
42-
*)
43-
NCS_CHECK=0
44-
;;
45-
esac
22+
OUTPUT_DIR=$(dirname "$OUTPUT_FILE")
23+
mkdir -p "$OUTPUT_DIR"
24+
25+
26+
# Wait for all collection processes to be ready before starting
27+
if [ ! -z "$SIGNAL_FILE" ]; then
28+
log_verbose "Waiting for start signal..."
29+
30+
while [ ! -f "$SIGNAL_FILE" ]; do
31+
sleep 0.1
32+
done
4633

34+
log_verbose "Start signal received. Beginning data collection for PID $PID..."
35+
fi
4736

4837
for (( i=0;i<=$DURATION;i++ ))
4938
do
5039
START_TIME=$(date +%s%N)
51-
#echo $i" second is collected"
52-
PID=$(pgrep -f $PROCESS)
53-
#PYfiles=$(ls data/python3)
54-
#UPDATEfiles=$(ps -o command -p $data | awk -F' ' '{print $9}')
55-
#Diff=$(comm <(echo $PYfiles) <(echo $UPDATEfiles))
56-
#if [ $PY_CHECK -eq 0 ]; then
57-
# PID=$(echo $PID | awk -F' ' '{print $1}')
58-
#fi
5940

6041
ALO_TOTAL=$(cat /proc/meminfo | grep 'Committed_AS' | awk -F' ' '{print $2}')
6142
Limit=$(cat /proc/meminfo | grep 'CommitLimit' | awk -F' ' '{print $2}')
6243

63-
SUM_ALO_PID=0
64-
SUM_PHY=0
65-
ALO_PID=0
66-
PHY=0
6744
TIME=$(date +%T)
68-
counter=$(wc -w <<< "$PID")
69-
70-
for pid in $PID ; do
71-
name=""
72-
com=""
73-
if [ $PY_CHECK -eq 1 ]; then
74-
name=$(ps -p $pid -o command | awk -F' ' '{print $9}')
75-
com=name
76-
else
77-
name=$PROCESS
78-
com=$(ps -p $pid -o command | awk -F' ' '{print $5}')
79-
#echo $pid" "$name " " $com " "$(ps -p $pid -o command)
80-
fi
81-
if [ ! -z "${name}" ] && [ ! -z "${com}" ] ; then
82-
name=$(echo $name)
83-
log_verbose "Monitoring PID: $pid $name"
84-
ALO_PID=$(pmap -d $pid | grep "writeable/private" | awk -F' ' '{print $4}' | egrep -o '[0-9.]+' )
85-
PHY=$(cat /proc/$pid/status | grep VmRSS | awk -F' ' '{print $2}')
86-
87-
if [ $PY_CHECK -eq 1 ] || [ $NCS_CHECK -eq 1 ] ; then
88-
re='^[0-9]+$'
89-
if [[ $ALO_PID =~ $re && $PHY =~ $re ]] ; then
90-
SUM_ALO_PID=$(($SUM_ALO_PID+$ALO_PID))
91-
SUM_PHY=$(($SUM_PHY+$PHY))
92-
fi
93-
fi
94-
fi
95-
if [ $counter -gt 1 ] ; then
96-
if [ ! -z "${name}" ] && [ ! -z "${com}" ] ; then
97-
if [ $NCS_CHECK -eq 1 ] ; then
98-
echo $TIME" "$SUM_PHY" "$SUM_ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_"$name".log"
99-
log_verbose "$i second is collected towards data/$PROCESS/mem_$name.log"
100-
else
101-
echo $TIME" "$PHY" "$ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_"$name".log"
102-
log_verbose "$i second is collected towards data/$PROCESS/mem_$name.log"
103-
fi
104-
fi
105-
else
106-
if [ ! -z "${name}" ] ; then
107-
echo $TIME" "$PHY" "$ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_"$name".log"
108-
log_verbose "$i second is collected towards data/$PROCESS/mem_$name.log"
109-
fi
110-
fi
111-
done
112-
113-
if [ $PY_CHECK -eq 1 ]; then
114-
echo $TIME" "$SUM_PHY" "$SUM_ALO_PID" "$ALO_TOTAL" "$Limit >> "data/"$PROCESS"/mem_total.log"
115-
fi
11645

117-
#echo $TIME" 0 0 0 0" >> "data/ref.log"
46+
log_verbose "Monitoring PID: $PID"
47+
ALO_PID=$(pmap -d $PID | grep "writeable/private" | awk -F' ' '{print $4}' | egrep -o '[0-9.]+' )
48+
PHY=$(cat /proc/$PID/status | grep VmRSS | awk -F' ' '{print $2}')
49+
50+
if [ ! -z "$ALO_PID" ] && [ ! -z "$PHY" ]; then
51+
echo $TIME" "$PHY" "$ALO_PID" "$ALO_TOTAL" "$Limit >> "$OUTPUT_FILE"
52+
log_verbose "$i second is collected to $OUTPUT_FILE"
53+
fi
11854

11955
END_TIME=$(date +%s%N)
12056
ELAPSED=$(($END_TIME - $START_TIME))
@@ -126,4 +62,4 @@ do
12662
fi
12763
done
12864

129-
echo "Collection for $PROCESS done"
65+
echo "Collection for PID $PID done"

plot.sh

Lines changed: 109 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,49 @@
33
VERBOSE=0
44
DURATION=""
55

6+
#######################################
# Build data/python3/mem_total.log by summing the per-process Python logs.
#
# Every data/python3/mem_*.log file except mem_total.log itself is treated as
# one process log whose rows are:
#   <timestamp> <rss> <alloc> <system_total> <limit>
# For each timestamp the rss and alloc columns are summed across the logs; the
# system_total and limit columns are taken from the last row read for that
# timestamp. A timestamp is written only when every input log contributed a
# sample for it, so the totals never mix a partial set of processes.
#
# Globals:   none
# Arguments: none
# Outputs:   progress messages on stdout; writes data/python3/mem_total.log
# Returns:   0 when there is nothing to combine, otherwise the status of the
#            final pipeline
#######################################
create_combined_python_log() {
  local log_dir="data/python3"
  local total_log="${log_dir}/mem_total.log"
  local -a inputs=()
  local candidate

  echo "Creating combined Python total log..."

  # Collect the input set once so the file count handed to awk always matches
  # the files awk actually reads. A previous mem_total.log is never an input,
  # and an unmatched glob (which stays literal) is rejected by the -f test.
  for candidate in "${log_dir}"/mem_*.log; do
    [[ -f "$candidate" ]] || continue
    [[ "$candidate" == "$total_log" ]] && continue
    inputs+=("$candidate")
  done

  if (( ${#inputs[@]} == 0 )); then
    echo "No Python process logs found to combine"
    return 0
  fi

  awk -v num_files="${#inputs[@]}" '
    # Skip blank or truncated rows instead of aggregating them under a
    # garbage timestamp.
    NF < 5 { next }
    {
      ts = $1

      # Count each log at most once per timestamp: a log that sampled twice
      # within the same second must not be double counted, nor make up for
      # another log that has no sample for that second.
      if ((ts, FILENAME) in sampled) {
        next
      }
      sampled[ts, FILENAME] = 1

      if (!(ts in contributors)) {
        order[++ts_count] = ts
      }
      contributors[ts]++

      rss[ts] += $2
      alloc[ts] += $3
      system_total[ts] = $4
      limit[ts] = $5
    }
    END {
      for (i = 1; i <= ts_count; i++) {
        ts = order[i]
        if (contributors[ts] == num_files) {
          print ts, rss[ts], alloc[ts], system_total[ts], limit[ts]
        }
      }
    }
  ' "${inputs[@]}" | LC_ALL=C sort > "$total_log"

  echo "Combined Python total log created"
}
48+
649
# Parse arguments
750
while [[ $# -gt 0 ]]; do
851
case $1 in
@@ -24,12 +67,65 @@ if [ -z "$DURATION" ]; then
2467
exit 1
2568
fi
2669

70+
# Clean up old data
71+
rm -rf data/ncs.smp data/NcsJVMLauncher data/python3
72+
2773
echo "====================================== Collection for for all process ====================================================="
28-
bash collect.sh ncs.smp $DURATION $VERBOSE &
29-
bash collect.sh NcsJVMLauncher $DURATION $VERBOSE &
30-
bash collect.sh python3 $DURATION $VERBOSE &
74+
75+
# Create a signal file to coordinate process startup
76+
SIGNAL_FILE="/tmp/nso_collect_start_signal_$$"
77+
rm -f "$SIGNAL_FILE"
78+
79+
# Find and collect for each process type
80+
echo "Starting collection processes..."
81+
82+
# Collect ncs.smp or beam.smp NSO process
83+
NCS_PID=$(pgrep -f "\.smp.*-ncs true")
84+
if [ ! -z "$NCS_PID" ]; then
85+
echo "Starting collection for ncs.smp PID $NCS_PID"
86+
bash collect.sh $NCS_PID "data/ncs.smp/mem_ncs.smp.log" $DURATION $VERBOSE "$SIGNAL_FILE" &
87+
fi
88+
89+
# Collect NcsJVMLauncher process
90+
JVM_PID=$(pgrep -f NcsJVMLauncher)
91+
if [ ! -z "$JVM_PID" ]; then
92+
echo "Starting collection for NcsJVMLauncher PID $JVM_PID"
93+
bash collect.sh $JVM_PID "data/NcsJVMLauncher/mem_NcsJVMLauncher.log" $DURATION $VERBOSE "$SIGNAL_FILE" &
94+
fi
95+
96+
# Collect Python processes
97+
PYTHON_PIDS=$(pgrep -f "python.* .*startup\.py")
98+
if [ ! -z "$PYTHON_PIDS" ]; then
99+
mkdir -p data/python3
100+
for pid in $PYTHON_PIDS; do
101+
PYTHON_SCRIPT=$(ps -p $pid -o command | tail -n 1 | awk -F' ' '{print $9}')
102+
SCRIPT_NAME=$(basename "$PYTHON_SCRIPT" .py 2>/dev/null || echo "python_$pid")
103+
if [ ! -z "$PYTHON_SCRIPT" ]; then
104+
echo "Starting collection for Python process PID $pid: $SCRIPT_NAME"
105+
bash collect.sh $pid "data/python3/mem_$SCRIPT_NAME.log" $DURATION $VERBOSE "$SIGNAL_FILE" &
106+
fi
107+
done
108+
else
109+
echo "No Python processes found to collect"
110+
fi
111+
112+
# Give a moment for all processes to register
113+
sleep 1
114+
115+
# Signal all processes to start collecting
116+
echo "All collection processes started. Signaling to begin data collection..."
117+
touch "$SIGNAL_FILE"
118+
31119
wait
32-
echo -e "===================================== Collection for for all process done =================================================\n\n"
120+
121+
# Clean up signal file
122+
rm -f "$SIGNAL_FILE"
123+
124+
if [ ! -z "$PYTHON_PIDS" ]; then
125+
create_combined_python_log
126+
fi
127+
128+
echo "===================================== Collection for for all process done ================================================="
33129

34130

35131
echo "====================================== Ploting graph to all process ========================================================"
@@ -38,10 +134,15 @@ bash graphs.sh ncs.smp $VERBOSE
38134
echo -e "===================================== Ploting graph for ncs.smp process done =================================================\n"
39135
echo "====================================== Ploting graph for NcsJVMLauncher process ========================================================"
40136
bash graphs.sh NcsJVMLauncher $VERBOSE
41-
echo -e "===================================== Ploting graph for NcsJVMLauncher process done =================================================\n"
42-
echo "====================================== Ploting graph for python3 process ========================================================"
43-
bash graphs.sh python3 $VERBOSE
44-
echo -e "===================================== Ploting graph for python3 process done =================================================\n"
137+
echo "===================================== Ploting graph for NcsJVMLauncher process done =================================================\n"
138+
echo "====================================== Ploting graph for python3 processes ========================================================\n"
139+
if [ -d "data/python3" ]; then
140+
echo "Plotting combined graph for python3 processes"
141+
bash graphs.sh python3 $VERBOSE
142+
else
143+
echo "No python3 data directory found"
144+
fi
145+
echo "===================================== Ploting graph for python3 processes done =================================================\n"
45146
echo "====================================== Ploting graph to compare between process ========================================================"
46147
bash graphs_compare.sh $VERBOSE
47148
echo -e "====================================== Ploting graph to compare between process done ========================================================\n"

0 commit comments

Comments
 (0)