Skip to content

Commit 881ce71

Browse files
authored
Much faster memleak_translate.sh (#4604)
## Motivation

The current version of the script uses `addr2line`, which is incredibly slow.

## Proposal

Use `llvm-symbolizer` instead, which is much faster, and cache the translations so we don't translate the same address twice.

## Test Plan

Tested this while debugging the GoL backend memory leak, and it is much faster.

## Release Plan

- Nothing to do / These changes follow the usual release cycle.
1 parent a45630c commit 881ce71

File tree

1 file changed

+100
-17
lines changed

1 file changed

+100
-17
lines changed

scripts/memleak_translate.sh

Lines changed: 100 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,42 +2,125 @@
22

33
# memleak_translate.sh <PID>
44
#
5-
# Works with the default `memleak-libbpf`/`memleak` output that looks like
5+
# IMPORTANT: This script is for FILE PROCESSING ONLY - NOT for real-time translation.
6+
# It reads the ENTIRE input into memory first, then translates all addresses
7+
# in a single batch for maximum performance.
8+
#
9+
# Usage:
10+
# 1. Save memleak output to a file: memleak -p <PID> > memleak_output.txt
11+
# 2. Translate the file: cat memleak_output.txt | ./memleak_translate.sh <PID>
12+
#
13+
# Works with the default `memleak-libbpf`/`memleak` output that looks like:
614
# 0 [<0000aaaaeac54784>] [/linera-server]
7-
# and replaces the address line with the addr2line translated result, which should be the function
8-
# name, file name and line number.
915
#
10-
# add2line takes a while to execute, so it is recommended to run `memleak` for a while, while
11-
# redirecting the output to a file. Then inspect the file for what you want (maybe only the last
12-
# few lines of the output), and then run this script while the binary is still running, from within
13-
# the same container.
16+
# Requires llvm-symbolizer to be installed. Makes exactly ONE call to
17+
# llvm-symbolizer for ALL unique addresses.
1418

1519
set -euo pipefail
1620

1721
[[ $# -eq 1 ]] || { echo "usage: $0 <pid>" >&2; exit 1; }
1822

23+
# Check that llvm-symbolizer is available
24+
if ! command -v llvm-symbolizer &> /dev/null; then
25+
echo "Error: llvm-symbolizer not found. Please install it first." >&2
26+
echo " Ubuntu/Debian: apt-get install llvm" >&2
27+
echo " macOS: brew install llvm" >&2
28+
exit 1
29+
fi
30+
1931
PID=$1
2032
exe="/proc/$PID/exe"
2133

2234
BASE=$(awk -v bin="$(readlink -f /proc/$PID/exe)" '$NF==bin { split($1,a,"-"); print "0x"a[1]; exit }' /proc/$PID/maps)
2335

2436
[[ -n $BASE ]] || { echo "could not find base address for $exe" >&2; exit 1; }
2537

26-
# helper: translate <$addr> using addr2line, compensating for PIE base
27-
translate () {
28-
local addr=$1
29-
addr=${addr//[<>[\]]}
30-
[[ $addr != 0x* ]] && addr="0x${addr}"
31-
local rel=$(printf "0x%x" $(( addr - BASE )))
32-
addr2line -e "$exe" -f -C -p "$rel" 2>/dev/null || echo "$addr"
33-
}
38+
# Read entire input into array
39+
mapfile -t LINES
40+
41+
# Extract all unique addresses
42+
declare -A UNIQUE_ADDRS
43+
declare -a ADDR_ORDER
44+
45+
for line in "${LINES[@]}"; do
46+
if [[ $line =~ \[\<([0-9a-fA-F]+)\>\] ]]; then
47+
addr="${BASH_REMATCH[1]}"
48+
[[ $addr != 0x* ]] && addr="0x${addr}"
49+
if [[ -z "${UNIQUE_ADDRS[$addr]:-}" ]]; then
50+
UNIQUE_ADDRS[$addr]=1
51+
ADDR_ORDER+=("$addr")
52+
fi
53+
fi
54+
done
55+
56+
# If no addresses found, just output the original lines
57+
if [[ ${#ADDR_ORDER[@]} -eq 0 ]]; then
58+
printf "%s\n" "${LINES[@]}"
59+
exit 0
60+
fi
61+
62+
# Convert every absolute address to an offset from the executable's load base
# (BASE), which is the form handed to llvm-symbolizer further down.
declare -a REL_ADDRS
for addr in "${ADDR_ORDER[@]}"; do
  # printf -v assigns in the current shell, so no subshell is forked per
  # address. Both operands are 0x-prefixed hex strings that bash arithmetic
  # evaluates as numbers.
  printf -v rel '0x%x' "$(( addr - BASE ))"
  REL_ADDRS+=("$rel")
done
68+
69+
# Translate ALL addresses in ONE llvm-symbolizer call
70+
declare -A TRANSLATIONS
71+
72+
# Run llvm-symbolizer once over every relative address.
# Only stdout is captured: the parser below relies on a fixed number of output
# lines per address, so diagnostics written to stderr must not be mixed into
# SYMBOLS (they would shift every following record). stderr is left attached
# to the script's own stderr so warnings remain visible to the user.
if ! SYMBOLS=$(printf '%s\n' "${REL_ADDRS[@]}" \
  | llvm-symbolizer -e "$exe" --demangle --functions=linkage --inlining=false); then
  echo "Error: llvm-symbolizer failed to run" >&2
  # Show whatever partial output was produced before the failure.
  echo "$SYMBOLS" >&2
  exit 1
fi
78+
79+
# Parse output - llvm-symbolizer outputs exactly 3 lines per address:
80+
# Line 1: function name, Line 2: file path and line, Line 3: empty line
81+
readarray -t SYMBOL_LINES <<< "$SYMBOLS"
82+
83+
# Build the display string for one symbolized address.
#   $1 - function-name line from the symbolizer output
#   $2 - source-location line from the symbolizer output
#   $3 - address to fall back to when no usable symbol was found
# Prints "<function> <location>" with runs of spaces squeezed to a single
# space and a trailing space removed. Prints $3 instead when the result is
# empty or starts with the "?? " unknown-symbol placeholder.
format_translation() {
  local text="$1 $2"
  local fallback=$3

  # Squeeze repeated spaces with parameter expansion. A pattern such as
  # `sed 's/ */ /g'` also matches the empty string and would insert a space
  # between every character; this loop only ever touches real runs of spaces.
  while [[ $text == *"  "* ]]; do
    text=${text//"  "/ }
  done
  text=${text% }

  if [[ -z $text || $text == "?? "* ]]; then
    text=$fallback
  fi
  printf '%s\n' "$text"
}

# Pair each address with its record in SYMBOL_LINES. Record i starts at index
# i * 3: function line first, location line second.
for ((i = 0; i < ${#ADDR_ORDER[@]}; i++)); do
  addr="${ADDR_ORDER[$i]}"
  line_base=$((i * 3))

  # The :- defaults yield an empty string (and keep `set -u` quiet) when the
  # symbolizer produced fewer lines than expected.
  func_line="${SYMBOL_LINES[$line_base]:-}"
  loc_line="${SYMBOL_LINES[$((line_base + 1))]:-}"

  TRANSLATIONS[$addr]=$(format_translation "$func_line" "$loc_line" "$addr")
done
108+
109+
# Ensure all addresses have translations
110+
for addr in "${ADDR_ORDER[@]}"; do
111+
if [[ -z "${TRANSLATIONS[$addr]:-}" ]]; then
112+
TRANSLATIONS[$addr]="$addr"
113+
fi
114+
done
34115

35-
while IFS= read -r line; do
116+
# Output all lines with translations applied
117+
for line in "${LINES[@]}"; do
36118
if [[ $line =~ ([0-9]+)\ \[\<([0-9a-fA-F]+)\>\]\ \[([^]]+)\] ]]; then
37119
stack_pos="${BASH_REMATCH[1]}"
38120
addr="${BASH_REMATCH[2]}"
39121
origin="${BASH_REMATCH[3]}"
40-
echo " $stack_pos $(translate "$addr") [$origin]"
122+
[[ $addr != 0x* ]] && addr="0x${addr}"
123+
echo " $stack_pos ${TRANSLATIONS[$addr]} [$origin]"
41124
else
42125
echo "$line"
43126
fi

0 commit comments

Comments
 (0)