Skip to content

Commit dd71ae1

Browse files
committed
Slim down stack frames
This reduces the stack frame size of mp_builtin___import__ by limiting the supported path length of files from 256 to 96. This function can be called recursively for nested imports, so the savings add up. Also reduce the stack frame of mp_execute_bytecode (vm.c) from 206 to 124 per bytecode (bc) call. This function is recursive too, so its savings add up as well. It is reduced by preventing some inlining, which may decrease performance slightly when importing and unpacking. Adds two new scripts for debugging. One is used from gdb to print frame sizes in a backtrace. The other prints which pcs use a particular stack offset, which helps find infrequently used stack space. Fixes #8053.
1 parent 475ffc3 commit dd71ae1

File tree

4 files changed

+99
-7
lines changed

4 files changed

+99
-7
lines changed

py/circuitpy_mpconfig.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ extern void common_hal_mcu_enable_interrupts(void);
5959
//
6060
// default is 128; consider raising to reduce fragmentation.
6161
#define MICROPY_ALLOC_PARSE_CHUNK_INIT (16)
62-
// default is 512.
63-
#define MICROPY_ALLOC_PATH_MAX (256)
62+
// default is 512. Longest path in .py bundle as of June 6th, 2023 is 73 characters.
63+
#define MICROPY_ALLOC_PATH_MAX (96)
6464
#define MICROPY_CAN_OVERRIDE_BUILTINS (1)
6565
#define MICROPY_COMP_CONST (1)
6666
#define MICROPY_COMP_DOUBLE_TUPLE_ASSIGN (1)

py/runtime.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ mp_obj_t MICROPY_WRAP_MP_LOAD_GLOBAL(mp_load_global)(qstr qst) {
201201
return elem->value;
202202
}
203203

204-
mp_obj_t mp_load_build_class(void) {
204+
mp_obj_t __attribute__((noinline)) mp_load_build_class(void) {
205205
DEBUG_OP_printf("load_build_class\n");
206206
#if MICROPY_CAN_OVERRIDE_BUILTINS
207207
if (MP_STATE_VM(mp_module_builtins_override_dict) != NULL) {
@@ -858,7 +858,7 @@ mp_obj_t mp_call_method_n_kw_var(bool have_self, size_t n_args_n_kw, const mp_ob
858858
}
859859

860860
// unpacked items are stored in reverse order into the array pointed to by items
861-
void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
861+
void __attribute__((noinline,)) mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
862862
size_t seq_len;
863863
if (mp_obj_is_type(seq_in, &mp_type_tuple) || mp_obj_is_type(seq_in, &mp_type_list)) {
864864
mp_obj_t *seq_items;
@@ -905,7 +905,7 @@ void mp_unpack_sequence(mp_obj_t seq_in, size_t num, mp_obj_t *items) {
905905
}
906906

907907
// unpacked items are stored in reverse order into the array pointed to by items
908-
void mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
908+
void __attribute__((noinline)) mp_unpack_ex(mp_obj_t seq_in, size_t num_in, mp_obj_t *items) {
909909
size_t num_left = num_in & 0xff;
910910
size_t num_right = (num_in >> 8) & 0xff;
911911
DEBUG_OP_printf("unpack ex " UINT_FMT " " UINT_FMT "\n", num_left, num_right);
@@ -1482,7 +1482,7 @@ mp_obj_t mp_import_name(qstr name, mp_obj_t fromlist, mp_obj_t level) {
14821482
return mp_builtin___import__(5, args);
14831483
}
14841484

1485-
mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
1485+
mp_obj_t __attribute__((noinline,)) mp_import_from(mp_obj_t module, qstr name) {
14861486
DEBUG_printf("import from %p %s\n", module, qstr_str(name));
14871487

14881488
mp_obj_t dest[2];
@@ -1528,7 +1528,7 @@ mp_obj_t mp_import_from(mp_obj_t module, qstr name) {
15281528
#endif
15291529
}
15301530

1531-
void mp_import_all(mp_obj_t module) {
1531+
void __attribute__((noinline)) mp_import_all(mp_obj_t module) {
15321532
DEBUG_printf("import all %p\n", module);
15331533

15341534
// TODO: Support __all__

tools/gdb-stack-size.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
"""Print a backtrace annotated with the stack size of each frame.

Source this file into gdb `source ../../tools/gdb-stack-size.py` then run
`stack-size` to print a backtrace with each frame size next to it.

Run `stack-size FUNCTION` (for example `stack-size mp_execute_bytecode`) to
additionally dump the frame-resident symbols of every frame whose function is
named FUNCTION.
"""

# GDB pre-imports this module for sourced scripts; the explicit import only
# documents the dependency. The module does not exist outside of gdb.
import gdb


class StackSize(gdb.Command):
    """Print a backtrace with the stack size of each frame.

    Usage: stack-size [FUNCTION]

    With FUNCTION, also list the symbols that need a frame in every block of
    each frame executing FUNCTION.
    """

    def __init__(self):
        super(StackSize, self).__init__("stack-size", gdb.COMMAND_USER)

    def invoke(self, arg, from_tty):
        """Walk from the newest frame outwards, printing each frame's size.

        Args:
            arg: Command argument string. If non-empty, it names the function
                whose frames get the detailed symbol dump.
            from_tty: Unused; part of the gdb.Command interface.
        """
        detail_name = arg.strip() if arg else ""
        frame = gdb.newest_frame()
        total_size = 0
        while frame:
            sp = frame.read_register("sp")
            frame_up = frame.older()
            if not frame_up:
                # A frame's size is the distance to its caller's sp, so the
                # outermost frame (no caller) cannot be measured.
                break
            func = frame.function()
            # Pad single-digit levels so the columns line up ("#5 " / "#12").
            label = "#" + str(frame.level()).ljust(2)
            size = frame_up.read_register("sp") - sp
            total_size += size
            print(label, sp, frame.type(), func, " " * (40 - len(str(func))), size)
            # frame.function() is None when gdb has no symbol for the frame.
            if detail_name and func is not None and func.name == detail_name:
                self._print_frame_detail(frame, func)
            frame = frame_up
        print("total size:", total_size)

    @staticmethod
    def _print_block_symbols(frame, block):
        """Print every symbol in `block` that needs a frame to be read."""
        for sym in block:
            if not sym.needs_frame:
                continue
            value = sym.value(frame)
            print(" ", sym.addr_class, value.address, sym.type.sizeof, sym, sym.type, value)

    def _print_frame_detail(self, frame, func):
        """Dump frame-resident symbols for `frame`, then for each block in its pc range."""
        try:
            block = frame.block()
        except RuntimeError:
            # gdb raises RuntimeError when it cannot find a block for the frame.
            return

        # Walk outwards through the enclosing blocks, remembering the last
        # block printed. Starting `scan_block` at the innermost block keeps it
        # valid even if the loop body never runs.
        scan_block = block
        while not block.is_static:
            print(" block", hex(block.start), hex(block.end), block.function)
            self._print_block_symbols(frame, block)
            scan_block = block
            block = block.superblock
            if block.function == func:
                break

        print("pc scan", hex(scan_block.start), hex(scan_block.end))
        seen = set()
        # NOTE(review): the step of 2 presumably matches Thumb instruction
        # alignment - confirm before using on other architectures.
        for pc in range(scan_block.start, scan_block.end, 2):
            pc_block = gdb.block_for_pc(pc)
            if pc_block is None:
                continue
            span = (pc_block.start, pc_block.end)
            if span in seen:
                continue
            seen.add(span)
            print(" ", hex(pc), hex(pc_block.start), hex(pc_block.end), pc_block.function)
            self._print_block_symbols(frame, pc_block)


# Instantiating the class registers the command with gdb.
StackSize()

tools/stack-loc-to-pc.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
"""Prints the pcs that access each stack location in a function. Useful for finding
infrequently used stack space.

Pipe in disassembly like so:

arm-none-eabi-objdump --disassemble=mp_execute_bytecode build-metro_m0_express/firmware.elf | python ../../tools/stack-loc-to-pc.py
"""

import re
import sys

# Matches an sp-relative immediate operand such as "[sp, #12]" or "sp, #44".
SP_OFFSET = re.compile(r"sp, #(\d+)")


def collect_offsets(lines):
    """Map each sp-relative offset to the pcs of the instructions that use it.

    Args:
        lines: Iterable of objdump disassembly lines.

    Returns:
        Dict of int offset -> list of pc strings (the text before the first
        ":" on the line, stripped), in input order.
    """
    offsets = {}
    for line in lines:
        match = SP_OFFSET.search(line)
        if match is None:
            # Lines that mention sp without an immediate offset (for example
            # "mov r7, sp") carry no stack location; skip them rather than
            # crash on a failed match.
            continue
        pc = line.split(":")[0].strip()
        offsets.setdefault(int(match.group(1)), []).append(pc)
    return offsets


def main():
    """Read disassembly from stdin and print a tab-separated offset table."""
    offsets = collect_offsets(sys.stdin)
    print("Offset", "Size", "PCs", sep="\t")
    last_offset = 0
    for offset in sorted(offsets):
        # "Size" is the gap from the previous used offset to this one.
        print(offset, offset - last_offset, offsets[offset], sep="\t")
        last_offset = offset


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)