@@ -53,6 +53,104 @@ static void print_max_stack(void)
53
53
}
54
54
}
55
55
56
+ /*
57
+ * The stack tracer looks for a maximum stack at each call from a function. It
58
+ * registers a callback from ftrace, and in that callback it examines the stack
59
+ * size. It determines the stack size from the variable passed in, which is the
60
+ * address of a local variable in the stack_trace_call() callback function.
61
+ * The stack size is calculated as the distance from the address of the local variable to the top
62
+ * of the current stack. If that size is smaller than the currently saved max
63
+ * stack size, nothing more is done.
64
+ *
65
+ * If the size of the stack is greater than the maximum recorded size, then the
66
+ * following algorithm takes place.
67
+ *
68
+ * For architectures (like x86) that store the function's return address before
69
+ * saving the function's local variables, the stack will look something like
70
+ * this:
71
+ *
72
+ * [ top of stack ]
73
+ * 0: sys call entry frame
74
+ * 10: return addr to entry code
75
+ * 11: start of sys_foo frame
76
+ * 20: return addr to sys_foo
77
+ * 21: start of kernel_func_bar frame
78
+ * 30: return addr to kernel_func_bar
79
+ * 31: [ do trace stack here ]
80
+ *
81
+ * save_stack_trace() is then called, returning all the functions it finds in the
82
+ * current stack, which would be (from the bottom of the stack to the top):
83
+ *
84
+ * return addr to kernel_func_bar
85
+ * return addr to sys_foo
86
+ * return addr to entry code
87
+ *
88
+ * Now to figure out how much stack each of these functions' local variables use,
89
+ * a search of the stack is made to find these values. When a match is made, it
90
+ * is added to the stack_dump_trace[] array. The offset into the stack is saved
91
+ * in the stack_trace_index[] array. The above example would show:
92
+ *
93
+ * stack_dump_trace[] | stack_trace_index[]
94
+ * ------------------ + -------------------
95
+ * return addr to kernel_func_bar | 30
96
+ * return addr to sys_foo | 20
97
+ * return addr to entry | 10
98
+ *
99
+ * The print_max_stack() function above uses these values to print the size of
100
+ * each function's portion of the stack.
101
+ *
102
+ * for (i = 0; i < nr_entries; i++) {
103
+ * size = i == nr_entries - 1 ? stack_trace_index[i] :
104
+ * stack_trace_index[i] - stack_trace_index[i+1];
105
+ * print("%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]);
106
+ * }
107
+ *
108
+ * The above shows
109
+ *
110
+ * depth size location
111
+ * ----- ---- --------
112
+ * 0 30 10 kernel_func_bar
113
+ * 1 20 10 sys_foo
114
+ * 2 10 10 entry code
115
+ *
116
+ * Now for architectures that might save the return address after the function's
117
+ * local variables (saving the link register before calling nested functions),
118
+ * this will cause the stack to look a little different:
119
+ *
120
+ * [ top of stack ]
121
+ * 0: sys call entry frame
122
+ * 10: start of sys_foo frame
123
+ * 19: return addr to entry code << lr saved before calling kernel_func_bar
124
+ * 20: start of kernel_func_bar frame
125
+ * 29: return addr to sys_foo << lr saved before calling next function
126
+ * 30: [ do trace stack here ]
127
+ *
128
+ * Although the functions returned by save_stack_trace() may be the same, the
129
+ * placement in the stack will be different. Using the same algorithm as above
130
+ * would yield:
131
+ *
132
+ * stack_dump_trace[] | stack_trace_index[]
133
+ * ------------------ + -------------------
134
+ * return addr to kernel_func_bar | 30
135
+ * return addr to sys_foo | 29
136
+ * return addr to entry | 19
137
+ *
138
+ * Where the mapping is off by one:
139
+ *
140
+ * kernel_func_bar stack frame size is 29 - 19 not 30 - 29!
141
+ *
142
+ * To fix this, if the architecture sets ARCH_RET_ADDR_AFTER_LOCAL_VARS the
143
+ * values in stack_trace_index[] are shifted by one and the number of
144
+ * stack trace entries is decremented by one.
145
+ *
146
+ * stack_dump_trace[] | stack_trace_index[]
147
+ * ------------------ + -------------------
148
+ * return addr to kernel_func_bar | 29
149
+ * return addr to sys_foo | 19
150
+ *
151
+ * Although the entry function is not displayed, the first function (sys_foo)
152
+ * will still include the stack size of the entry function.
153
+ */
56
154
static void check_stack (unsigned long ip , unsigned long * stack )
57
155
{
58
156
unsigned long this_size , flags ; unsigned long * p , * top , * start ;
0 commit comments