@@ -8,8 +8,9 @@ L_dyn_call_begin:
8
8
# At this point , the following registers are bound :
9
9
#
10
10
# rdi < - callee
11
- # rsi < - argv
12
- # rdx < - argc
11
+ # rsi < - process
12
+ # rdx < - argv
13
+ # rcx < - argc
13
14
#
14
15
# Save the parent base pointer for when control returns to this call frame.
15
16
# CFA directives will inform the unwinder to expect rbp at the bottom of the
@@ -20,14 +21,16 @@ L_dyn_call_begin:
20
21
mov rbp , rsp
21
22
.cfi_def_cfa_register rbp
22
23
23
- # Save our callee and argv pointers , and argc
24
+ # Pin callee pointer to r10
24
25
mov r10 , rdi
25
- mov r11 , rsi
26
- mov rax , rdx
26
+ # Pin the argv pointer to r11
27
+ mov r11 , rdx
28
+ # The process pointer needs to be in rdi
29
+ mov rdi , rsi
27
30
28
- # Determine if spills are needed
31
+ # Determine if spills are needed ( rdi carries the process pointer , so only 5 argv entries fit in the remaining integer argument registers : spill when argc > 5 )
29
32
# In the common case in which they are not , we perform a tail call
30
- cmp rdx , 7
33
+ cmp rcx , 5
31
34
ja L_dyn_call_spill
32
35
33
36
L_dyn_call_no_spill:
@@ -38,71 +41,72 @@ L_dyn_call_no_spill:
38
41
39
42
# Calculate offset in jump table to block which handles the specific
40
43
# number of registers we have arguments for , then jump to that block
44
+ mov rax , rcx
41
45
lea rcx , [ rip + L_dyn_call_jt ]
42
- mov rax , [ rcx + rax * 4 ]
46
+ movsxd rax , dword ptr [ rcx + 4 * rax ]
43
47
add rax , rcx
44
- jmp [ rax ]
48
+ jmp rax
45
49
46
50
# All of these basic blocks perform a tail call . As such ,
47
51
# the unwinder will skip over this frame should the callee
48
52
# throw an exception
49
53
L_dyn_call_regs0:
# argc == 0: no argv entries to load; rdi already holds the process pointer.
# Pop rbp, then tail-jump to the callee address pinned in r10.
50
54
pop rbp
51
- jmp [ r10 ]
55
+ jmp r10
52
56
53
57
L_dyn_call_regs1:
# argc == 1: argv[0] is loaded into rsi (rdi already holds the process pointer),
# then rbp is popped and control tail-jumps to the callee address in r10.
54
- mov rdi , [ r11 ]
58
+ mov rsi , [ r11 ]
55
59
pop rbp
56
- jmp [ r10 ]
60
+ jmp r10
57
61
58
62
L_dyn_call_regs2:
# argc == 2: argv[0..1] are loaded into rsi, rdx (rdi already holds the process
# pointer), then rbp is popped and control tail-jumps to the callee in r10.
59
- mov rdi , [ r11 ]
60
- mov rsi , [ r11 + 8 ]
63
+ mov rsi , [ r11 ]
64
+ mov rdx , [ r11 + 8 ]
61
65
pop rbp
62
- jmp [ r10 ]
66
+ jmp r10
63
67
64
68
L_dyn_call_regs3:
# argc == 3: argv[0..2] are loaded into rsi, rdx, rcx (rdi already holds the
# process pointer), then rbp is popped and control tail-jumps to the callee in r10.
65
- mov rdi , [ r11 ]
66
- mov rsi , [ r11 + 8 ]
67
- mov rdx , [ r11 + 16 ]
69
+ mov rsi , [ r11 ]
70
+ mov rdx , [ r11 + 8 ]
71
+ mov rcx , [ r11 + 16 ]
68
72
pop rbp
69
- jmp [ r10 ]
73
+ jmp r10
70
74
71
75
L_dyn_call_regs4:
# argc == 4: argv[0..3] are loaded into rsi, rdx, rcx, r8 (rdi already holds the
# process pointer), then rbp is popped and control tail-jumps to the callee in r10.
72
- mov rdi , [ r11 ]
73
- mov rsi , [ r11 + 8 ]
74
- mov rdx , [ r11 + 16 ]
75
- mov rcx , [ r11 + 24 ]
76
+ mov rsi , [ r11 ]
77
+ mov rdx , [ r11 + 8 ]
78
+ mov rcx , [ r11 + 16 ]
79
+ mov r8 , [ r11 + 24 ]
76
80
pop rbp
77
- jmp [ r10 ]
81
+ jmp r10
78
82
79
83
L_dyn_call_regs5:
# argc == 5: argv[0..4] are loaded into rsi, rdx, rcx, r8, r9 (rdi already holds
# the process pointer), then rbp is popped and control tail-jumps to the callee
# in r10. This is the largest register-only case; the former regs6 block is
# removed below.
80
- mov rdi , [ r11 ]
81
- mov rsi , [ r11 + 8 ]
82
- mov rdx , [ r11 + 16 ]
83
- mov rcx , [ r11 + 24 ]
84
- mov r8 , [ r11 + 32 ]
84
+ mov rsi , [ r11 ]
85
+ mov rdx , [ r11 + 8 ]
86
+ mov rcx , [ r11 + 16 ]
87
+ mov r8 , [ r11 + 24 ]
88
+ mov r9 , [ r11 + 32 ]
85
89
pop rbp
86
- jmp [ r10 ]
87
-
88
- L_dyn_call_regs6:
89
- mov rdi , [ r11 ]
90
- mov rsi , [ r11 + 8 ]
91
- mov rdx , [ r11 + 16 ]
92
- mov rcx , [ r11 + 24 ]
93
- mov r8 , [ r11 + 32 ]
94
- mov r9 , [ r11 + 40 ]
95
- pop rbp
96
- jmp [ r10 ]
90
+ jmp r10
97
91
98
92
L_dyn_call_spill:
99
93
# If we hit this block , we have identified that there are
100
94
# arguments to spill. We perform some setup for the actual
101
95
# spilling , which is a loop built on ` rep movsq `
96
+ #
97
+ # At this point , the following registers are occupied/hold these values:
98
+ #
99
+ # r10 < - callee
100
+ # rdi < - process
101
+ # r11 < - argv
102
+ # rcx < - argc
103
+
104
+ # rcx , rdi , and rsi are used by ` rep movsq ` , so save them temporarily
105
+ mov r8 , rcx
106
+ mov r9 , rdi
102
107
103
- # Calculate spill count for later ( rep uses rcx for the iteration count ,
108
+ # Calculate spill count for later ( rep uses rcx for the iteration count `i` ,
104
109
# which in this case is the number of quadwords to copy)
# Only argv[0..4] are passed in registers ( rsi , rdx , rcx , r8 , r9 ) because
# rdi carries the process pointer , so argv[5] and everything after it must be
# spilled : the count is argc - 5.
105
- mov rcx , rdx
106
- sub rcx , 6
110
+ sub rcx , 5
107
111
108
112
# Calculate spill space , and ensure it is rounded up to the nearest 16 bytes.
@@ -113,21 +117,22 @@ L_dyn_call_spill:
113
117
sub rsp , rax
114
118
115
119
# load source pointer (last item of argv)
116
- lea rsi , [ r11 + rdx * 8 - 8 ]
120
+ lea rsi , [ r11 + r8 * 8 - 8 ]
117
121
# load destination pointer (top of spill region)
118
- lea rdi , [ rsp + rcx * 8 - 8 ]
119
- # copy rcx quadwords from rsi to rdi , in reverse
122
+ lea rdi , [ rsp + rcx * 8 - 8 ]
123
+ # copy `i` quadwords from source to destination , in reverse
120
124
std
121
125
rep movsq
122
126
cld
123
127
124
- # We've spilled arguments , so we have at least 6 args
125
- mov rdi , [ r11 ]
126
- mov rsi , [ r11 + 8 ]
127
- mov rdx , [ r11 + 16 ]
128
- mov rcx , [ r11 + 24 ]
129
- mov r8 , [ r11 + 32 ]
130
- mov r9 , [ r11 + 40 ]
128
+ # We've spilled arguments , so we have at least 6 args , move them into their
129
+ # final destination registers in preparation for the call
130
+ mov rdi , r9
131
+ mov rsi , [ r11 ]
132
+ mov rdx , [ r11 + 8 ]
133
+ mov rcx , [ r11 + 16 ]
134
+ mov r8 , [ r11 + 24 ]
135
+ mov r9 , [ r11 + 32 ]
131
136
132
137
L_dyn_call_exec:
133
138
# If we spill arguments to the stack , we can't perform
@@ -141,7 +146,7 @@ L_dyn_call_exec:
141
146
# This instruction will push the return address and jump ,
142
147
# and we can expect rbp to be the same as we left it upon
143
148
# return.
144
- call [ r10 ]
149
+ call r10
145
150
146
151
L_dyn_call_ret:
147
152
# Non - tail call completed successfully
@@ -156,21 +161,19 @@ L_dyn_call_end:
156
161
# a variable number of register - based arguments
157
162
.p2align 2
158
163
.data_region jt32
# Each table entry is a 32-bit offset of one L_dyn_call_regsN block relative to
# L_dyn_call_jt. The dispatcher loads the entry indexed by argc with movsxd
# (sign-extending it) and adds the table base to form the jump target.
# The table holds exactly six entries (argc 0 through 5); a larger index reads
# past the end of the table.
159
- .set L_dyn_call_jt_entry0 , L_dyn_call_exec - L_dyn_call_jt
164
+ .set L_dyn_call_jt_entry0 , L_dyn_call_regs0 - L_dyn_call_jt
160
165
.set L_dyn_call_jt_entry1 , L_dyn_call_regs1 - L_dyn_call_jt
161
166
.set L_dyn_call_jt_entry2 , L_dyn_call_regs2 - L_dyn_call_jt
162
167
.set L_dyn_call_jt_entry3 , L_dyn_call_regs3 - L_dyn_call_jt
163
168
.set L_dyn_call_jt_entry4 , L_dyn_call_regs4 - L_dyn_call_jt
164
169
.set L_dyn_call_jt_entry5 , L_dyn_call_regs5 - L_dyn_call_jt
165
- .set L_dyn_call_jt_entry6 , L_dyn_call_regs6 - L_dyn_call_jt
166
170
L_dyn_call_jt:
167
171
.long L_dyn_call_jt_entry0
168
172
.long L_dyn_call_jt_entry1
169
173
.long L_dyn_call_jt_entry2
170
174
.long L_dyn_call_jt_entry3
171
175
.long L_dyn_call_jt_entry4
172
176
.long L_dyn_call_jt_entry5
173
- .long L_dyn_call_jt_entry6
174
177
.end_data_region
175
178
176
179
# The following is the LSDA metadata for exception handling
0 commit comments