Skip to content

Commit 3578d8b

Browse files
Pingfan LiuKernel Patches Daemon
authored andcommitted
kexec: Introduce kexec_pe_image to parse and load PE file
As UEFI becomes popular, a few architectures support booting a PE format kernel image directly. But the internals of the PE format vary, which means a separate parser is needed for each format. This patch (with the rest of this series) introduces a common skeleton for all parsers and leaves the format parsing to a bpf-prog, so the kernel code can stay relatively stable. A new kexec_file_ops implementation, named pe_image_ops, is introduced. There are some placeholder functions in this patch. (They will take effect after the introduction of the kexec bpf light skeleton and bpf helpers.) Overall, the parsing process is a pipeline: the current bpf-prog parser is attached to bpf_handle_pefile() and detached at the end of the current stage by disarm_bpf_prog(); the result parsed by the current bpf-prog is buffered in the kernel by prepare_nested_pe() and delivered to the next stage. For each stage, the bpf bytecode is extracted from the '.bpf' section in the PE file. Signed-off-by: Pingfan Liu <[email protected]> Cc: Baoquan He <[email protected]> Cc: Dave Young <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Philipp Rudo <[email protected]> To: [email protected]
1 parent a820301 commit 3578d8b

File tree

4 files changed

+359
-0
lines changed

4 files changed

+359
-0
lines changed

include/linux/kexec.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ static inline int machine_kexec_post_load(struct kimage *image) { return 0; }
444444

445445
extern struct kimage *kexec_image;
446446
extern struct kimage *kexec_crash_image;
447+
extern const struct kexec_file_ops pe_image_ops;
447448

448449
bool kexec_load_permitted(int kexec_image_type);
449450

kernel/Kconfig.kexec

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@ config KEXEC_FILE
4646
for kernel and initramfs as opposed to list of segments as
4747
accepted by kexec system call.
4848

49+
config KEXEC_PE_IMAGE
50+
bool "Enable parsing UEFI PE file through kexec file based system call"
51+
select KEEP_DECOMPRESSOR
52+
depends on KEXEC_FILE
53+
depends on DEBUG_INFO_BTF && BPF_SYSCALL
54+
help
55+
This option makes the kexec_file_load() syscall cooperate with a bpf-prog
56+
to parse PE format files.
57+
4958
config KEXEC_SIG
5059
bool "Verify kernel signature during kexec_file_load() syscall"
5160
depends on ARCH_SUPPORTS_KEXEC_SIG

kernel/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_core.o
8080
obj-$(CONFIG_CRASH_DM_CRYPT) += crash_dump_dm_crypt.o
8181
obj-$(CONFIG_KEXEC) += kexec.o
8282
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
83+
obj-$(CONFIG_KEXEC_PE_IMAGE) += kexec_pe_image.o
8384
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
8485
obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
8586
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o

kernel/kexec_pe_image.c

Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Kexec PE image loader
4+
5+
* Copyright (C) 2025 Red Hat, Inc
6+
*/
7+
8+
#define pr_fmt(fmt) "kexec_file(Image): " fmt
9+
10+
#include <linux/err.h>
11+
#include <linux/errno.h>
12+
#include <linux/list.h>
13+
#include <linux/kernel.h>
14+
#include <linux/vmalloc.h>
15+
#include <linux/kexec.h>
16+
#include <linux/pe.h>
17+
#include <linux/string.h>
18+
#include <linux/bpf.h>
19+
#include <linux/filter.h>
20+
#include <asm/byteorder.h>
21+
#include <asm/image.h>
22+
#include <asm/memory.h>
23+
24+
25+
#define KEXEC_RES_KERNEL_NAME "kexec:kernel"
26+
#define KEXEC_RES_INITRD_NAME "kexec:initrd"
27+
#define KEXEC_RES_CMDLINE_NAME "kexec:cmdline"
28+
29+
/* One parse-result slot: a resource name paired with the result object whose buffer holds the data. */
struct kexec_res {
30+
/* Resource name; the entries of parsed_resource[] use the KEXEC_RES_*_NAME strings */
char *name;
31+
/* Freeing of the buffer held by r is deferred to kimage_file_post_load_cleanup */
32+
struct mem_range_result *r;
33+
};
34+
35+
static struct kexec_res parsed_resource[3] = {
36+
{ KEXEC_RES_KERNEL_NAME, },
37+
{ KEXEC_RES_INITRD_NAME, },
38+
{ KEXEC_RES_CMDLINE_NAME, },
39+
};
40+
41+
static bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz);
42+
43+
static bool is_valid_pe(const char *kernel_buf, unsigned long kernel_len)
44+
{
45+
struct mz_hdr *mz;
46+
struct pe_hdr *pe;
47+
48+
if (!kernel_buf)
49+
return false;
50+
mz = (struct mz_hdr *)kernel_buf;
51+
if (mz->magic != IMAGE_DOS_SIGNATURE)
52+
return false;
53+
pe = (struct pe_hdr *)(kernel_buf + mz->peaddr);
54+
if (pe->magic != IMAGE_NT_SIGNATURE)
55+
return false;
56+
if (pe->opt_hdr_size == 0) {
57+
pr_err("optional header is missing\n");
58+
return false;
59+
}
60+
61+
return pe_has_bpf_section(kernel_buf, kernel_len);
62+
}
63+
64+
static bool is_valid_format(const char *kernel_buf, unsigned long kernel_len)
65+
{
66+
return is_valid_pe(kernel_buf, kernel_len);
67+
}
68+
69+
/*
 * Probe hook: returns 0 when is_valid_pe() accepts the buffer, -1 otherwise.
 *
 * The UEFI Terse Executable (TE) image has MZ header.
 */
static int pe_image_probe(const char *kernel_buf, unsigned long kernel_len)
{
	if (is_valid_pe(kernel_buf, kernel_len))
		return 0;

	return -1;
}
76+
77+
static int pe_get_section(const char *file_buf, const char *sect_name,
78+
char **sect_start, unsigned long *sect_sz)
79+
{
80+
struct pe_hdr *pe_hdr;
81+
struct pe32plus_opt_hdr *opt_hdr;
82+
struct section_header *sect_hdr;
83+
int section_nr, i;
84+
struct mz_hdr *mz = (struct mz_hdr *)file_buf;
85+
86+
*sect_start = NULL;
87+
*sect_sz = 0;
88+
pe_hdr = (struct pe_hdr *)(file_buf + mz->peaddr);
89+
section_nr = pe_hdr->sections;
90+
opt_hdr = (struct pe32plus_opt_hdr *)(file_buf + mz->peaddr + sizeof(struct pe_hdr));
91+
sect_hdr = (struct section_header *)((char *)opt_hdr + pe_hdr->opt_hdr_size);
92+
93+
for (i = 0; i < section_nr; i++) {
94+
if (strcmp(sect_hdr->name, sect_name) == 0) {
95+
*sect_start = (char *)file_buf + sect_hdr->data_addr;
96+
*sect_sz = sect_hdr->raw_data_size;
97+
return 0;
98+
}
99+
sect_hdr++;
100+
}
101+
102+
return -1;
103+
}
104+
105+
static bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz)
106+
{
107+
char *sect_start = NULL;
108+
unsigned long sect_sz = 0;
109+
int ret;
110+
111+
ret = pe_get_section(file_buf, ".bpf", &sect_start, &sect_sz);
112+
if (ret < 0)
113+
return false;
114+
return true;
115+
}
116+
117+
/*
 * Placeholder for loading the ELF found in a '.bpf' section (@bpf_elf, @sz bytes).
 * For now both arguments are ignored and 0 is always returned.
 */
118+
static int arm_bpf_prog(char *bpf_elf, unsigned long sz)
119+
{
120+
return 0;
121+
}
122+
123+
/* Placeholder counterpart of arm_bpf_prog(); the body is empty for now. */
static void disarm_bpf_prog(void)
124+
{
125+
}
126+
127+
/*
 * Argument passed to bpf_handle_pefile() and bpf_post_handle_pefile().
 * pe_image_load() fills in every field before calling bpf_handle_pefile().
 * NOTE(review): the *_sz fields are int while pe_image_load() assigns
 * unsigned long values to them - confirm large buffers cannot be truncated.
 */
struct kexec_context {
128+
/* true when the kimage type is KEXEC_TYPE_CRASH */
bool kdump;
129+
/* kernel image buffer of the current stage and its size */
char *image;
130+
int image_sz;
131+
/* initrd buffer of the current stage and its size */
char *initrd;
132+
int initrd_sz;
133+
/* command line of the current stage; cmdline_sz is set from strlen() of it */
char *cmdline;
134+
int cmdline_sz;
135+
};
136+
137+
void bpf_handle_pefile(struct kexec_context *context);
138+
void bpf_post_handle_pefile(struct kexec_context *context);
139+
140+
141+
/*
 * Hook called by pe_image_load() with the buffers of the current stage; the
 * call site describes it as a bpf-prog fentry point. The body does no real work.
 *
 * optimize("O0") prevents inline, compiler constant propagation
 */
144+
__attribute__((used, optimize("O0"))) void bpf_handle_pefile(struct kexec_context *context)
145+
{
146+
/*
 * To prevent the linker from Identical Code Folding (ICF) with
 * bpf_post_handle_pefile, the two hooks are given different code.
 */
150+
volatile int dummy = 0;
151+
152+
dummy += 1;
153+
}
154+
155+
/*
 * Second hook called by pe_image_load(), after prepare_nested_pe(); the call
 * site describes it as a bpf-prog fentry point. The body does no real work:
 * it adds 2 where bpf_handle_pefile() adds 1, so the two bodies differ.
 */
__attribute__((used, optimize("O0"))) void bpf_post_handle_pefile(struct kexec_context *context)
155+
{
156+
volatile int dummy = 0;
157+
158+
dummy += 2;
159+
}
161+
162+
/*
163+
* PE file may be nested and should be unfold one by one.
164+
* Query 'kernel', 'initrd', 'cmdline' in cur_phase, as they are inputs for the
165+
* next phase.
166+
*/
167+
static int prepare_nested_pe(char **kernel, unsigned long *kernel_len, char **initrd,
168+
unsigned long *initrd_len, char **cmdline)
169+
{
170+
struct kexec_res *res;
171+
int ret = -1;
172+
173+
*kernel = NULL;
174+
*kernel_len = 0;
175+
176+
res = &parsed_resource[0];
177+
if (!!res->r) {
178+
*kernel = res->r->buf;
179+
*kernel_len = res->r->data_sz;
180+
ret = 0;
181+
}
182+
183+
res = &parsed_resource[1];
184+
if (!!res->r) {
185+
*initrd = res->r->buf;
186+
*initrd_len = res->r->data_sz;
187+
}
188+
189+
res = &parsed_resource[2];
190+
if (!!res->r) {
191+
*cmdline = res->r->buf;
192+
}
193+
194+
return ret;
195+
}
196+
197+
static void *pe_image_load(struct kimage *image,
198+
char *kernel, unsigned long kernel_len,
199+
char *initrd, unsigned long initrd_len,
200+
char *cmdline, unsigned long cmdline_len)
201+
{
202+
char *linux_start, *initrd_start, *cmdline_start, *bpf_start;
203+
unsigned long linux_sz, initrd_sz, cmdline_sz, bpf_sz;
204+
struct kexec_res *res;
205+
struct mem_range_result *r;
206+
void *ldata;
207+
int ret;
208+
209+
linux_start = kernel;
210+
linux_sz = kernel_len;
211+
initrd_start = initrd;
212+
initrd_sz = initrd_len;
213+
cmdline_start = cmdline;
214+
cmdline_sz = cmdline_len;
215+
216+
while (is_valid_format(linux_start, linux_sz) &&
217+
pe_has_bpf_section(linux_start, linux_sz)) {
218+
struct kexec_context context;
219+
220+
pe_get_section((const char *)linux_start, ".bpf", &bpf_start, &bpf_sz);
221+
if (!!bpf_sz) {
222+
/* load and attach bpf-prog */
223+
ret = arm_bpf_prog(bpf_start, bpf_sz);
224+
if (ret) {
225+
pr_err("Fail to load .bpf section\n");
226+
ldata = ERR_PTR(ret);
227+
goto err;
228+
}
229+
}
230+
if (image->type != KEXEC_TYPE_CRASH)
231+
context.kdump = false;
232+
else
233+
context.kdump = true;
234+
context.image = linux_start;
235+
context.image_sz = linux_sz;
236+
context.initrd = initrd_start;
237+
context.initrd_sz = initrd_sz;
238+
context.cmdline = cmdline_start;
239+
context.cmdline_sz = strlen(cmdline_start);
240+
/* bpf-prog fentry, which handle above buffers. */
241+
bpf_handle_pefile(&context);
242+
243+
prepare_nested_pe(&linux_start, &linux_sz, &initrd_start,
244+
&initrd_sz, &cmdline_start);
245+
/* bpf-prog fentry */
246+
bpf_post_handle_pefile(&context);
247+
/*
248+
* detach the current bpf-prog from their attachment points.
249+
*/
250+
disarm_bpf_prog();
251+
}
252+
253+
/*
254+
* image's kernel_buf, initrd_buf, cmdline_buf are set. Now they should
255+
* be updated to the new content.
256+
*/
257+
258+
res = &parsed_resource[0];
259+
/* Kernel part should always be parsed */
260+
if (!res->r) {
261+
pr_err("Can not parse kernel\n");
262+
ldata = ERR_PTR(-EINVAL);
263+
goto err;
264+
}
265+
kernel = res->r->buf;
266+
kernel_len = res->r->data_sz;
267+
vfree(image->kernel_buf);
268+
image->kernel_buf = kernel;
269+
image->kernel_buf_len = kernel_len;
270+
271+
res = &parsed_resource[1];
272+
if (!!res->r) {
273+
initrd = res->r->buf;
274+
initrd_len = res->r->data_sz;
275+
vfree(image->initrd_buf);
276+
image->initrd_buf = initrd;
277+
image->initrd_buf_len = initrd_len;
278+
}
279+
res = &parsed_resource[2];
280+
if (!!res->r) {
281+
cmdline = res->r->buf;
282+
cmdline_len = res->r->data_sz;
283+
kfree(image->cmdline_buf);
284+
image->cmdline_buf = cmdline;
285+
image->cmdline_buf_len = cmdline_len;
286+
}
287+
288+
if (kernel == NULL || initrd == NULL || cmdline == NULL) {
289+
char *c, buf[64];
290+
291+
c = buf;
292+
if (kernel == NULL) {
293+
strcpy(c, "kernel ");
294+
c += strlen("kernel ");
295+
}
296+
if (initrd == NULL) {
297+
strcpy(c, "initrd ");
298+
c += strlen("initrd ");
299+
}
300+
if (cmdline == NULL) {
301+
strcpy(c, "cmdline ");
302+
c += strlen("cmdline ");
303+
}
304+
c = '\0';
305+
pr_err("Can not extract data for %s", buf);
306+
ldata = ERR_PTR(-EINVAL);
307+
goto err;
308+
}
309+
310+
ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
311+
image->kernel_buf_len);
312+
if (ret) {
313+
pr_err("Fail to find suitable image loader\n");
314+
ldata = ERR_PTR(ret);
315+
goto err;
316+
}
317+
ldata = kexec_image_load_default(image);
318+
if (IS_ERR(ldata)) {
319+
pr_err("architecture code fails to load image\n");
320+
goto err;
321+
}
322+
image->image_loader_data = ldata;
323+
324+
err:
325+
for (int i = 0; i < 3; i++) {
326+
r = parsed_resource[i].r;
327+
if (!r)
328+
continue;
329+
parsed_resource[i].r = NULL;
330+
/*
331+
* The release of buffer defers to
332+
* kimage_file_post_load_cleanup()
333+
*/
334+
r->buf = NULL;
335+
r->buf_sz = 0;
336+
mem_range_result_put(r);
337+
}
338+
339+
return ldata;
340+
}
341+
342+
/*
 * kexec_file_ops for PE images: probe via pe_image_probe(), load via
 * pe_image_load(); .verify_sig is only set when CONFIG_KEXEC_IMAGE_VERIFY_SIG
 * is defined.
 * NOTE(review): the include/linux/kexec.h hunk of this commit declares
 * "extern const struct kexec_file_ops pe_image_ops;" while this definition is
 * named kexec_pe_image_ops - one of the two names needs to change.
 */
const struct kexec_file_ops kexec_pe_image_ops = {
343+
.probe = pe_image_probe,
344+
.load = pe_image_load,
345+
#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
346+
.verify_sig = kexec_kernel_verify_pe_sig,
347+
#endif
348+
};

0 commit comments

Comments
 (0)