Skip to content

Commit ddb0dba

Browse files
Pingfan LiuKernel Patches Daemon
authored andcommitted
tools/kexec: Introduce a bpf-prog to parse zboot image format
This BPF program aligns with the convention defined in the kernel file kexec_pe_parser_bpf.lskel.h, where the interface between the BPF program and the kernel is established, and is composed of: four maps: struct bpf_map_desc ringbuf_1; struct bpf_map_desc ringbuf_2; struct bpf_map_desc ringbuf_3; struct bpf_map_desc ringbuf_4; four sections: struct bpf_map_desc rodata; struct bpf_map_desc data; struct bpf_map_desc bss; struct bpf_map_desc rodata_str1_1; two progs: SEC("fentry.s/bpf_handle_pefile") SEC("fentry.s/bpf_post_handle_pefile") This BPF program only uses ringbuf_1, so it minimizes the size of the other three ringbufs to one byte. The size of ringbuf_1 is deduced from the size of the uncompressed file 'vmlinux.bin', which is usually less than 64MB. With the help of a group of bpf kfuncs: bpf_decompress(), bpf_copy_to_kernel(), bpf_mem_range_result_put(), this bpf-prog stores the uncompressed kernel image inside the kernel space. Signed-off-by: Pingfan Liu <[email protected]> Cc: Alexei Starovoitov <[email protected]> Cc: Baoquan He <[email protected]> Cc: Dave Young <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Philipp Rudo <[email protected]> Cc: [email protected] To: [email protected]
1 parent f99d437 commit ddb0dba

File tree

2 files changed

+240
-0
lines changed

2 files changed

+240
-0
lines changed

tools/kexec/Makefile

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# SPDX-License-Identifier: GPL-2.0
2+
3+
# Ensure Kbuild variables are available
include ../scripts/Makefile.include

# Kernel tree root: the current directory with a trailing /tools/kexec removed.
srctree  := $(patsubst %/tools/kexec,%,$(CURDIR))

# Kernel image that bpftool dumps BTF from (see the vmlinux.h rule).
VMLINUX   = $(srctree)/vmlinux

# Location of the libbpf headers used when compiling the BPF program.
TOOLSDIR := $(srctree)/tools
LIBDIR   := $(TOOLSDIR)/lib
BPFDIR   := $(LIBDIR)/bpf

# Map the machine name reported by uname to the kernel's arch directory name,
# unless ARCH was already provided by the caller.
ARCH ?= $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/ -e s/riscv64/riscv/ -e s/loongarch.*/loongarch/)
12+
# At present, the zboot image format is used by arm64, riscv and loongarch.
# For these, arch/$(ARCH)/boot/vmlinux.bin is the uncompressed file (not
# arch/$(ARCH)/boot/Image).
#
# $(filter ...) yields an empty string for an unsupported or empty ARCH, so
# test for non-emptiness instead of comparing ARCH with itself.
ifneq ($(filter arm64 riscv loongarch,$(ARCH)),)
EFI_IMAGE := $(srctree)/arch/$(ARCH)/boot/vmlinuz.efi
KERNEL_IMAGE := $(srctree)/arch/$(ARCH)/boot/vmlinux.bin
else
# Recipe commands (@echo/@exit) are not valid outside a rule; stop the parse
# with an explicit diagnostic instead.
$(error Unsupported architecture: $(ARCH))
endif
21+
22+
23+
CC = clang
CFLAGS = -O2
# Use the --target=<triple> spelling; the space-separated "-target bpf" form
# is deprecated in clang.
BPF_PROG_CFLAGS = -g -O2 --target=bpf -Wall -I $(BPFDIR) -I .
BPFTOOL = bpftool

# List of generated target files
HEADERS = vmlinux.h bpf_helper_defs.h image_size.h
ZBOOT_TARGETS = bytecode.c zboot_parser_bpf.o bytecode.o


# Targets
# 'zboot' is the first rule in this Makefile and therefore the default goal.
zboot: $(HEADERS) $(ZBOOT_TARGETS)
35+
36+
# vmlinux.h: C type definitions dumped from the BTF of $(VMLINUX).
# The first command only verifies that bpftool can be found.
vmlinux.h: $(VMLINUX)
	@command -v $(BPFTOOL) >/dev/null 2>&1 || { echo >&2 "$(BPFTOOL) is required but not found. Please install it."; exit 1; }
	@$(BPFTOOL) btf dump file $< format c > $@

# bpf_helper_defs.h: helper declarations generated by scripts/bpf_doc.py from
# the UAPI bpf.h header.
bpf_helper_defs.h: $(srctree)/tools/include/uapi/linux/bpf.h
	@$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
		--file $< > $@
44+
45+
# image_size.h defines two macros for zboot_parser_bpf.c:
#   IMAGE_SIZE    - size in bytes of $(KERNEL_IMAGE)
#   RINGBUF1_SIZE - max_entries of ringbuf_1
#
# The BPF program reserves one record of IMAGE_SIZE + 4096 bytes, and the ring
# buffer prepends an 8-byte header to every record. RINGBUF1_SIZE is therefore
# the smallest power of two (at least 4096) that is not below
# IMAGE_SIZE + 4096 + 8; sizing it from IMAGE_SIZE alone makes the reservation
# fail whenever the image is just under a power of two.
image_size.h: $(KERNEL_IMAGE)
	@{ \
		if [ ! -f "$(KERNEL_IMAGE)" ]; then \
			echo "Error: File '$(KERNEL_IMAGE)' does not exist"; \
			exit 1; \
		fi; \
		FILE_SIZE=$$(stat -c '%s' "$(KERNEL_IMAGE)") || exit 1; \
		NEEDED=$$((FILE_SIZE + 4096 + 8)); \
		POWER=4096; \
		while [ $$POWER -lt $$NEEDED ]; do \
			POWER=$$((POWER * 2)); \
		done; \
		echo "#define RINGBUF1_SIZE $$POWER" > $@; \
		echo "#define IMAGE_SIZE $$FILE_SIZE" >> $@; \
	}
60+
61+
62+
# Rule to generate zboot_parser_bpf.o.
# zboot_parser_bpf.c includes image_size.h as well as vmlinux.h, so it must be
# a prerequisite too: otherwise the object is not rebuilt when the kernel image
# size (and with it RINGBUF1_SIZE / IMAGE_SIZE) changes.
zboot_parser_bpf.o: zboot_parser_bpf.c vmlinux.h bpf_helper_defs.h image_size.h
	@$(CC) $(BPF_PROG_CFLAGS) -c $< -o $@
65+
66+
# bytecode.c is carved out of a temporary light skeleton:
#   1. bpftool writes zboot_parser_bpf.lskel.h from the BPF object;
#   2. the opts_data[] and opts_insn[] array definitions are extracted, in that
#      order, with 'static' removed so that they are not optimized away;
#   3. the temporary skeleton header is deleted.
bytecode.c: zboot_parser_bpf.o
	@$(BPFTOOL) gen skeleton -L $< > $(basename $<).lskel.h
	@sed -n '/static const char opts_data\[\]/,/;/p' $(basename $<).lskel.h | sed 's/static const/const/' > $@
	@sed -n '/static const char opts_insn\[\]/,/;/p' $(basename $<).lskel.h | sed 's/static const/const/' >> $@
	@rm -f $(basename $<).lskel.h

# Compile the extracted arrays into an object file.
bytecode.o: bytecode.c
	@$(CC) -c bytecode.c -o bytecode.o
77+
78+
# Clean up generated files, including the temporary light skeleton that the
# bytecode.c recipe leaves behind when it fails part-way.
clean:
	@rm -f $(HEADERS) $(ZBOOT_TARGETS) zboot_parser_bpf.lskel.h

# Both goals name no file. 'zboot' is the default goal; this Makefile has no
# 'all' target.
.PHONY: zboot clean

tools/kexec/zboot_parser_bpf.c

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
//
3+
#include "vmlinux.h"
4+
#include <bpf_helpers.h>
5+
#include <bpf_tracing.h>
6+
#include "image_size.h"
7+
8+
/*
 * Size of the single record parse_pe() reserves from ringbuf_1: the
 * uncompressed vmlinux.bin (IMAGE_SIZE, from the generated image_size.h)
 * plus 4KB.
 */
9+
#define MAX_RECORD_SIZE (IMAGE_SIZE + 4096)
10+
/* ringbuf_2, ringbuf_3 and ringbuf_4 are not used by this program, so they get the smallest size */
11+
#define MIN_BUF_SIZE 1
12+
13+
/*
 * Resource names. parse_pe() passes KEXEC_RES_KERNEL_NAME to
 * bpf_copy_to_kernel(); the initrd and cmdline names are only referenced by
 * dummy() in this file.
 */
#define KEXEC_RES_KERNEL_NAME "kexec:kernel"
14+
#define KEXEC_RES_INITRD_NAME "kexec:initrd"
15+
#define KEXEC_RES_CMDLINE_NAME "kexec:cmdline"
16+
17+
/*
 * Four ring buffer maps named ringbuf_1..ringbuf_4. Only ringbuf_1 is used by
 * the programs in this file; it is sized by RINGBUF1_SIZE from image_size.h.
 * The other three are declared with MIN_BUF_SIZE.
 *
 * ringbuf is safe since the user space has no write access to them.
 */
18+
struct {
19+
__uint(type, BPF_MAP_TYPE_RINGBUF);
20+
__uint(max_entries, RINGBUF1_SIZE);
21+
} ringbuf_1 SEC(".maps");
22+
23+
/* Unused placeholder map. */
struct {
24+
__uint(type, BPF_MAP_TYPE_RINGBUF);
25+
__uint(max_entries, MIN_BUF_SIZE);
26+
} ringbuf_2 SEC(".maps");
27+
28+
/* Unused placeholder map. */
struct {
29+
__uint(type, BPF_MAP_TYPE_RINGBUF);
30+
__uint(max_entries, MIN_BUF_SIZE);
31+
} ringbuf_3 SEC(".maps");
32+
33+
/* Unused placeholder map. */
struct {
34+
__uint(type, BPF_MAP_TYPE_RINGBUF);
35+
__uint(max_entries, MIN_BUF_SIZE);
36+
} ringbuf_4 SEC(".maps");
37+
38+
/* License string placed in the "license" section of the object. */
char LICENSE[] SEC("license") = "GPL";
39+
40+
/*
 * This function ensures that the sections .rodata, .data, .bss and
 * .rodata.str1.1 are created for a bpf prog.
 *
 * Nothing in this file calls it; the 'used' attribute keeps it (and the
 * static objects below) in the object file.
 */
44+
__attribute__((used)) static int dummy(void)
45+
{
46+
/* One 16-byte object explicitly placed in each of .rodata, .data and .bss. */
static const char res_kernel[16] __attribute__((used, section(".rodata"))) = KEXEC_RES_KERNEL_NAME;
47+
static char local_name[16] __attribute__((used, section(".data"))) = KEXEC_RES_CMDLINE_NAME;
48+
static char res_cmdline[16] __attribute__((used, section(".bss")));
49+
50+
/*
 * NOTE(review): the literal "kexec:initrd" is 13 bytes including its NUL,
 * yet 16 bytes are copied from it, which reads past the end of the literal.
 * Presumably this is what makes the compiler emit the string into
 * .rodata.str1.1 - confirm before changing the length.
 */
__builtin_memcpy(local_name, KEXEC_RES_INITRD_NAME, 16);
51+
/* The result is irrelevant; the comparison only references both objects. */
return __builtin_memcmp(local_name, res_kernel, 4);
52+
}
53+
54+
extern int bpf_copy_to_kernel(const char *name, char *buf, int size) __weak __ksym;
55+
extern struct mem_range_result *bpf_decompress(char *image_gz_payload, int image_gz_sz) __weak __ksym;
56+
extern int bpf_mem_range_result_put(struct mem_range_result *result) __weak __ksym;
57+
58+
59+
60+
61+
/*
 * Header found at the start of an EFI zboot image; see
 * drivers/firmware/efi/libstub/zboot-header.S.
 *
 * The header doubles as the MS-DOS stub of the PE file, whose PE header
 * offset lives at byte 0x3c. The compression type string therefore occupies
 * the whole 0x18..0x37 range (32 bytes), which places linux_pe_magic at 0x38
 * and pe_header_offset at 0x3c.
 */
struct linux_pe_zboot_header {
	unsigned int mz_magic;		/* 0x00: DOS 'MZ' signature */
	char image_type[4];		/* 0x04: "zimg" for a zboot image */
	unsigned int payload_offset;	/* 0x08: payload offset from the image start */
	unsigned int payload_size;	/* 0x0c: payload size in bytes */
	unsigned int reserved[2];	/* 0x10 */
	char comp_type[32];		/* 0x18: compression type string, padded */
	unsigned int linux_pe_magic;	/* 0x38 */
	unsigned int pe_header_offset;	/* 0x3c */
} __attribute__((packed));

_Static_assert(__builtin_offsetof(struct linux_pe_zboot_header, pe_header_offset) == 0x3c,
	       "PE header offset must sit at 0x3c of the DOS stub");
72+
73+
74+
/*
 * parse_pe - unpack the kernel carried by an EFI zboot image.
 *
 * Attached to bpf_handle_pefile(). The program reads the zboot header from
 * context->image, copies the compressed payload into a record reserved from
 * ringbuf_1, decompresses it with bpf_decompress() and passes the result to
 * bpf_copy_to_kernel() under the name KEXEC_RES_KERNEL_NAME.
 *
 * Always returns 0; failures are only reported through bpf_printk(). The
 * ringbuf record and the decompression result are released on every path.
 */
SEC("fentry.s/bpf_handle_pefile")
int BPF_PROG(parse_pe, struct kexec_context *context)
{
	struct linux_pe_zboot_header *zboot_header;
	struct mem_range_result *r;
	unsigned int payload_offset;
	unsigned int payload_size;
	unsigned int image_sz;
	char local_name[32] = { 0 };
	char *buf;

	_Static_assert(sizeof(KEXEC_RES_KERNEL_NAME) <= sizeof(local_name),
		       "resource name does not fit in local_name");

	bpf_printk("begin parse PE\n");
	/* BPF verifier should know each variable initial state */
	if (!context->image) {
		bpf_printk("Err: no image to parse\n");
		return 0;
	}
	if (context->image_sz > MAX_RECORD_SIZE) {
		bpf_printk("Err: image size is greater than 0x%lx\n", MAX_RECORD_SIZE);
		return 0;
	}

	/*
	 * Copy the data into the ringbuf so that bytes at arbitrary, unaligned
	 * offsets can be accessed. The memory is allocated all at once and
	 * overwritten later.
	 *
	 * R2 is ARG_CONST_ALLOC_SIZE_OR_ZERO, so the size has to be known at
	 * compile time.
	 */
	buf = (char *)bpf_ringbuf_reserve(&ringbuf_1, MAX_RECORD_SIZE, 0);
	if (!buf) {
		bpf_printk("Err: fail to reserve ringbuf to parse zboot header\n");
		return 0;
	}

	image_sz = context->image_sz;
	/* Do not read a header from beyond the end of a truncated image. */
	if (image_sz < sizeof(*zboot_header)) {
		bpf_printk("Err: image is too small for a zboot header\n");
		goto out_discard;
	}
	if (bpf_probe_read_kernel(buf, sizeof(*zboot_header), context->image) < 0) {
		bpf_printk("Err: fail to read zboot header\n");
		goto out_discard;
	}

	zboot_header = (struct linux_pe_zboot_header *)buf;
	if (__builtin_memcmp(&zboot_header->image_type, "zimg",
			     sizeof(zboot_header->image_type))) {
		bpf_printk("Err: image is not zboot image\n");
		goto out_discard;
	}

	payload_offset = zboot_header->payload_offset;
	payload_size = zboot_header->payload_size;
	bpf_printk("zboot image payload offset=0x%x, size=0x%x\n", payload_offset, payload_size);

	/*
	 * Sanity check: the payload must lie entirely inside the image. The
	 * subtraction form cannot overflow, unlike offset + size.
	 */
	if (payload_offset > image_sz || payload_size > image_sz - payload_offset) {
		bpf_printk("Invalid zboot image payload offset and size\n");
		goto out_discard;
	}
	/* Keeps the copy below within the reserved record. */
	if (payload_size >= MAX_RECORD_SIZE) {
		bpf_printk("Err: payload_size >= MAX_RECORD_SIZE\n");
		goto out_discard;
	}
	/* "payload_size - 4" below must not wrap around. */
	if (payload_size <= 4) {
		bpf_printk("Err: zboot payload is too small\n");
		goto out_discard;
	}

	/* Overwrite buf */
	if (bpf_probe_read_kernel(buf, payload_size, context->image + payload_offset) < 0) {
		bpf_printk("Err: fail to read zboot payload\n");
		goto out_discard;
	}

	bpf_printk("Calling bpf_decompress()\n");
	/*
	 * NOTE(review): the last four bytes of the payload are not handed to the
	 * decompressor; presumably they are a trailing size field - confirm
	 * against the zboot image layout.
	 */
	r = bpf_decompress(buf, payload_size - 4);
	if (!r) {
		bpf_printk("Err: fail to decompress\n");
		goto out_discard;
	}

	image_sz = r->data_sz;
	if (image_sz > MAX_RECORD_SIZE) {
		bpf_printk("Err: decompressed size too big\n");
		goto out_put;
	}

	/* Since the decompressed size is bigger than original, no need to clean */
	if (bpf_probe_read_kernel(buf, image_sz, r->buf) < 0) {
		/* The helper zeroes buf on failure; never pass that on as a kernel. */
		bpf_printk("Err: fail to read decompressed image\n");
		goto out_put;
	}

	bpf_printk("Calling bpf_copy_to_kernel(), image_sz=0x%x\n", image_sz);
	/*
	 * Verifier is unhappy to expose .rodata.str1.1 'map' to kernel, so the
	 * name is passed from the stack. Copy exactly the literal (including
	 * its NUL); local_name is zero-initialized.
	 */
	__builtin_memcpy(local_name, KEXEC_RES_KERNEL_NAME, sizeof(KEXEC_RES_KERNEL_NAME));
	/* NOTE(review): return value semantics are not visible here; left unchecked as before. */
	bpf_copy_to_kernel(local_name, buf, image_sz);

out_put:
	bpf_mem_range_result_put(r);
out_discard:
	bpf_ringbuf_discard(buf, BPF_RB_NO_WAKEUP);
	return 0;
}
153+
154+
/*
 * post_parse_pe - program attached to bpf_post_handle_pefile().
 *
 * Intentionally empty: it takes no action and returns 0.
 */
SEC("fentry.s/bpf_post_handle_pefile")
int BPF_PROG(post_parse_pe, struct kexec_context *context)
{
	return 0;
}

0 commit comments

Comments
 (0)