|
| 1 | +// SPDX-License-Identifier: GPL-2.0 |
| 2 | +/* |
| 3 | + * Kexec PE image loader |
| 4 | + * |
| 5 | + * Copyright (C) 2025 Red Hat, Inc |
| 6 | + */ |
| 7 | + |
| 8 | +#define pr_fmt(fmt) "kexec_file(Image): " fmt |
| 9 | + |
| 10 | +#include <linux/err.h> |
| 11 | +#include <linux/errno.h> |
| 12 | +#include <linux/list.h> |
| 13 | +#include <linux/kernel.h> |
| 14 | +#include <linux/vmalloc.h> |
| 15 | +#include <linux/kexec.h> |
| 16 | +#include <linux/pe.h> |
| 17 | +#include <linux/string.h> |
| 18 | +#include <linux/bpf.h> |
| 19 | +#include <linux/filter.h> |
| 20 | +#include <asm/byteorder.h> |
| 21 | +#include <asm/image.h> |
| 22 | +#include <asm/memory.h> |
| 23 | + |
| 24 | + |
| 25 | +#define KEXEC_RES_KERNEL_NAME "kexec:kernel" |
| 26 | +#define KEXEC_RES_INITRD_NAME "kexec:initrd" |
| 27 | +#define KEXEC_RES_CMDLINE_NAME "kexec:cmdline" |
| 28 | + |
/*
 * One slot of parsed output (kernel, initrd or cmdline), identified by name.
 */
struct kexec_res {
	/* Resource identifier, e.g. "kexec:kernel" */
	char *name;
	/*
	 * Parsed result backing this resource. Its buffer is not freed here;
	 * that is deferred to kimage_file_post_load_cleanup.
	 */
	struct mem_range_result *r;
};
| 34 | + |
| 35 | +static struct kexec_res parsed_resource[3] = { |
| 36 | + { KEXEC_RES_KERNEL_NAME, }, |
| 37 | + { KEXEC_RES_INITRD_NAME, }, |
| 38 | + { KEXEC_RES_CMDLINE_NAME, }, |
| 39 | +}; |
| 40 | + |
| 41 | +static bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz); |
| 42 | + |
| 43 | +static bool is_valid_pe(const char *kernel_buf, unsigned long kernel_len) |
| 44 | +{ |
| 45 | + struct mz_hdr *mz; |
| 46 | + struct pe_hdr *pe; |
| 47 | + |
| 48 | + if (!kernel_buf) |
| 49 | + return false; |
| 50 | + mz = (struct mz_hdr *)kernel_buf; |
| 51 | + if (mz->magic != IMAGE_DOS_SIGNATURE) |
| 52 | + return false; |
| 53 | + pe = (struct pe_hdr *)(kernel_buf + mz->peaddr); |
| 54 | + if (pe->magic != IMAGE_NT_SIGNATURE) |
| 55 | + return false; |
| 56 | + if (pe->opt_hdr_size == 0) { |
| 57 | + pr_err("optional header is missing\n"); |
| 58 | + return false; |
| 59 | + } |
| 60 | + |
| 61 | + return pe_has_bpf_section(kernel_buf, kernel_len); |
| 62 | +} |
| 63 | + |
/*
 * Tell whether the buffer is in a format this loader can unfold.
 * Currently that is exactly what is_valid_pe() accepts.
 */
static bool is_valid_format(const char *kernel_buf, unsigned long kernel_len)
{
	bool acceptable = is_valid_pe(kernel_buf, kernel_len);

	return acceptable;
}
| 68 | + |
/*
 * pe_image_probe - probe callback of kexec_pe_image_ops
 * @kernel_buf: start of the candidate image
 * @kernel_len: number of bytes available at @kernel_buf
 *
 * Accepts exactly the images that is_valid_pe() accepts.
 *
 * Return: 0 if this loader can handle the image, -ENOEXEC otherwise.
 * A proper errno is used rather than a bare -1 (which would read as -EPERM
 * if the value is passed on to the caller of the probe).
 */
static int pe_image_probe(const char *kernel_buf, unsigned long kernel_len)
{
	if (!is_valid_pe(kernel_buf, kernel_len))
		return -ENOEXEC;

	return 0;
}
| 76 | + |
| 77 | +static int pe_get_section(const char *file_buf, const char *sect_name, |
| 78 | + char **sect_start, unsigned long *sect_sz) |
| 79 | +{ |
| 80 | + struct pe_hdr *pe_hdr; |
| 81 | + struct pe32plus_opt_hdr *opt_hdr; |
| 82 | + struct section_header *sect_hdr; |
| 83 | + int section_nr, i; |
| 84 | + struct mz_hdr *mz = (struct mz_hdr *)file_buf; |
| 85 | + |
| 86 | + *sect_start = NULL; |
| 87 | + *sect_sz = 0; |
| 88 | + pe_hdr = (struct pe_hdr *)(file_buf + mz->peaddr); |
| 89 | + section_nr = pe_hdr->sections; |
| 90 | + opt_hdr = (struct pe32plus_opt_hdr *)(file_buf + mz->peaddr + sizeof(struct pe_hdr)); |
| 91 | + sect_hdr = (struct section_header *)((char *)opt_hdr + pe_hdr->opt_hdr_size); |
| 92 | + |
| 93 | + for (i = 0; i < section_nr; i++) { |
| 94 | + if (strcmp(sect_hdr->name, sect_name) == 0) { |
| 95 | + *sect_start = (char *)file_buf + sect_hdr->data_addr; |
| 96 | + *sect_sz = sect_hdr->raw_data_size; |
| 97 | + return 0; |
| 98 | + } |
| 99 | + sect_hdr++; |
| 100 | + } |
| 101 | + |
| 102 | + return -1; |
| 103 | +} |
| 104 | + |
/*
 * pe_has_bpf_section - tell whether a PE file carries a ".bpf" section
 * @file_buf: start of the PE file
 * @pe_sz: size of the PE file (currently unused)
 *
 * Return: true if pe_get_section() finds a ".bpf" section, false otherwise.
 */
static bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz)
{
	char *sect_start = NULL;
	unsigned long sect_sz = 0;

	/* Only the presence of the section matters; location and size are discarded */
	return pe_get_section(file_buf, ".bpf", &sect_start, &sect_sz) >= 0;
}
| 116 | + |
/*
 * Load an ELF held in @bpf_elf (@sz bytes).
 * Currently a stub: nothing is loaded and success (0) is always reported.
 */
static int arm_bpf_prog(char *bpf_elf, unsigned long sz)
{
	int status = 0;

	return status;
}
| 122 | + |
/*
 * Counterpart of arm_bpf_prog(). Currently a stub that does nothing.
 */
static void disarm_bpf_prog(void)
{
	return;
}
| 126 | + |
/*
 * Snapshot of the buffers handed to the bpf hooks for one unfolding step.
 * pe_image_load() fills one of these per loop iteration.
 */
struct kexec_context {
	bool kdump;		/* true when the image type is KEXEC_TYPE_CRASH */
	char *image;		/* PE image being processed in this step */
	int image_sz;		/* size of @image */
	char *initrd;		/* initrd buffer for this step */
	int initrd_sz;		/* size of @initrd */
	char *cmdline;		/* command line for this step */
	int cmdline_sz;		/* strlen() of @cmdline */
};

/* Hook points called around each unfolding step; defined below */
void bpf_handle_pefile(struct kexec_context *context);
void bpf_post_handle_pefile(struct kexec_context *context);
| 139 | + |
| 140 | + |
/*
 * Hook called by pe_image_load() with the buffers of the current step; the
 * call site describes it as a bpf-prog fentry point.
 *
 * optimize("O0") prevents inlining and compiler constant propagation.
 */
__attribute__((used, optimize("O0"))) void bpf_handle_pefile(struct kexec_context *context)
{
	/*
	 * Give this stub a body that differs from bpf_post_handle_pefile()
	 * so that the linker's Identical Code Folding (ICF) cannot merge
	 * the two functions.
	 */
	volatile int dummy;

	dummy = 0;
	dummy = dummy + 1;
}
| 154 | + |
/*
 * Hook called by pe_image_load() after the parsed results of the current
 * step have been picked up; the call site describes it as a bpf-prog fentry
 * point. The body only exists to differ from bpf_handle_pefile().
 */
__attribute__((used, optimize("O0"))) void bpf_post_handle_pefile(struct kexec_context *context)
{
	volatile int dummy;

	dummy = 0;
	dummy = dummy + 2;
}
| 161 | + |
| 162 | +/* |
| 163 | + * PE file may be nested and should be unfold one by one. |
| 164 | + * Query 'kernel', 'initrd', 'cmdline' in cur_phase, as they are inputs for the |
| 165 | + * next phase. |
| 166 | + */ |
| 167 | +static int prepare_nested_pe(char **kernel, unsigned long *kernel_len, char **initrd, |
| 168 | + unsigned long *initrd_len, char **cmdline) |
| 169 | +{ |
| 170 | + struct kexec_res *res; |
| 171 | + int ret = -1; |
| 172 | + |
| 173 | + *kernel = NULL; |
| 174 | + *kernel_len = 0; |
| 175 | + |
| 176 | + res = &parsed_resource[0]; |
| 177 | + if (!!res->r) { |
| 178 | + *kernel = res->r->buf; |
| 179 | + *kernel_len = res->r->data_sz; |
| 180 | + ret = 0; |
| 181 | + } |
| 182 | + |
| 183 | + res = &parsed_resource[1]; |
| 184 | + if (!!res->r) { |
| 185 | + *initrd = res->r->buf; |
| 186 | + *initrd_len = res->r->data_sz; |
| 187 | + } |
| 188 | + |
| 189 | + res = &parsed_resource[2]; |
| 190 | + if (!!res->r) { |
| 191 | + *cmdline = res->r->buf; |
| 192 | + } |
| 193 | + |
| 194 | + return ret; |
| 195 | +} |
| 196 | + |
| 197 | +static void *pe_image_load(struct kimage *image, |
| 198 | + char *kernel, unsigned long kernel_len, |
| 199 | + char *initrd, unsigned long initrd_len, |
| 200 | + char *cmdline, unsigned long cmdline_len) |
| 201 | +{ |
| 202 | + char *linux_start, *initrd_start, *cmdline_start, *bpf_start; |
| 203 | + unsigned long linux_sz, initrd_sz, cmdline_sz, bpf_sz; |
| 204 | + struct kexec_res *res; |
| 205 | + struct mem_range_result *r; |
| 206 | + void *ldata; |
| 207 | + int ret; |
| 208 | + |
| 209 | + linux_start = kernel; |
| 210 | + linux_sz = kernel_len; |
| 211 | + initrd_start = initrd; |
| 212 | + initrd_sz = initrd_len; |
| 213 | + cmdline_start = cmdline; |
| 214 | + cmdline_sz = cmdline_len; |
| 215 | + |
| 216 | + while (is_valid_format(linux_start, linux_sz) && |
| 217 | + pe_has_bpf_section(linux_start, linux_sz)) { |
| 218 | + struct kexec_context context; |
| 219 | + |
| 220 | + pe_get_section((const char *)linux_start, ".bpf", &bpf_start, &bpf_sz); |
| 221 | + if (!!bpf_sz) { |
| 222 | + /* load and attach bpf-prog */ |
| 223 | + ret = arm_bpf_prog(bpf_start, bpf_sz); |
| 224 | + if (ret) { |
| 225 | + pr_err("Fail to load .bpf section\n"); |
| 226 | + ldata = ERR_PTR(ret); |
| 227 | + goto err; |
| 228 | + } |
| 229 | + } |
| 230 | + if (image->type != KEXEC_TYPE_CRASH) |
| 231 | + context.kdump = false; |
| 232 | + else |
| 233 | + context.kdump = true; |
| 234 | + context.image = linux_start; |
| 235 | + context.image_sz = linux_sz; |
| 236 | + context.initrd = initrd_start; |
| 237 | + context.initrd_sz = initrd_sz; |
| 238 | + context.cmdline = cmdline_start; |
| 239 | + context.cmdline_sz = strlen(cmdline_start); |
| 240 | + /* bpf-prog fentry, which handle above buffers. */ |
| 241 | + bpf_handle_pefile(&context); |
| 242 | + |
| 243 | + prepare_nested_pe(&linux_start, &linux_sz, &initrd_start, |
| 244 | + &initrd_sz, &cmdline_start); |
| 245 | + /* bpf-prog fentry */ |
| 246 | + bpf_post_handle_pefile(&context); |
| 247 | + /* |
| 248 | + * detach the current bpf-prog from their attachment points. |
| 249 | + */ |
| 250 | + disarm_bpf_prog(); |
| 251 | + } |
| 252 | + |
| 253 | + /* |
| 254 | + * image's kernel_buf, initrd_buf, cmdline_buf are set. Now they should |
| 255 | + * be updated to the new content. |
| 256 | + */ |
| 257 | + |
| 258 | + res = &parsed_resource[0]; |
| 259 | + /* Kernel part should always be parsed */ |
| 260 | + if (!res->r) { |
| 261 | + pr_err("Can not parse kernel\n"); |
| 262 | + ldata = ERR_PTR(-EINVAL); |
| 263 | + goto err; |
| 264 | + } |
| 265 | + kernel = res->r->buf; |
| 266 | + kernel_len = res->r->data_sz; |
| 267 | + vfree(image->kernel_buf); |
| 268 | + image->kernel_buf = kernel; |
| 269 | + image->kernel_buf_len = kernel_len; |
| 270 | + |
| 271 | + res = &parsed_resource[1]; |
| 272 | + if (!!res->r) { |
| 273 | + initrd = res->r->buf; |
| 274 | + initrd_len = res->r->data_sz; |
| 275 | + vfree(image->initrd_buf); |
| 276 | + image->initrd_buf = initrd; |
| 277 | + image->initrd_buf_len = initrd_len; |
| 278 | + } |
| 279 | + res = &parsed_resource[2]; |
| 280 | + if (!!res->r) { |
| 281 | + cmdline = res->r->buf; |
| 282 | + cmdline_len = res->r->data_sz; |
| 283 | + kfree(image->cmdline_buf); |
| 284 | + image->cmdline_buf = cmdline; |
| 285 | + image->cmdline_buf_len = cmdline_len; |
| 286 | + } |
| 287 | + |
| 288 | + if (kernel == NULL || initrd == NULL || cmdline == NULL) { |
| 289 | + char *c, buf[64]; |
| 290 | + |
| 291 | + c = buf; |
| 292 | + if (kernel == NULL) { |
| 293 | + strcpy(c, "kernel "); |
| 294 | + c += strlen("kernel "); |
| 295 | + } |
| 296 | + if (initrd == NULL) { |
| 297 | + strcpy(c, "initrd "); |
| 298 | + c += strlen("initrd "); |
| 299 | + } |
| 300 | + if (cmdline == NULL) { |
| 301 | + strcpy(c, "cmdline "); |
| 302 | + c += strlen("cmdline "); |
| 303 | + } |
| 304 | + c = '\0'; |
| 305 | + pr_err("Can not extract data for %s", buf); |
| 306 | + ldata = ERR_PTR(-EINVAL); |
| 307 | + goto err; |
| 308 | + } |
| 309 | + |
| 310 | + ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, |
| 311 | + image->kernel_buf_len); |
| 312 | + if (ret) { |
| 313 | + pr_err("Fail to find suitable image loader\n"); |
| 314 | + ldata = ERR_PTR(ret); |
| 315 | + goto err; |
| 316 | + } |
| 317 | + ldata = kexec_image_load_default(image); |
| 318 | + if (IS_ERR(ldata)) { |
| 319 | + pr_err("architecture code fails to load image\n"); |
| 320 | + goto err; |
| 321 | + } |
| 322 | + image->image_loader_data = ldata; |
| 323 | + |
| 324 | +err: |
| 325 | + for (int i = 0; i < 3; i++) { |
| 326 | + r = parsed_resource[i].r; |
| 327 | + if (!r) |
| 328 | + continue; |
| 329 | + parsed_resource[i].r = NULL; |
| 330 | + /* |
| 331 | + * The release of buffer defers to |
| 332 | + * kimage_file_post_load_cleanup() |
| 333 | + */ |
| 334 | + r->buf = NULL; |
| 335 | + r->buf_sz = 0; |
| 336 | + mem_range_result_put(r); |
| 337 | + } |
| 338 | + |
| 339 | + return ldata; |
| 340 | +} |
| 341 | + |
| 342 | +const struct kexec_file_ops kexec_pe_image_ops = { |
| 343 | + .probe = pe_image_probe, |
| 344 | + .load = pe_image_load, |
| 345 | +#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG |
| 346 | + .verify_sig = kexec_kernel_verify_pe_sig, |
| 347 | +#endif |
| 348 | +}; |
0 commit comments