Skip to content

Commit 85fbf15

Browse files
committed
Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 boot updates from Ingo Molnar: "The main changes were: - Extend the boot protocol to allow future extensions without hitting the setup_header size limit. - Add quirk to devicetree systems to disable the RTC unless it's listed as a supported device. - Fix ld.lld linker pedantry" * 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/boot: Introduce setup_indirect x86/boot: Introduce kernel_info.setup_type_max x86/boot: Introduce kernel_info x86/init: Allow DT configured systems to disable RTC at boot time x86/realmode: Explicitly set entry point via ENTRY in linker script
2 parents fd26159 + b3c72fc commit 85fbf15

File tree

15 files changed

+326
-17
lines changed

15 files changed

+326
-17
lines changed

Documentation/x86/boot.rst

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,25 @@ Protocol 2.12 (Kernel 3.8) Added the xloadflags field and extension fields
6868
Protocol 2.13 (Kernel 3.14) Support 32- and 64-bit flags being set in
6969
xloadflags to support booting a 64-bit kernel from 32-bit
7070
EFI
71+
72+
Protocol 2.14: BURNT BY INCORRECT COMMIT ae7e1238e68f2a472a125673ab506d49158c1889
73+
(x86/boot: Add ACPI RSDP address to setup_header)
74+
DO NOT USE!!! ASSUME SAME AS 2.13.
75+
76+
Protocol 2.15: (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
7177
============= ============================================================
7278

79+
.. note::
80+
The protocol version number should be changed only if the setup header
81+
is changed. There is no need to update the version number if boot_params
82+
or kernel_info are changed. Additionally, it is recommended to use
83+
xloadflags (in this case the protocol version number should not be
84+
updated either) or kernel_info to communicate supported Linux kernel
85+
features to the boot loader. Due to very limited space available in
86+
the original setup header every update to it should be considered
87+
with great care. Starting from protocol 2.15, the primary way to
88+
communicate things to the boot loader is the kernel_info.
89+
7390

7491
Memory Layout
7592
=============
@@ -207,6 +224,7 @@ Offset/Size Proto Name Meaning
207224
0258/8 2.10+ pref_address Preferred loading address
208225
0260/4 2.10+ init_size Linear memory required during initialization
209226
0264/4 2.11+ handover_offset Offset of handover entry point
227+
0268/4 2.15+ kernel_info_offset Offset of the kernel_info
210228
=========== ======== ===================== ============================================
211229

212230
.. note::
@@ -809,6 +827,47 @@ Protocol: 2.09+
809827
sure to consider the case where the linked list already contains
810828
entries.
811829

830+
The setup_data is a bit awkward to use for extremely large data objects,
831+
both because the setup_data header has to be adjacent to the data object
832+
and because it has a 32-bit length field. However, it is important that
833+
intermediate stages of the boot process have a way to identify which
834+
chunks of memory are occupied by kernel data.
835+
836+
Thus the setup_indirect struct and the SETUP_INDIRECT type were introduced in
837+
protocol 2.15.
838+
839+
struct setup_indirect {
840+
__u32 type;
841+
__u32 reserved; /* Reserved, must be set to zero. */
842+
__u64 len;
843+
__u64 addr;
844+
};
845+
846+
The type member is a SETUP_INDIRECT | SETUP_* type. However, it cannot be
847+
SETUP_INDIRECT itself since making the setup_indirect a tree structure
848+
could require a lot of stack space in something that needs to parse it
849+
and stack space can be limited in boot contexts.
850+
851+
Let's give an example of how to point to SETUP_E820_EXT data using setup_indirect.
852+
In this case setup_data and setup_indirect will look like this:
853+
854+
struct setup_data {
855+
__u64 next = 0 or <addr_of_next_setup_data_struct>;
856+
__u32 type = SETUP_INDIRECT;
857+
__u32 len = sizeof(setup_indirect);
858+
__u8 data[sizeof(setup_indirect)] = struct setup_indirect {
859+
__u32 type = SETUP_INDIRECT | SETUP_E820_EXT;
860+
__u32 reserved = 0;
861+
__u64 len = <len_of_SETUP_E820_EXT_data>;
862+
__u64 addr = <addr_of_SETUP_E820_EXT_data>;
863+
}
864+
}
865+
866+
.. note::
867+
SETUP_INDIRECT | SETUP_NONE objects cannot be properly distinguished
868+
from SETUP_INDIRECT itself. So, this kind of object cannot be provided
869+
by the bootloaders.
870+
812871
============ ============
813872
Field name: pref_address
814873
Type: read (reloc)
@@ -855,6 +914,121 @@ Offset/size: 0x264/4
855914

856915
See EFI HANDOVER PROTOCOL below for more details.
857916

917+
============ ==================
918+
Field name: kernel_info_offset
919+
Type: read
920+
Offset/size: 0x268/4
921+
Protocol: 2.15+
922+
============ ==================
923+
924+
This field is the offset from the beginning of the kernel image to the
925+
kernel_info. The kernel_info structure is embedded in the Linux image
926+
in the uncompressed protected mode region.
927+
928+
929+
The kernel_info
930+
===============
931+
932+
The relationships between the headers are analogous to the various data
933+
sections:
934+
935+
setup_header = .data
936+
boot_params/setup_data = .bss
937+
938+
What is missing from the above list? That's right:
939+
940+
kernel_info = .rodata
941+
942+
We have been (ab)using .data for things that could go into .rodata or .bss for
943+
a long time, for lack of alternatives and -- especially early on -- inertia.
944+
Also, the BIOS stub is responsible for creating boot_params, so it isn't
945+
available to a BIOS-based loader (setup_data is, though).
946+
947+
setup_header is permanently limited to 144 bytes due to the reach of the
948+
2-byte jump field, which doubles as a length field for the structure, combined
949+
with the size of the "hole" in struct boot_params that a protected-mode loader
950+
or the BIOS stub has to copy it into. It is currently 119 bytes long, which
951+
leaves us with 25 very precious bytes. This isn't something that can be fixed
952+
without revising the boot protocol entirely, breaking backwards compatibility.
953+
954+
boot_params proper is limited to 4096 bytes, but can be arbitrarily extended
955+
by adding setup_data entries. It cannot be used to communicate properties of
956+
the kernel image, because it is .bss and has no image-provided content.
957+
958+
kernel_info solves this by providing an extensible place for information about
959+
the kernel image. It is readonly, because the kernel cannot rely on a
960+
bootloader copying its contents anywhere, but that is OK; if it becomes
961+
necessary it can still contain data items that an enabled bootloader would be
962+
expected to copy into a setup_data chunk.
963+
964+
All kernel_info data should be part of this structure. Fixed size data have to
965+
be put before kernel_info_var_len_data label. Variable size data have to be put
966+
after kernel_info_var_len_data label. Each chunk of variable size data has to
967+
be prefixed with header/magic and its size, e.g.:
968+
969+
kernel_info:
970+
.ascii "LToP" /* Header, Linux top (structure). */
971+
.long kernel_info_var_len_data - kernel_info
972+
.long kernel_info_end - kernel_info
973+
.long 0x01234567 /* Some fixed size data for the bootloaders. */
974+
kernel_info_var_len_data:
975+
example_struct: /* Some variable size data for the bootloaders. */
976+
.ascii "0123" /* Header/Magic. */
977+
.long example_struct_end - example_struct
978+
.ascii "Struct"
979+
.long 0x89012345
980+
example_struct_end:
981+
example_strings: /* Some variable size data for the bootloaders. */
982+
.ascii "ABCD" /* Header/Magic. */
983+
.long example_strings_end - example_strings
984+
.asciz "String_0"
985+
.asciz "String_1"
986+
example_strings_end:
987+
kernel_info_end:
988+
989+
This way the kernel_info is a self-contained blob.
990+
991+
.. note::
992+
Each variable size data header/magic can be any 4-character string,
993+
without \0 at the end of the string, which does not collide with
994+
existing variable length data headers/magics.
995+
996+
997+
Details of the kernel_info Fields
998+
=================================
999+
1000+
============ ========
1001+
Field name: header
1002+
Offset/size: 0x0000/4
1003+
============ ========
1004+
1005+
Contains the magic number "LToP" (0x506f544c).
1006+
1007+
============ ========
1008+
Field name: size
1009+
Offset/size: 0x0004/4
1010+
============ ========
1011+
1012+
This field contains the size of the kernel_info including kernel_info.header.
1013+
It does not count kernel_info.kernel_info_var_len_data size. This field should be
1014+
used by the bootloaders to detect supported fixed size fields in the kernel_info
1015+
and the beginning of kernel_info.kernel_info_var_len_data.
1016+
1017+
============ ========
1018+
Field name: size_total
1019+
Offset/size: 0x0008/4
1020+
============ ========
1021+
1022+
This field contains the size of the kernel_info including kernel_info.header
1023+
and kernel_info.kernel_info_var_len_data.
1024+
1025+
============ ==============
1026+
Field name: setup_type_max
1027+
Offset/size: 0x000c/4
1028+
============ ==============
1029+
1030+
This field contains the maximal allowed type for setup_data and setup_indirect structs.
1031+
8581032

8591033
The Image Checksum
8601034
==================

arch/x86/boot/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
8787

8888
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
8989

90-
sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
90+
sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
9191

9292
quiet_cmd_zoffset = ZOFFSET $@
9393
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@

arch/x86/boot/compressed/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ $(obj)/../voffset.h: vmlinux FORCE
7272

7373
$(obj)/misc.o: $(obj)/../voffset.h
7474

75-
vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
76-
$(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
75+
vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \
76+
$(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
7777
$(obj)/piggy.o $(obj)/cpuflags.o
7878

7979
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o

arch/x86/boot/compressed/kaslr.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,18 @@ static bool mem_avoid_overlap(struct mem_vector *img,
459459
is_overlapping = true;
460460
}
461461

462+
if (ptr->type == SETUP_INDIRECT &&
463+
((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
464+
avoid.start = ((struct setup_indirect *)ptr->data)->addr;
465+
avoid.size = ((struct setup_indirect *)ptr->data)->len;
466+
467+
if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
468+
*overlap = avoid;
469+
earliest = overlap->start;
470+
is_overlapping = true;
471+
}
472+
}
473+
462474
ptr = (struct setup_data *)(unsigned long)ptr->next;
463475
}
464476

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
3+
#include <asm/bootparam.h>
4+
5+
.section ".rodata.kernel_info", "a"
6+
7+
.global kernel_info
8+
9+
kernel_info:
10+
/* Header, Linux top (structure). */
11+
.ascii "LToP"
12+
/* Size. */
13+
.long kernel_info_var_len_data - kernel_info
14+
/* Size total. */
15+
.long kernel_info_end - kernel_info
16+
17+
/* Maximal allowed type for setup_data and setup_indirect structs. */
18+
.long SETUP_TYPE_MAX
19+
20+
kernel_info_var_len_data:
21+
/* Empty for the time being... */
22+
kernel_info_end:

arch/x86/boot/header.S

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ _start:
300300
# Part 2 of the header, from the old setup.S
301301

302302
.ascii "HdrS" # header signature
303-
.word 0x020d # header version number (>= 0x0105)
303+
.word 0x020f # header version number (>= 0x0105)
304304
# or else old loadlin-1.5 will fail)
305305
.globl realmode_swtch
306306
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -567,6 +567,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
567567

568568
init_size: .long INIT_SIZE # kernel initialization size
569569
handover_offset: .long 0 # Filled in by build.c
570+
kernel_info_offset: .long 0 # Filled in by build.c
570571

571572
# End of setup header #####################################################
572573

arch/x86/boot/tools/build.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ u8 buf[SETUP_SECT_MAX*512];
5656
unsigned long efi32_stub_entry;
5757
unsigned long efi64_stub_entry;
5858
unsigned long efi_pe_entry;
59+
unsigned long kernel_info;
5960
unsigned long startup_64;
6061

6162
/*----------------------------------------------------------------------*/
@@ -321,6 +322,7 @@ static void parse_zoffset(char *fname)
321322
PARSE_ZOFS(p, efi32_stub_entry);
322323
PARSE_ZOFS(p, efi64_stub_entry);
323324
PARSE_ZOFS(p, efi_pe_entry);
325+
PARSE_ZOFS(p, kernel_info);
324326
PARSE_ZOFS(p, startup_64);
325327

326328
p = strchr(p, '\n');
@@ -410,6 +412,9 @@ int main(int argc, char ** argv)
410412

411413
efi_stub_entry_update();
412414

415+
/* Update kernel_info offset. */
416+
put_unaligned_le32(kernel_info, &buf[0x268]);
417+
413418
crc = partial_crc32(buf, i, crc);
414419
if (fwrite(buf, 1, i, dest) != i)
415420
die("Writing setup failed");

arch/x86/include/uapi/asm/bootparam.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#ifndef _ASM_X86_BOOTPARAM_H
33
#define _ASM_X86_BOOTPARAM_H
44

5-
/* setup_data types */
5+
/* setup_data/setup_indirect types */
66
#define SETUP_NONE 0
77
#define SETUP_E820_EXT 1
88
#define SETUP_DTB 2
@@ -11,6 +11,11 @@
1111
#define SETUP_APPLE_PROPERTIES 5
1212
#define SETUP_JAILHOUSE 6
1313

14+
#define SETUP_INDIRECT (1<<31)
15+
16+
/* SETUP_INDIRECT | max(SETUP_*) */
17+
#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE)
18+
1419
/* ram_size flags */
1520
#define RAMDISK_IMAGE_START_MASK 0x07FF
1621
#define RAMDISK_PROMPT_FLAG 0x8000
@@ -49,6 +54,14 @@ struct setup_data {
4954
__u8 data[0];
5055
};
5156

57+
/* extensible setup indirect data node */
58+
struct setup_indirect {
59+
__u32 type;
60+
__u32 reserved; /* Reserved, must be set to zero. */
61+
__u64 len;
62+
__u64 addr;
63+
};
64+
5265
struct setup_header {
5366
__u8 setup_sects;
5467
__u16 root_flags;
@@ -88,6 +101,7 @@ struct setup_header {
88101
__u64 pref_address;
89102
__u32 init_size;
90103
__u32 handover_offset;
104+
__u32 kernel_info_offset;
91105
} __attribute__((packed));
92106

93107
struct sys_desc_table {

arch/x86/kernel/e820.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -999,6 +999,17 @@ void __init e820__reserve_setup_data(void)
999999
data = early_memremap(pa_data, sizeof(*data));
10001000
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
10011001
e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
1002+
1003+
if (data->type == SETUP_INDIRECT &&
1004+
((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
1005+
e820__range_update(((struct setup_indirect *)data->data)->addr,
1006+
((struct setup_indirect *)data->data)->len,
1007+
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
1008+
e820__range_update_kexec(((struct setup_indirect *)data->data)->addr,
1009+
((struct setup_indirect *)data->data)->len,
1010+
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
1011+
}
1012+
10021013
pa_data = data->next;
10031014
early_memunmap(data, sizeof(*data));
10041015
}

0 commit comments

Comments
 (0)