Skip to content

Commit 653608c

Browse files
committed
Merge tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull more xen updates from Juergen Gross:
 "A second round of Xen related changes and features:

   - a small fix of the xen-pciback driver for a warning issued by sparse

   - support PCI passthrough when using a PVH dom0

   - enable loading the kernel in PVH mode at arbitrary addresses, avoiding
     conflicts with the memory map when running as a Xen dom0 using the
     host memory layout"

* tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  x86/pvh: Add 64bit relocation page tables
  x86/kernel: Move page table macros to header
  x86/pvh: Set phys_base when calling xen_prepare_pvh()
  x86/pvh: Make PVH entrypoint PIC for x86-64
  xen: sync elfnote.h from xen tree
  xen/pciback: fix cast to restricted pci_ers_result_t and pci_power_t
  xen/privcmd: Add new syscall to get gsi from dev
  xen/pvh: Setup gsi for passthrough device
  xen/pci: Add a function to reset device for xen
2 parents e477dba + 47ffe05 commit 653608c

File tree

17 files changed

+509
-47
lines changed

17 files changed

+509
-47
lines changed

arch/x86/include/asm/pgtable_64.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
270270

271271
#include <asm/pgtable-invert.h>
272272

273-
#endif /* !__ASSEMBLY__ */
273+
#else /* __ASSEMBLY__ */
274+
275+
#define l4_index(x) (((x) >> 39) & 511)
276+
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
277+
278+
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
279+
L4_START_KERNEL = l4_index(__START_KERNEL_map)
280+
281+
L3_START_KERNEL = pud_index(__START_KERNEL_map)
282+
283+
#define SYM_DATA_START_PAGE_ALIGNED(name) \
284+
SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
285+
286+
/* Automate the creation of 1 to 1 mapping pmd entries */
287+
#define PMDS(START, PERM, COUNT) \
288+
i = 0 ; \
289+
.rept (COUNT) ; \
290+
.quad (START) + (i << PMD_SHIFT) + (PERM) ; \
291+
i = i + 1 ; \
292+
.endr
293+
294+
#endif /* __ASSEMBLY__ */
274295
#endif /* _ASM_X86_PGTABLE_64_H */

arch/x86/kernel/head_64.S

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,6 @@
3232
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
3333
* because we need identity-mapped pages.
3434
*/
35-
#define l4_index(x) (((x) >> 39) & 511)
36-
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
37-
38-
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
39-
L4_START_KERNEL = l4_index(__START_KERNEL_map)
40-
41-
L3_START_KERNEL = pud_index(__START_KERNEL_map)
4235

4336
__HEAD
4437
.code64
@@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
577570
SYM_CODE_END(vc_no_ghcb)
578571
#endif
579572

580-
#define SYM_DATA_START_PAGE_ALIGNED(name) \
581-
SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
582-
583573
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
584574
/*
585575
* Each PGD needs to be 8k long and 8k aligned. We do not
@@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb)
601591
#define PTI_USER_PGD_FILL 0
602592
#endif
603593

604-
/* Automate the creation of 1 to 1 mapping pmd entries */
605-
#define PMDS(START, PERM, COUNT) \
606-
i = 0 ; \
607-
.rept (COUNT) ; \
608-
.quad (START) + (i << PMD_SHIFT) + (PERM) ; \
609-
i = i + 1 ; \
610-
.endr
611-
612594
__INITDATA
613595
.balign 4
614596

@@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
708690
.endr
709691
SYM_DATA_END(level1_fixmap_pgt)
710692

711-
#undef PMDS
712-
713693
.data
714694
.align 16
715695

arch/x86/platform/pvh/head.S

Lines changed: 149 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
.code32
88
.text
99
#define _pa(x) ((x) - __START_KERNEL_map)
10+
#define rva(x) ((x) - pvh_start_xen)
1011

1112
#include <linux/elfnote.h>
1213
#include <linux/init.h>
@@ -15,6 +16,7 @@
1516
#include <asm/segment.h>
1617
#include <asm/asm.h>
1718
#include <asm/boot.h>
19+
#include <asm/pgtable.h>
1820
#include <asm/processor-flags.h>
1921
#include <asm/msr.h>
2022
#include <asm/nospec-branch.h>
@@ -54,22 +56,40 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
5456
UNWIND_HINT_END_OF_STACK
5557
cld
5658

57-
lgdt (_pa(gdt))
59+
/*
60+
* See the comment for startup_32 for more details. We need to
61+
* execute a call to get the execution address to be position
62+
* independent, but we don't have a stack. Save and restore the
63+
* magic field of start_info in ebx, and use that as the stack.
64+
*/
65+
mov (%ebx), %eax
66+
leal 4(%ebx), %esp
67+
ANNOTATE_INTRA_FUNCTION_CALL
68+
call 1f
69+
1: popl %ebp
70+
mov %eax, (%ebx)
71+
subl $rva(1b), %ebp
72+
movl $0, %esp
73+
74+
leal rva(gdt)(%ebp), %eax
75+
leal rva(gdt_start)(%ebp), %ecx
76+
movl %ecx, 2(%eax)
77+
lgdt (%eax)
5878

5979
mov $PVH_DS_SEL,%eax
6080
mov %eax,%ds
6181
mov %eax,%es
6282
mov %eax,%ss
6383

6484
/* Stash hvm_start_info. */
65-
mov $_pa(pvh_start_info), %edi
85+
leal rva(pvh_start_info)(%ebp), %edi
6686
mov %ebx, %esi
67-
mov _pa(pvh_start_info_sz), %ecx
87+
movl rva(pvh_start_info_sz)(%ebp), %ecx
6888
shr $2,%ecx
6989
rep
7090
movsl
7191

72-
mov $_pa(early_stack_end), %esp
92+
leal rva(early_stack_end)(%ebp), %esp
7393

7494
/* Enable PAE mode. */
7595
mov %cr4, %eax
@@ -83,31 +103,86 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
83103
btsl $_EFER_LME, %eax
84104
wrmsr
85105

106+
mov %ebp, %ebx
107+
subl $_pa(pvh_start_xen), %ebx /* offset */
108+
jz .Lpagetable_done
109+
110+
/* Fixup page-tables for relocation. */
111+
leal rva(pvh_init_top_pgt)(%ebp), %edi
112+
movl $PTRS_PER_PGD, %ecx
113+
2:
114+
testl $_PAGE_PRESENT, 0x00(%edi)
115+
jz 1f
116+
addl %ebx, 0x00(%edi)
117+
1:
118+
addl $8, %edi
119+
decl %ecx
120+
jnz 2b
121+
122+
/* L3 ident has a single entry. */
123+
leal rva(pvh_level3_ident_pgt)(%ebp), %edi
124+
addl %ebx, 0x00(%edi)
125+
126+
leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
127+
addl %ebx, (PAGE_SIZE - 16)(%edi)
128+
addl %ebx, (PAGE_SIZE - 8)(%edi)
129+
130+
/* pvh_level2_ident_pgt is fine - large pages */
131+
132+
/* pvh_level2_kernel_pgt needs adjustment - large pages */
133+
leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
134+
movl $PTRS_PER_PMD, %ecx
135+
2:
136+
testl $_PAGE_PRESENT, 0x00(%edi)
137+
jz 1f
138+
addl %ebx, 0x00(%edi)
139+
1:
140+
addl $8, %edi
141+
decl %ecx
142+
jnz 2b
143+
144+
.Lpagetable_done:
86145
/* Enable pre-constructed page tables. */
87-
mov $_pa(init_top_pgt), %eax
146+
leal rva(pvh_init_top_pgt)(%ebp), %eax
88147
mov %eax, %cr3
89148
mov $(X86_CR0_PG | X86_CR0_PE), %eax
90149
mov %eax, %cr0
91150

92151
/* Jump to 64-bit mode. */
93-
ljmp $PVH_CS_SEL, $_pa(1f)
152+
pushl $PVH_CS_SEL
153+
leal rva(1f)(%ebp), %eax
154+
pushl %eax
155+
lretl
94156

95157
/* 64-bit entry point. */
96158
.code64
97159
1:
160+
UNWIND_HINT_END_OF_STACK
161+
98162
/* Set base address in stack canary descriptor. */
99163
mov $MSR_GS_BASE,%ecx
100-
mov $_pa(canary), %eax
164+
leal canary(%rip), %eax
101165
xor %edx, %edx
102166
wrmsr
103167

168+
/*
169+
* Calculate load offset and store in phys_base. __pa() needs
170+
* phys_base set to calculate the hypercall page in xen_pvh_init().
171+
*/
172+
movq %rbp, %rbx
173+
subq $_pa(pvh_start_xen), %rbx
174+
movq %rbx, phys_base(%rip)
104175
call xen_prepare_pvh
176+
/*
177+
* Clear phys_base. __startup_64 will *add* to its value,
178+
* so reset to 0.
179+
*/
180+
xor %rbx, %rbx
181+
movq %rbx, phys_base(%rip)
105182

106183
/* startup_64 expects boot_params in %rsi. */
107-
mov $_pa(pvh_bootparams), %rsi
108-
mov $_pa(startup_64), %rax
109-
ANNOTATE_RETPOLINE_SAFE
110-
jmp *%rax
184+
lea pvh_bootparams(%rip), %rsi
185+
jmp startup_64
111186

112187
#else /* CONFIG_X86_64 */
113188

@@ -143,7 +218,7 @@ SYM_CODE_END(pvh_start_xen)
143218
.balign 8
144219
SYM_DATA_START_LOCAL(gdt)
145220
.word gdt_end - gdt_start
146-
.long _pa(gdt_start)
221+
.long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
147222
.word 0
148223
SYM_DATA_END(gdt)
149224
SYM_DATA_START_LOCAL(gdt_start)
@@ -163,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack)
163238
.fill BOOT_STACK_SIZE, 1, 0
164239
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
165240

241+
#ifdef CONFIG_X86_64
242+
/*
243+
* Xen PVH needs a set of identity mapped and kernel high mapping
244+
* page tables. pvh_start_xen starts running on the identity mapped
245+
* page tables, but xen_prepare_pvh calls into the high mapping.
246+
* These page tables need to be relocatable and are only used until
247+
* startup_64 transitions to init_top_pgt.
248+
*/
249+
SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
250+
.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
251+
.org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
252+
.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
253+
.org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
254+
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
255+
.quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
256+
SYM_DATA_END(pvh_init_top_pgt)
257+
258+
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
259+
.quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
260+
.fill 511, 8, 0
261+
SYM_DATA_END(pvh_level3_ident_pgt)
262+
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
263+
/*
264+
* Since I easily can, map the first 1G.
265+
* Don't set NX because code runs from these pages.
266+
*
267+
* Note: This sets _PAGE_GLOBAL despite whether
268+
* the CPU supports it or it is enabled. But,
269+
* the CPU should ignore the bit.
270+
*/
271+
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
272+
SYM_DATA_END(pvh_level2_ident_pgt)
273+
SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
274+
.fill L3_START_KERNEL, 8, 0
275+
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
276+
.quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
277+
.quad 0 /* no fixmap */
278+
SYM_DATA_END(pvh_level3_kernel_pgt)
279+
280+
SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
281+
/*
282+
* Kernel high mapping.
283+
*
284+
* The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
285+
* virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
286+
* 512 MiB otherwise.
287+
*
288+
* (NOTE: after that starts the module area, see MODULES_VADDR.)
289+
*
290+
* This table is eventually used by the kernel during normal runtime.
291+
* Care must be taken to clear out undesired bits later, like _PAGE_RW
292+
* or _PAGE_GLOBAL in some cases.
293+
*/
294+
PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
295+
SYM_DATA_END(pvh_level2_kernel_pgt)
296+
297+
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
298+
.long CONFIG_PHYSICAL_ALIGN;
299+
.long LOAD_PHYSICAL_ADDR;
300+
.long KERNEL_IMAGE_SIZE - 1)
301+
#endif
302+
166303
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
167304
_ASM_PTR (pvh_start_xen - __START_KERNEL_map))

arch/x86/xen/enlighten_pvh.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <linux/mm.h>
55

66
#include <xen/hvc-console.h>
7+
#include <xen/acpi.h>
78

89
#include <asm/bootparam.h>
910
#include <asm/io_apic.h>
@@ -28,6 +29,28 @@
2829
bool __ro_after_init xen_pvh;
2930
EXPORT_SYMBOL_GPL(xen_pvh);
3031

32+
#ifdef CONFIG_XEN_DOM0
33+
int xen_pvh_setup_gsi(int gsi, int trigger, int polarity)
34+
{
35+
int ret;
36+
struct physdev_setup_gsi setup_gsi;
37+
38+
setup_gsi.gsi = gsi;
39+
setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1);
40+
setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
41+
42+
ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
43+
if (ret == -EEXIST) {
44+
xen_raw_printk("Already setup the GSI :%d\n", gsi);
45+
ret = 0;
46+
} else if (ret)
47+
xen_raw_printk("Fail to setup GSI (%d)!\n", gsi);
48+
49+
return ret;
50+
}
51+
EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi);
52+
#endif
53+
3154
/*
3255
* Reserve e820 UNUSABLE regions to inflate the memory balloon.
3356
*

drivers/acpi/pci_irq.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
288288
}
289289
#endif /* CONFIG_X86_IO_APIC */
290290

291-
static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
291+
struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
292292
{
293293
struct acpi_prt_entry *entry = NULL;
294294
struct pci_dev *bridge;

drivers/xen/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND
261261
config XEN_PRIVCMD
262262
tristate "Xen hypercall passthrough driver"
263263
depends on XEN
264+
imply CONFIG_XEN_PCIDEV_BACKEND
264265
default m
265266
help
266267
The hypercall passthrough driver allows privileged user programs to

0 commit comments

Comments (0)