Skip to content

Commit a440e4d

Browse files
committed
Merge tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Borislav Petkov:

"As expected, fixes started trickling in after the holidays so here is the accumulated pile of x86 fixes for 5.11:

- A fix for fanotify_mark() missing the conversion of x86_32 native syscalls which take 64-bit arguments to the compat handlers due to the former having a general compat handler. (Brian Gerst)

- Add a forgotten pmd page destructor call to pud_free_pmd_page() where a pmd page is freed. (Dan Williams)

- Make IN/OUT insns with a u8 immediate port operand handling for SEV-ES guests more precise by using only the single port byte and not the whole s32 value of the insn decoder. (Peter Gonda)

- Correct a straddling end range check before returning the proper MTRR type, when the end address is the same as top of memory. (Ying-Tsun Huang)

- Change PQR_ASSOC MSR update scheme when moving a task to a resctrl resource group to avoid significant performance overhead with some resctrl workloads. (Fenghua Yu)

- Avoid the actual task move overhead when the task is already in the resource group. (Fenghua Yu)"

* tag 'x86_urgent_for_v5.11_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Don't move a task to the same resource group
  x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR
  x86/mtrr: Correct the range check before performing MTRR type lookups
  x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling
  x86/mm: Fix leak of pmd ptlock
  fanotify: Fix sys_fanotify_mark() on native x86-32
2 parents 2ff9010 + a0195f3 commit a440e4d

File tree

8 files changed

+94
-83
lines changed

8 files changed

+94
-83
lines changed

arch/Kconfig

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID
11051105
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
11061106
bool
11071107

1108+
config ARCH_SPLIT_ARG64
1109+
bool
1110+
help
1111+
If a 32-bit architecture requires 64-bit arguments to be split into
1112+
pairs of 32-bit arguments, select this option.
1113+
11081114
source "kernel/gcov/Kconfig"
11091115

11101116
source "scripts/gcc-plugins/Kconfig"

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ config X86_32
1919
select KMAP_LOCAL
2020
select MODULES_USE_ELF_REL
2121
select OLD_SIGACTION
22+
select ARCH_SPLIT_ARG64
2223

2324
config X86_64
2425
def_bool y

arch/x86/kernel/cpu/mtrr/generic.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
167167
*repeat = 0;
168168
*uniform = 1;
169169

170-
/* Make end inclusive instead of exclusive */
171-
end--;
172-
173170
prev_match = MTRR_TYPE_INVALID;
174171
for (i = 0; i < num_var_ranges; ++i) {
175172
unsigned short start_state, end_state, inclusive;
@@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
261258
int repeat;
262259
u64 partial_end;
263260

261+
/* Make end inclusive instead of exclusive */
262+
end--;
263+
264264
if (!mtrr_state_set)
265265
return MTRR_TYPE_INVALID;
266266

arch/x86/kernel/cpu/resctrl/rdtgroup.c

Lines changed: 49 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -525,89 +525,70 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp)
525525
kfree(rdtgrp);
526526
}
527527

528-
struct task_move_callback {
529-
struct callback_head work;
530-
struct rdtgroup *rdtgrp;
531-
};
532-
533-
static void move_myself(struct callback_head *head)
528+
static void _update_task_closid_rmid(void *task)
534529
{
535-
struct task_move_callback *callback;
536-
struct rdtgroup *rdtgrp;
537-
538-
callback = container_of(head, struct task_move_callback, work);
539-
rdtgrp = callback->rdtgrp;
540-
541530
/*
542-
* If resource group was deleted before this task work callback
543-
* was invoked, then assign the task to root group and free the
544-
* resource group.
531+
* If the task is still current on this CPU, update PQR_ASSOC MSR.
532+
* Otherwise, the MSR is updated when the task is scheduled in.
545533
*/
546-
if (atomic_dec_and_test(&rdtgrp->waitcount) &&
547-
(rdtgrp->flags & RDT_DELETED)) {
548-
current->closid = 0;
549-
current->rmid = 0;
550-
rdtgroup_remove(rdtgrp);
551-
}
552-
553-
if (unlikely(current->flags & PF_EXITING))
554-
goto out;
555-
556-
preempt_disable();
557-
/* update PQR_ASSOC MSR to make resource group go into effect */
558-
resctrl_sched_in();
559-
preempt_enable();
534+
if (task == current)
535+
resctrl_sched_in();
536+
}
560537

561-
out:
562-
kfree(callback);
538+
static void update_task_closid_rmid(struct task_struct *t)
539+
{
540+
if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
541+
smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
542+
else
543+
_update_task_closid_rmid(t);
563544
}
564545

565546
static int __rdtgroup_move_task(struct task_struct *tsk,
566547
struct rdtgroup *rdtgrp)
567548
{
568-
struct task_move_callback *callback;
569-
int ret;
570-
571-
callback = kzalloc(sizeof(*callback), GFP_KERNEL);
572-
if (!callback)
573-
return -ENOMEM;
574-
callback->work.func = move_myself;
575-
callback->rdtgrp = rdtgrp;
549+
/* If the task is already in rdtgrp, no need to move the task. */
550+
if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
551+
tsk->rmid == rdtgrp->mon.rmid) ||
552+
(rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
553+
tsk->closid == rdtgrp->mon.parent->closid))
554+
return 0;
576555

577556
/*
578-
* Take a refcount, so rdtgrp cannot be freed before the
579-
* callback has been invoked.
557+
* Set the task's closid/rmid before the PQR_ASSOC MSR can be
558+
* updated by them.
559+
*
560+
* For ctrl_mon groups, move both closid and rmid.
561+
* For monitor groups, can move the tasks only from
562+
* their parent CTRL group.
580563
*/
581-
atomic_inc(&rdtgrp->waitcount);
582-
ret = task_work_add(tsk, &callback->work, TWA_RESUME);
583-
if (ret) {
584-
/*
585-
* Task is exiting. Drop the refcount and free the callback.
586-
* No need to check the refcount as the group cannot be
587-
* deleted before the write function unlocks rdtgroup_mutex.
588-
*/
589-
atomic_dec(&rdtgrp->waitcount);
590-
kfree(callback);
591-
rdt_last_cmd_puts("Task exited\n");
592-
} else {
593-
/*
594-
* For ctrl_mon groups move both closid and rmid.
595-
* For monitor groups, can move the tasks only from
596-
* their parent CTRL group.
597-
*/
598-
if (rdtgrp->type == RDTCTRL_GROUP) {
599-
tsk->closid = rdtgrp->closid;
564+
565+
if (rdtgrp->type == RDTCTRL_GROUP) {
566+
tsk->closid = rdtgrp->closid;
567+
tsk->rmid = rdtgrp->mon.rmid;
568+
} else if (rdtgrp->type == RDTMON_GROUP) {
569+
if (rdtgrp->mon.parent->closid == tsk->closid) {
600570
tsk->rmid = rdtgrp->mon.rmid;
601-
} else if (rdtgrp->type == RDTMON_GROUP) {
602-
if (rdtgrp->mon.parent->closid == tsk->closid) {
603-
tsk->rmid = rdtgrp->mon.rmid;
604-
} else {
605-
rdt_last_cmd_puts("Can't move task to different control group\n");
606-
ret = -EINVAL;
607-
}
571+
} else {
572+
rdt_last_cmd_puts("Can't move task to different control group\n");
573+
return -EINVAL;
608574
}
609575
}
610-
return ret;
576+
577+
/*
578+
* Ensure the task's closid and rmid are written before checking whether
579+
* the task is current; that check decides whether to interrupt it.
580+
*/
581+
barrier();
582+
583+
/*
584+
* By now, the task's closid and rmid are set. If the task is current
585+
* on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
586+
* group go into effect. If the task is not current, the MSR will be
587+
* updated when the task is scheduled in.
588+
*/
589+
update_task_closid_rmid(tsk);
590+
591+
return 0;
611592
}
612593

613594
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)

arch/x86/kernel/sev-es-shared.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
305305
case 0xe4:
306306
case 0xe5:
307307
*exitinfo |= IOIO_TYPE_IN;
308-
*exitinfo |= (u64)insn->immediate.value << 16;
308+
*exitinfo |= (u8)insn->immediate.value << 16;
309309
break;
310310

311311
/* OUT immediate opcodes */
312312
case 0xe6:
313313
case 0xe7:
314314
*exitinfo |= IOIO_TYPE_OUT;
315-
*exitinfo |= (u64)insn->immediate.value << 16;
315+
*exitinfo |= (u8)insn->immediate.value << 16;
316316
break;
317317

318318
/* IN register opcodes */

arch/x86/mm/pgtable.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr)
829829
}
830830

831831
free_page((unsigned long)pmd_sv);
832+
833+
pgtable_pmd_page_dtor(virt_to_page(pmd));
832834
free_page((unsigned long)pmd);
833835

834836
return 1;

fs/notify/fanotify/fanotify_user.c

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,26 +1285,23 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
12851285
return ret;
12861286
}
12871287

1288+
#ifndef CONFIG_ARCH_SPLIT_ARG64
12881289
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
12891290
__u64, mask, int, dfd,
12901291
const char __user *, pathname)
12911292
{
12921293
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
12931294
}
1295+
#endif
12941296

1295-
#ifdef CONFIG_COMPAT
1296-
COMPAT_SYSCALL_DEFINE6(fanotify_mark,
1297+
#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
1298+
SYSCALL32_DEFINE6(fanotify_mark,
12971299
int, fanotify_fd, unsigned int, flags,
1298-
__u32, mask0, __u32, mask1, int, dfd,
1300+
SC_ARG64(mask), int, dfd,
12991301
const char __user *, pathname)
13001302
{
1301-
return do_fanotify_mark(fanotify_fd, flags,
1302-
#ifdef __BIG_ENDIAN
1303-
((__u64)mask0 << 32) | mask1,
1304-
#else
1305-
((__u64)mask1 << 32) | mask0,
1306-
#endif
1307-
dfd, pathname);
1303+
return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
1304+
dfd, pathname);
13081305
}
13091306
#endif
13101307

include/linux/syscalls.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
251251
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
252252
#endif /* __SYSCALL_DEFINEx */
253253

254+
/* For split 64-bit arguments on 32-bit architectures */
255+
#ifdef __LITTLE_ENDIAN
256+
#define SC_ARG64(name) u32, name##_lo, u32, name##_hi
257+
#else
258+
#define SC_ARG64(name) u32, name##_hi, u32, name##_lo
259+
#endif
260+
#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo)
261+
262+
#ifdef CONFIG_COMPAT
263+
#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1
264+
#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2
265+
#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3
266+
#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4
267+
#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5
268+
#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6
269+
#else
270+
#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1
271+
#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2
272+
#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3
273+
#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4
274+
#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5
275+
#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6
276+
#endif
277+
254278
/*
255279
* Called before coming back to user-mode. Returning to user-mode with an
256280
* address limit different than USER_DS can allow to overwrite kernel memory.

0 commit comments

Comments
 (0)