Skip to content

Commit af38553

Browse files
committed
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:

  "14 fixes and one selftest to verify the ipc fixes herein"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: limit boost_watermark on small zones
  ubsan: disable UBSAN_ALIGNMENT under COMPILE_TEST
  mm/vmscan: remove unnecessary argument description of isolate_lru_pages()
  epoll: atomically remove wait entry on wake up
  kselftests: introduce new epoll60 testcase for catching lost wakeups
  percpu: make pcpu_alloc() aware of current gfp context
  mm/slub: fix incorrect interpretation of s->offset
  scripts/gdb: repair rb_first() and rb_last()
  eventpoll: fix missing wakeup for ovflist in ep_poll_callback
  arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory()
  scripts/decodecode: fix trapping instruction formatting
  kernel/kcov.c: fix typos in kcov_remote_start documentation
  mm/page_alloc: fix watchdog soft lockups during set_zone_contiguous()
  mm, memcg: fix error return value of mem_cgroup_css_alloc()
  ipc/mqueue.c: change __do_notify() to bypass check_kill_permission()
2 parents 79dede7 + 14f6914 commit af38553

File tree

14 files changed

+275
-78
lines changed

14 files changed

+275
-78
lines changed

arch/x86/kvm/svm/sev.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
345345
return NULL;
346346

347347
/* Pin the user virtual address. */
348-
npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
348+
npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
349349
if (npinned != npages) {
350350
pr_err("SEV: Failure locking %lu pages.\n", npages);
351351
goto err;

fs/eventpoll.c

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
11711171
{
11721172
struct eventpoll *ep = epi->ep;
11731173

1174+
/* Fast preliminary check */
1175+
if (epi->next != EP_UNACTIVE_PTR)
1176+
return false;
1177+
11741178
/* Check that the same epi has not been just chained from another CPU */
11751179
if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
11761180
return false;
@@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
12371241
* chained in ep->ovflist and requeued later on.
12381242
*/
12391243
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
1240-
if (epi->next == EP_UNACTIVE_PTR &&
1241-
chain_epi_lockless(epi))
1244+
if (chain_epi_lockless(epi))
1245+
ep_pm_stay_awake_rcu(epi);
1246+
} else if (!ep_is_linked(epi)) {
1247+
/* In the usual case, add event to ready list. */
1248+
if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
12421249
ep_pm_stay_awake_rcu(epi);
1243-
goto out_unlock;
1244-
}
1245-
1246-
/* If this file is already in the ready list we exit soon */
1247-
if (!ep_is_linked(epi) &&
1248-
list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
1249-
ep_pm_stay_awake_rcu(epi);
12501250
}
12511251

12521252
/*
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
18221822
{
18231823
int res = 0, eavail, timed_out = 0;
18241824
u64 slack = 0;
1825-
bool waiter = false;
18261825
wait_queue_entry_t wait;
18271826
ktime_t expires, *to = NULL;
18281827

@@ -1867,21 +1866,23 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
18671866
*/
18681867
ep_reset_busy_poll_napi_id(ep);
18691868

1870-
/*
1871-
* We don't have any available event to return to the caller. We need
1872-
* to sleep here, and we will be woken by ep_poll_callback() when events
1873-
* become available.
1874-
*/
1875-
if (!waiter) {
1876-
waiter = true;
1877-
init_waitqueue_entry(&wait, current);
1878-
1869+
do {
1870+
/*
1871+
* Internally init_wait() uses autoremove_wake_function(),
1872+
* thus wait entry is removed from the wait queue on each
1873+
* wakeup. Why it is important? In case of several waiters
1874+
* each new wakeup will hit the next waiter, giving it the
1875+
* chance to harvest new event. Otherwise wakeup can be
1876+
* lost. This is also good performance-wise, because on
1877+
* normal wakeup path no need to call __remove_wait_queue()
1878+
* explicitly, thus ep->lock is not taken, which halts the
1879+
* event delivery.
1880+
*/
1881+
init_wait(&wait);
18791882
write_lock_irq(&ep->lock);
18801883
__add_wait_queue_exclusive(&ep->wq, &wait);
18811884
write_unlock_irq(&ep->lock);
1882-
}
18831885

1884-
for (;;) {
18851886
/*
18861887
* We don't want to sleep if the ep_poll_callback() sends us
18871888
* a wakeup in between. That's why we set the task state
@@ -1911,10 +1912,20 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
19111912
timed_out = 1;
19121913
break;
19131914
}
1914-
}
1915+
1916+
/* We were woken up, thus go and try to harvest some events */
1917+
eavail = 1;
1918+
1919+
} while (0);
19151920

19161921
__set_current_state(TASK_RUNNING);
19171922

1923+
if (!list_empty_careful(&wait.entry)) {
1924+
write_lock_irq(&ep->lock);
1925+
__remove_wait_queue(&ep->wq, &wait);
1926+
write_unlock_irq(&ep->lock);
1927+
}
1928+
19181929
send_events:
19191930
/*
19201931
* Try to transfer events to user space. In case we get 0 events and
@@ -1925,12 +1936,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
19251936
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
19261937
goto fetch_events;
19271938

1928-
if (waiter) {
1929-
write_lock_irq(&ep->lock);
1930-
__remove_wait_queue(&ep->wq, &wait);
1931-
write_unlock_irq(&ep->lock);
1932-
}
1933-
19341939
return res;
19351940
}
19361941

ipc/mqueue.c

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ struct mqueue_inode_info {
142142

143143
struct sigevent notify;
144144
struct pid *notify_owner;
145+
u32 notify_self_exec_id;
145146
struct user_namespace *notify_user_ns;
146147
struct user_struct *user; /* user who created, for accounting */
147148
struct sock *notify_sock;
@@ -773,28 +774,44 @@ static void __do_notify(struct mqueue_inode_info *info)
773774
* synchronously. */
774775
if (info->notify_owner &&
775776
info->attr.mq_curmsgs == 1) {
776-
struct kernel_siginfo sig_i;
777777
switch (info->notify.sigev_notify) {
778778
case SIGEV_NONE:
779779
break;
780-
case SIGEV_SIGNAL:
781-
/* sends signal */
780+
case SIGEV_SIGNAL: {
781+
struct kernel_siginfo sig_i;
782+
struct task_struct *task;
783+
784+
/* do_mq_notify() accepts sigev_signo == 0, why?? */
785+
if (!info->notify.sigev_signo)
786+
break;
782787

783788
clear_siginfo(&sig_i);
784789
sig_i.si_signo = info->notify.sigev_signo;
785790
sig_i.si_errno = 0;
786791
sig_i.si_code = SI_MESGQ;
787792
sig_i.si_value = info->notify.sigev_value;
788-
/* map current pid/uid into info->owner's namespaces */
789793
rcu_read_lock();
794+
/* map current pid/uid into info->owner's namespaces */
790795
sig_i.si_pid = task_tgid_nr_ns(current,
791796
ns_of_pid(info->notify_owner));
792-
sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid());
797+
sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
798+
current_uid());
799+
/*
800+
* We can't use kill_pid_info(), this signal should
801+
* bypass check_kill_permission(). It is from kernel
802+
* but si_fromuser() can't know this.
803+
* We do check the self_exec_id, to avoid sending
804+
* signals to programs that don't expect them.
805+
*/
806+
task = pid_task(info->notify_owner, PIDTYPE_TGID);
807+
if (task && task->self_exec_id ==
808+
info->notify_self_exec_id) {
809+
do_send_sig_info(info->notify.sigev_signo,
810+
&sig_i, task, PIDTYPE_TGID);
811+
}
793812
rcu_read_unlock();
794-
795-
kill_pid_info(info->notify.sigev_signo,
796-
&sig_i, info->notify_owner);
797813
break;
814+
}
798815
case SIGEV_THREAD:
799816
set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
800817
netlink_sendskb(info->notify_sock, info->notify_cookie);
@@ -1383,6 +1400,7 @@ static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
13831400
info->notify.sigev_signo = notification->sigev_signo;
13841401
info->notify.sigev_value = notification->sigev_value;
13851402
info->notify.sigev_notify = SIGEV_SIGNAL;
1403+
info->notify_self_exec_id = current->self_exec_id;
13861404
break;
13871405
}
13881406

kernel/kcov.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -740,8 +740,8 @@ static const struct file_operations kcov_fops = {
740740
* kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
741741
* arbitrary 4-byte non-zero number as the instance id). This common handle
742742
* then gets saved into the task_struct of the process that issued the
743-
* KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn
744-
* kernel threads, the common handle must be retrived via kcov_common_handle()
743+
* KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
744+
* kernel threads, the common handle must be retrieved via kcov_common_handle()
745745
* and passed to the spawned threads via custom annotations. Those kernel
746746
* threads must in turn be annotated with kcov_remote_start(common_handle) and
747747
* kcov_remote_stop(). All of the threads that are spawned by the same process

lib/Kconfig.ubsan

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,15 @@ config UBSAN_SANITIZE_ALL
6060
Enabling this option will get kernel image size increased
6161
significantly.
6262

63-
config UBSAN_NO_ALIGNMENT
64-
bool "Disable checking of pointers alignment"
65-
default y if HAVE_EFFICIENT_UNALIGNED_ACCESS
63+
config UBSAN_ALIGNMENT
64+
bool "Enable checks for pointers alignment"
65+
default !HAVE_EFFICIENT_UNALIGNED_ACCESS
66+
depends on !X86 || !COMPILE_TEST
6667
help
67-
This option disables the check of unaligned memory accesses.
68-
This option should be used when building allmodconfig.
69-
Disabling this option on architectures that support unaligned
68+
This option enables the check of unaligned memory accesses.
69+
Enabling this option on architectures that support unaligned
7070
accesses may produce a lot of false positives.
7171

72-
config UBSAN_ALIGNMENT
73-
def_bool !UBSAN_NO_ALIGNMENT
74-
7572
config TEST_UBSAN
7673
tristate "Module for testing for undefined behavior detection"
7774
depends on m

mm/memcontrol.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4990,19 +4990,22 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
49904990
unsigned int size;
49914991
int node;
49924992
int __maybe_unused i;
4993+
long error = -ENOMEM;
49934994

49944995
size = sizeof(struct mem_cgroup);
49954996
size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
49964997

49974998
memcg = kzalloc(size, GFP_KERNEL);
49984999
if (!memcg)
4999-
return NULL;
5000+
return ERR_PTR(error);
50005001

50015002
memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
50025003
1, MEM_CGROUP_ID_MAX,
50035004
GFP_KERNEL);
5004-
if (memcg->id.id < 0)
5005+
if (memcg->id.id < 0) {
5006+
error = memcg->id.id;
50055007
goto fail;
5008+
}
50065009

50075010
memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
50085011
if (!memcg->vmstats_local)
@@ -5046,7 +5049,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
50465049
fail:
50475050
mem_cgroup_id_remove(memcg);
50485051
__mem_cgroup_free(memcg);
5049-
return NULL;
5052+
return ERR_PTR(error);
50505053
}
50515054

50525055
static struct cgroup_subsys_state * __ref
@@ -5057,8 +5060,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
50575060
long error = -ENOMEM;
50585061

50595062
memcg = mem_cgroup_alloc();
5060-
if (!memcg)
5061-
return ERR_PTR(error);
5063+
if (IS_ERR(memcg))
5064+
return ERR_CAST(memcg);
50625065

50635066
WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
50645067
memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5108,7 +5111,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
51085111
fail:
51095112
mem_cgroup_id_remove(memcg);
51105113
mem_cgroup_free(memcg);
5111-
return ERR_PTR(-ENOMEM);
5114+
return ERR_PTR(error);
51125115
}
51135116

51145117
static int mem_cgroup_css_online(struct cgroup_subsys_state *css)

mm/page_alloc.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,6 +1607,7 @@ void set_zone_contiguous(struct zone *zone)
16071607
if (!__pageblock_pfn_to_page(block_start_pfn,
16081608
block_end_pfn, zone))
16091609
return;
1610+
cond_resched();
16101611
}
16111612

16121613
/* We confirm that there is no hole */
@@ -2400,6 +2401,14 @@ static inline void boost_watermark(struct zone *zone)
24002401

24012402
if (!watermark_boost_factor)
24022403
return;
2404+
/*
2405+
* Don't bother in zones that are unlikely to produce results.
2406+
* On small machines, including kdump capture kernels running
2407+
* in a small area, boosting the watermark can cause an out of
2408+
* memory situation immediately.
2409+
*/
2410+
if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
2411+
return;
24032412

24042413
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
24052414
watermark_boost_factor, 10000);

mm/percpu.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#include <linux/workqueue.h>
8181
#include <linux/kmemleak.h>
8282
#include <linux/sched.h>
83+
#include <linux/sched/mm.h>
8384

8485
#include <asm/cacheflush.h>
8586
#include <asm/sections.h>
@@ -1557,10 +1558,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
15571558
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
15581559
gfp_t gfp)
15591560
{
1560-
/* whitelisted flags that can be passed to the backing allocators */
1561-
gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
1562-
bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
1563-
bool do_warn = !(gfp & __GFP_NOWARN);
1561+
gfp_t pcpu_gfp;
1562+
bool is_atomic;
1563+
bool do_warn;
15641564
static int warn_limit = 10;
15651565
struct pcpu_chunk *chunk, *next;
15661566
const char *err;
@@ -1569,6 +1569,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
15691569
void __percpu *ptr;
15701570
size_t bits, bit_align;
15711571

1572+
gfp = current_gfp_context(gfp);
1573+
/* whitelisted flags that can be passed to the backing allocators */
1574+
pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
1575+
is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
1576+
do_warn = !(gfp & __GFP_NOWARN);
1577+
15721578
/*
15731579
* There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
15741580
* therefore alignment must be a minimum of that many bytes.

0 commit comments

Comments
 (0)