Skip to content

Commit 9b9879f

Browse files
committed
modules: catch concurrent module loads, treat them as idempotent
This is the new-and-improved attempt at avoiding huge memory load spikes when the user space boot sequence tries to load hundreds (or even thousands) of redundant duplicate modules in parallel. See commit 9828ed3 ("module: error out early on concurrent load of the same module file") for background and an earlier failed attempt that was reverted. That earlier attempt just said "concurrently loading the same module is silly, just open the module file exclusively and return -ETXTBSY if somebody else is already loading it". While it is true that concurrent module loads of the same module is silly, the reason that earlier attempt then failed was that the concurrently loaded module would often be a prerequisite for another module. Thus failing to load the prerequisite would then cause cascading failures of the other modules, rather than just short-circuiting that one unnecessary module load. At the same time, we still really don't want to load the contents of the same module file hundreds of times, only to then wait for an eventually successful load, and have everybody else return -EEXIST. As a result, this takes another approach, and treats concurrent module loads from the same file as "idempotent" in the inode. So if one module load is ongoing, we don't start a new one, but instead just wait for the first one to complete and return the same return value as it did. So unlike the first attempt, this does not return early: the intent is not to speed up the boot, but to avoid a thundering herd problem in allocating memory (both physical and virtual) for a module more than once. Also note that this does change behavior: it used to be that when you had concurrent loads, you'd have one "winner" that would return success, and everybody else would return -EEXIST. In contrast, this idempotent logic goes all Oprah on the problem, and says "You are a winner! And you are a winner! We are ALL winners". But since there's no possible actual real semantic difference between "you loaded the module" and "somebody else already loaded the module", this is more of a feel-good change than an actual honest-to-goodness semantic change. Of course, any true Johnny-come-latelies that don't get caught in the concurrency filter will still return -EEXIST. It's no different from not even getting a seat at an Oprah taping. That's life. See the long thread on the kernel mailing list about this all, which includes some numbers for memory use before and after the patch. Link: https://lore.kernel.org/lkml/[email protected]/ Reviewed-by: Johan Hovold <[email protected]> Tested-by: Johan Hovold <[email protected]> Tested-by: Luis Chamberlain <[email protected]> Tested-by: Dan Williams <[email protected]> Tested-by: Rudi Heitbaum <[email protected]> Tested-by: David Hildenbrand <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 054a730 commit 9b9879f

File tree

1 file changed

+71
-2
lines changed

1 file changed

+71
-2
lines changed

kernel/module/main.c

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3057,15 +3057,82 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
30573057
return load_module(&info, uargs, 0);
30583058
}
30593059

3060+
struct idempotent {
3061+
const void *cookie;
3062+
struct hlist_node entry;
3063+
struct completion complete;
3064+
int ret;
3065+
};
3066+
3067+
#define IDEM_HASH_BITS 8
3068+
static struct hlist_head idem_hash[1 << IDEM_HASH_BITS];
3069+
static DEFINE_SPINLOCK(idem_lock);
3070+
3071+
static bool idempotent(struct idempotent *u, const void *cookie)
3072+
{
3073+
int hash = hash_ptr(cookie, IDEM_HASH_BITS);
3074+
struct hlist_head *head = idem_hash + hash;
3075+
struct idempotent *existing;
3076+
bool first;
3077+
3078+
u->ret = 0;
3079+
u->cookie = cookie;
3080+
init_completion(&u->complete);
3081+
3082+
spin_lock(&idem_lock);
3083+
first = true;
3084+
hlist_for_each_entry(existing, head, entry) {
3085+
if (existing->cookie != cookie)
3086+
continue;
3087+
first = false;
3088+
break;
3089+
}
3090+
hlist_add_head(&u->entry, idem_hash + hash);
3091+
spin_unlock(&idem_lock);
3092+
3093+
return !first;
3094+
}
3095+
3096+
/*
3097+
* We were the first one with 'cookie' on the list, and we ended
3098+
* up completing the operation. We now need to walk the list,
3099+
* remove everybody - which includes ourselves - fill in the return
3100+
* value, and then complete the operation.
3101+
*/
3102+
static void idempotent_complete(struct idempotent *u, int ret)
3103+
{
3104+
const void *cookie = u->cookie;
3105+
int hash = hash_ptr(cookie, IDEM_HASH_BITS);
3106+
struct hlist_head *head = idem_hash + hash;
3107+
struct hlist_node *next;
3108+
struct idempotent *pos;
3109+
3110+
spin_lock(&idem_lock);
3111+
hlist_for_each_entry_safe(pos, next, head, entry) {
3112+
if (pos->cookie != cookie)
3113+
continue;
3114+
hlist_del(&pos->entry);
3115+
pos->ret = ret;
3116+
complete(&pos->complete);
3117+
}
3118+
spin_unlock(&idem_lock);
3119+
}
3120+
30603121
static int init_module_from_file(struct file *f, const char __user * uargs, int flags)
30613122
{
3123+
struct idempotent idem;
30623124
struct load_info info = { };
30633125
void *buf = NULL;
3064-
int len;
3126+
int len, ret;
30653127

30663128
if (!f || !(f->f_mode & FMODE_READ))
30673129
return -EBADF;
30683130

3131+
if (idempotent(&idem, file_inode(f))) {
3132+
wait_for_completion(&idem.complete);
3133+
return idem.ret;
3134+
}
3135+
30693136
len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE);
30703137
if (len < 0) {
30713138
mod_stat_inc(&failed_kreads);
@@ -3086,7 +3153,9 @@ static int init_module_from_file(struct file *f, const char __user * uargs, int
30863153
info.len = len;
30873154
}
30883155

3089-
return load_module(&info, uargs, flags);
3156+
ret = load_module(&info, uargs, flags);
3157+
idempotent_complete(&idem, ret);
3158+
return ret;
30903159
}
30913160

30923161
SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)

0 commit comments

Comments
 (0)