Skip to content

Commit d4e0340

Browse files
Saravana Kannan authored and ctmarinas committed
arm64/module: Optimize module load time by optimizing PLT counting
When loading a module, module_frob_arch_sections() tries to figure out the number of PLTs that'll be needed to handle all the RELAs. While doing this, it tries to dedupe PLT allocations for multiple R_AARCH64_CALL26 relocations to the same symbol. It does the same for R_AARCH64_JUMP26 relocations.

To make checks for duplicates easier/faster, it sorts the relocation list by type, symbol and addend. That way, to check for a duplicate relocation, it just needs to compare with the previous entry.

However, sorting the entire relocation array is unnecessary and expensive (O(n log n)) because there are a lot of other relocation types that don't need deduping or can't be deduped.

So this commit partitions the array into entries that need deduping and those that don't. And then sorts just the part that needs deduping. And when CONFIG_RANDOMIZE_BASE is disabled, the sorting is skipped entirely because PLTs are not allocated for R_AARCH64_CALL26 and R_AARCH64_JUMP26 if it's disabled.

This gives a significant reduction in module load time for modules with a large number of relocations, with no measurable impact on modules with a small number of relocations.
In my test setup with CONFIG_RANDOMIZE_BASE enabled, these were the results for a few downstream modules:

Module         Size (MB)
wlan           14
video codec    3.8
drm            1.8
IPA            2.5
audio          1.2
gpu            1.8

Without this patch:

Module         Number of entries sorted    Module load time (ms)
wlan           243739                      283
video codec    74029                       138
drm            53837                       67
IPA            42800                       90
audio          21326                       27
gpu            20967                       32

Total time to load all these modules: 637 ms

With this patch:

Module         Number of entries sorted    Module load time (ms)
wlan           22454                       61
video codec    10150                       47
drm            13014                       40
IPA            8097                        63
audio          4606                        16
gpu            6527                        20

Total time to load all these modules: 247 ms

Time saved during boot for just these 6 modules: 390 ms

Signed-off-by: Saravana Kannan <[email protected]>
Acked-by: Will Deacon <[email protected]>
Cc: Ard Biesheuvel <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Catalin Marinas <[email protected]>
1 parent 9ebcfad commit d4e0340

File tree

1 file changed

+43
-3
lines changed

1 file changed

+43
-3
lines changed

arch/arm64/kernel/module-plts.c

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,40 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
253253
return ret;
254254
}
255255

256+
/*
 * Tell whether a relocation entry is a branch that needs a PLT.
 *
 * @syms:   symbol table indexed by the symbol number encoded in r_info
 * @rela:   relocation entry to classify
 * @dstidx: section index compared against the symbol's st_shndx
 *
 * Returns true only for R_AARCH64_JUMP26 and R_AARCH64_CALL26 relocations
 * whose symbol has a section index different from @dstidx.
 */
static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela,
				  Elf64_Word dstidx)
{
	Elf64_Sym *sym = &syms[ELF64_R_SYM(rela->r_info)];

	/* Symbol sits in section @dstidx itself: never reported. */
	if (sym->st_shndx == dstidx)
		return false;

	switch (ELF64_R_TYPE(rela->r_info)) {
	case R_AARCH64_JUMP26:
	case R_AARCH64_CALL26:
		return true;
	default:
		return false;
	}
}
268+
269+
/* Group branch PLT relas at the front end of the array. */
270+
static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
271+
int numrels, Elf64_Word dstidx)
272+
{
273+
int i = 0, j = numrels - 1;
274+
275+
if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
276+
return 0;
277+
278+
while (i < j) {
279+
if (branch_rela_needs_plt(syms, &rela[i], dstidx))
280+
i++;
281+
else if (branch_rela_needs_plt(syms, &rela[j], dstidx))
282+
swap(rela[i], rela[j]);
283+
else
284+
j--;
285+
}
286+
287+
return i;
288+
}
289+
256290
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
257291
char *secstrings, struct module *mod)
258292
{
@@ -290,7 +324,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
290324

291325
for (i = 0; i < ehdr->e_shnum; i++) {
292326
Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
293-
int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
327+
int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
294328
Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
295329

296330
if (sechdrs[i].sh_type != SHT_RELA)
@@ -300,8 +334,14 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
300334
if (!(dstsec->sh_flags & SHF_EXECINSTR))
301335
continue;
302336

303-
/* sort by type, symbol index and addend */
304-
sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
337+
/*
338+
* sort branch relocations requiring a PLT by type, symbol index
339+
* and addend
340+
*/
341+
nents = partition_branch_plt_relas(syms, rels, numrels,
342+
sechdrs[i].sh_info);
343+
if (nents)
344+
sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL);
305345

306346
if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
307347
core_plts += count_plts(syms, rels, numrels,

0 commit comments

Comments
 (0)