module: Fix performance regression on modules with large symbol tables
Looking at /proc/kallsyms, one starts to ponder whether all of the extra strtab-related complexity in module.c is worth the memory savings.

Instead of making the add_kallsyms() loop even more complex, I tried the other route of deleting the strmap logic and naively copying each string into core_strtab with no consideration for consolidating duplicates.

Performance on an "already exists" insmod of nvidia.ko (runs add_kallsyms() but does not actually initialize the module):

    Original scheme:     1.230s
    With naive copying:  0.058s

Extra space used: 35k (of a 408k module).

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <73defb5e4bca04a6431392cc341112b1@localhost>
This commit is contained in:
committed by
Rusty Russell
parent
70b1e9161e
commit
48fd11880b
@@ -138,7 +138,6 @@ struct load_info {
|
|||||||
unsigned long len;
|
unsigned long len;
|
||||||
Elf_Shdr *sechdrs;
|
Elf_Shdr *sechdrs;
|
||||||
char *secstrings, *strtab;
|
char *secstrings, *strtab;
|
||||||
unsigned long *strmap;
|
|
||||||
unsigned long symoffs, stroffs;
|
unsigned long symoffs, stroffs;
|
||||||
struct _ddebug *debug;
|
struct _ddebug *debug;
|
||||||
unsigned int num_debug;
|
unsigned int num_debug;
|
||||||
@@ -2178,12 +2177,19 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We only allocate and copy the strings needed by the parts of symtab
|
||||||
|
* we keep. This is simple, but has the effect of making multiple
|
||||||
|
* copies of duplicates. We could be more sophisticated, see
|
||||||
|
* linux-kernel thread starting with
|
||||||
|
* <73defb5e4bca04a6431392cc341112b1@localhost>.
|
||||||
|
*/
|
||||||
static void layout_symtab(struct module *mod, struct load_info *info)
|
static void layout_symtab(struct module *mod, struct load_info *info)
|
||||||
{
|
{
|
||||||
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
|
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
|
||||||
Elf_Shdr *strsect = info->sechdrs + info->index.str;
|
Elf_Shdr *strsect = info->sechdrs + info->index.str;
|
||||||
const Elf_Sym *src;
|
const Elf_Sym *src;
|
||||||
unsigned int i, nsrc, ndst;
|
unsigned int i, nsrc, ndst, strtab_size;
|
||||||
|
|
||||||
/* Put symbol section at end of init part of module. */
|
/* Put symbol section at end of init part of module. */
|
||||||
symsect->sh_flags |= SHF_ALLOC;
|
symsect->sh_flags |= SHF_ALLOC;
|
||||||
@@ -2194,38 +2200,23 @@ static void layout_symtab(struct module *mod, struct load_info *info)
|
|||||||
src = (void *)info->hdr + symsect->sh_offset;
|
src = (void *)info->hdr + symsect->sh_offset;
|
||||||
nsrc = symsect->sh_size / sizeof(*src);
|
nsrc = symsect->sh_size / sizeof(*src);
|
||||||
|
|
||||||
/*
|
/* Compute total space required for the core symbols' strtab. */
|
||||||
* info->strmap has a '1' bit for each byte of .strtab we want to
|
for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
|
||||||
* keep resident in mod->core_strtab. Everything else in .strtab
|
|
||||||
* is unreferenced by the symbols in mod->core_symtab, and will be
|
|
||||||
* discarded when add_kallsyms() compacts the string table.
|
|
||||||
*/
|
|
||||||
for (ndst = i = 1; i < nsrc; ++i, ++src)
|
|
||||||
if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
|
if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
|
||||||
unsigned int j = src->st_name;
|
strtab_size += strlen(&info->strtab[src->st_name]) + 1;
|
||||||
|
ndst++;
|
||||||
while (!__test_and_set_bit(j, info->strmap)
|
|
||||||
&& info->strtab[j])
|
|
||||||
++j;
|
|
||||||
++ndst;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Append room for core symbols at end of core part. */
|
/* Append room for core symbols at end of core part. */
|
||||||
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
|
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
|
||||||
mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
|
info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
|
||||||
|
mod->core_size += strtab_size;
|
||||||
|
|
||||||
/* Put string table section at end of init part of module. */
|
/* Put string table section at end of init part of module. */
|
||||||
strsect->sh_flags |= SHF_ALLOC;
|
strsect->sh_flags |= SHF_ALLOC;
|
||||||
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
|
strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
|
||||||
info->index.str) | INIT_OFFSET_MASK;
|
info->index.str) | INIT_OFFSET_MASK;
|
||||||
DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
|
DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
|
||||||
|
|
||||||
/* Append room for core symbols' strings at end of core part. */
|
|
||||||
info->stroffs = mod->core_size;
|
|
||||||
|
|
||||||
/* First strtab byte (and first symtab entry) are zeroes. */
|
|
||||||
__set_bit(0, info->strmap);
|
|
||||||
mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_kallsyms(struct module *mod, const struct load_info *info)
|
static void add_kallsyms(struct module *mod, const struct load_info *info)
|
||||||
@@ -2246,22 +2237,19 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
|
|||||||
mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
|
mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
|
||||||
|
|
||||||
mod->core_symtab = dst = mod->module_core + info->symoffs;
|
mod->core_symtab = dst = mod->module_core + info->symoffs;
|
||||||
|
mod->core_strtab = s = mod->module_core + info->stroffs;
|
||||||
src = mod->symtab;
|
src = mod->symtab;
|
||||||
*dst = *src;
|
*dst = *src;
|
||||||
|
*s++ = 0;
|
||||||
for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
|
for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
|
||||||
if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
|
if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
dst[ndst] = *src;
|
dst[ndst] = *src;
|
||||||
dst[ndst].st_name = bitmap_weight(info->strmap,
|
dst[ndst++].st_name = s - mod->core_strtab;
|
||||||
dst[ndst].st_name);
|
s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
|
||||||
++ndst;
|
|
||||||
}
|
}
|
||||||
mod->core_num_syms = ndst;
|
mod->core_num_syms = ndst;
|
||||||
|
|
||||||
mod->core_strtab = s = mod->module_core + info->stroffs;
|
|
||||||
for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i)
|
|
||||||
if (test_bit(i, info->strmap))
|
|
||||||
*++s = mod->strtab[i];
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static inline void layout_symtab(struct module *mod, struct load_info *info)
|
static inline void layout_symtab(struct module *mod, struct load_info *info)
|
||||||
@@ -2751,27 +2739,18 @@ static struct module *layout_and_allocate(struct load_info *info)
|
|||||||
this is done generically; there doesn't appear to be any
|
this is done generically; there doesn't appear to be any
|
||||||
special cases for the architectures. */
|
special cases for the architectures. */
|
||||||
layout_sections(mod, info);
|
layout_sections(mod, info);
|
||||||
|
|
||||||
info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)
|
|
||||||
* sizeof(long), GFP_KERNEL);
|
|
||||||
if (!info->strmap) {
|
|
||||||
err = -ENOMEM;
|
|
||||||
goto free_percpu;
|
|
||||||
}
|
|
||||||
layout_symtab(mod, info);
|
layout_symtab(mod, info);
|
||||||
|
|
||||||
/* Allocate and move to the final place */
|
/* Allocate and move to the final place */
|
||||||
err = move_module(mod, info);
|
err = move_module(mod, info);
|
||||||
if (err)
|
if (err)
|
||||||
goto free_strmap;
|
goto free_percpu;
|
||||||
|
|
||||||
/* Module has been copied to its final place now: return it. */
|
/* Module has been copied to its final place now: return it. */
|
||||||
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
|
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
|
||||||
kmemleak_load_module(mod, info);
|
kmemleak_load_module(mod, info);
|
||||||
return mod;
|
return mod;
|
||||||
|
|
||||||
free_strmap:
|
|
||||||
kfree(info->strmap);
|
|
||||||
free_percpu:
|
free_percpu:
|
||||||
percpu_modfree(mod);
|
percpu_modfree(mod);
|
||||||
out:
|
out:
|
||||||
@@ -2781,7 +2760,6 @@ out:
|
|||||||
/* mod is no longer valid after this! */
|
/* mod is no longer valid after this! */
|
||||||
static void module_deallocate(struct module *mod, struct load_info *info)
|
static void module_deallocate(struct module *mod, struct load_info *info)
|
||||||
{
|
{
|
||||||
kfree(info->strmap);
|
|
||||||
percpu_modfree(mod);
|
percpu_modfree(mod);
|
||||||
module_free(mod, mod->module_init);
|
module_free(mod, mod->module_init);
|
||||||
module_free(mod, mod->module_core);
|
module_free(mod, mod->module_core);
|
||||||
@@ -2911,8 +2889,7 @@ static struct module *load_module(void __user *umod,
|
|||||||
if (err < 0)
|
if (err < 0)
|
||||||
goto unlink;
|
goto unlink;
|
||||||
|
|
||||||
/* Get rid of temporary copy and strmap. */
|
/* Get rid of temporary copy. */
|
||||||
kfree(info.strmap);
|
|
||||||
free_copy(&info);
|
free_copy(&info);
|
||||||
|
|
||||||
/* Done! */
|
/* Done! */
|
||||||
|
Reference in New Issue
Block a user