2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00
linux/scripts/gendwarfksyms/die.c
Sami Tolvanen 10e9510a6d gendwarfksyms: Add a separate pass to resolve FQNs
Using dwarf_getscopes_die to resolve fully-qualified names turns out to
be rather slow, and also results in duplicate scopes being processed,
which doesn't help. Simply adding an extra pass to resolve names for all
DIEs before processing exports is noticeably faster.

For the object files with the most exports in a defconfig+Rust build,
the performance improvement is consistently >50%:

rust/bindings.o: 1038 exports
    before: 9.5980 +- 0.0183 seconds time elapsed  ( +-  0.19% )
     after: 4.3116 +- 0.0287 seconds time elapsed  ( +-  0.67% )

rust/core.o: 424 exports
    before: 5.3584 +- 0.0204 seconds time elapsed  ( +-  0.38% )
     after: 0.05348 +- 0.00129 seconds time elapsed  ( +-  2.42% )
            ^ Not a mistake.

net/core/dev.o: 190 exports
    before: 9.0507 +- 0.0297 seconds time elapsed  ( +-  0.33% )
     after: 3.2882 +- 0.0165 seconds time elapsed  ( +-  0.50% )

rust/kernel.o: 129 exports
    before: 6.8571 +- 0.0317 seconds time elapsed  ( +-  0.46% )
     after: 2.9096 +- 0.0316 seconds time elapsed  ( +-  1.09% )

net/core/skbuff.o: 120 exports
    before: 5.4805 +- 0.0291 seconds time elapsed  ( +-  0.53% )
     after: 2.0339 +- 0.0231 seconds time elapsed  ( +-  1.14% )

drivers/gpu/drm/display/drm_dp_helper.o: 101 exports
    before: 1.7877 +- 0.0187 seconds time elapsed  ( +-  1.05% )
     after: 0.69245 +- 0.00994 seconds time elapsed  ( +-  1.44% )

net/core/sock.o: 97 exports
    before: 5.8327 +- 0.0653 seconds time elapsed  ( +-  1.12% )
     after: 2.0784 +- 0.0291 seconds time elapsed  ( +-  1.40% )

drivers/net/phy/phy_device.o: 95 exports
    before: 3.0671 +- 0.0371 seconds time elapsed  ( +-  1.21% )
     after: 1.2127 +- 0.0207 seconds time elapsed  ( +-  1.70% )

drivers/pci/pci.o: 93 exports
    before: 1.1130 +- 0.0113 seconds time elapsed  ( +-  1.01% )
     after: 0.4848 +- 0.0127 seconds time elapsed  ( +-  2.63% )

kernel/sched/core.o: 83 exports
    before: 3.5092 +- 0.0223 seconds time elapsed  ( +-  0.64% )
     after: 1.1231 +- 0.0145 seconds time elapsed  ( +-  1.29% )

Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8%
faster with this patch applied on my test system. Without Rust, there's
still a 10.4% improvement in build time when gendwarfksyms is used.

Note that symbol versions are unchanged with this patch.

Suggested-by: Giuliano Procida <gprocida@google.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
2025-03-15 21:16:11 +09:00

167 lines
3.1 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2024 Google LLC
*/
#include <string.h>
#include "gendwarfksyms.h"
/* The die_map table is defined with a size argument of 1 << DIE_HASH_BITS. */
#define DIE_HASH_BITS 16
/* {die->addr, state} -> struct die * */
static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
/*
 * Lookup statistics: die_map_get() bumps map_hits when an entry is found
 * and map_misses when a new one has to be created; die_map_free() reports
 * both through debug().
 */
static unsigned int map_hits;
static unsigned int map_misses;
/*
 * Build the die_map key for an {addr, state} pair: the hashed address is
 * XORed with the numeric state value and the result is hashed once more.
 */
static inline unsigned int die_hash(uintptr_t addr, enum die_state state)
{
	unsigned int mixed = addr_hash(addr) ^ (unsigned int)state;

	return hash_32(mixed);
}
/*
 * Put @cd into its pristine state: empty fragment list, zero address,
 * tag -1, no name, not mapped, and DIE_INCOMPLETE.
 *
 * Only the fields listed here are written; the hash linkage of @cd is
 * left untouched. Nothing is freed, so callers that reuse a populated
 * entry must release its contents first (see reset_die()).
 */
static void init_die(struct die *cd)
{
	INIT_LIST_HEAD(&cd->fragments);
	cd->addr = 0;
	cd->tag = -1;
	cd->fqn = NULL;
	cd->mapped = false;
	cd->state = DIE_INCOMPLETE;
}
/*
 * Allocate a fresh entry for @die, record the DIE's address in it, and
 * insert it into die_map under the key for {die->addr, @state}.
 *
 * Note that @state only selects the hash key: the entry's own state field
 * is still DIE_INCOMPLETE (as set by init_die()) when it is returned.
 */
static struct die *create_die(Dwarf_Die *die, enum die_state state)
{
	struct die *entry = xmalloc(sizeof(struct die));

	init_die(entry);
	entry->addr = (uintptr_t)die->addr;
	hash_add(die_map, &entry->hash, die_hash(entry->addr, state));

	return entry;
}
/*
 * Look up the die_map entry whose address is @addr and whose current
 * state equals @state.
 *
 * Returns 0 and stores the entry in *@res on success. Returns -1 and
 * leaves *@res unmodified when no such entry exists.
 */
int __die_map_get(uintptr_t addr, enum die_state state, struct die **res)
{
	unsigned int key = die_hash(addr, state);
	struct die *entry;

	hash_for_each_possible(die_map, entry, hash, key) {
		/* Different {addr, state} pairs can share a bucket. */
		if (entry->addr != addr || entry->state != state)
			continue;

		*res = entry;
		return 0;
	}

	return -1;
}
/*
 * Return the die_map entry for {die->addr, @state}, creating one with
 * create_die() if the lookup fails. The hit/miss counters are updated
 * accordingly.
 */
struct die *die_map_get(Dwarf_Die *die, enum die_state state)
{
	struct die *found;

	if (__die_map_get((uintptr_t)die->addr, state, &found) != 0) {
		map_misses++;
		return create_die(die, state);
	}

	map_hits++;
	return found;
}
/*
 * Release everything @cd owns and return it to its initial state.
 *
 * Every fragment is freed, along with the string owned by
 * FRAGMENT_STRING fragments. The name is freed only when it is a
 * non-empty string; an empty fqn is deliberately left alone
 * (presumably it is not heap-allocated -- confirm where fqn is set).
 */
static void reset_die(struct die *cd)
{
	struct die_fragment *frag;
	struct die_fragment *next;

	list_for_each_entry_safe(frag, next, &cd->fragments, list) {
		if (frag->type == FRAGMENT_STRING)
			free(frag->data.str);
		free(frag);
	}

	if (cd->fqn != NULL && cd->fqn[0] != '\0')
		free(cd->fqn);

	/* Also re-initializes the now-dangling fragment list head. */
	init_die(cd);
}
/*
 * Call @func(entry, @arg) once for every entry currently in die_map.
 * Iteration uses the _safe variant of the hashtable walker.
 */
void die_map_for_each(die_map_callback_t func, void *arg)
{
	struct hlist_node *next;
	struct die *entry;

	hash_for_each_safe(die_map, entry, next, hash) {
		func(entry, arg);
	}
}
/*
 * Free every entry in die_map, re-initialize the table, and report
 * statistics through debug(): the lookup hit rate (only when at least
 * one lookup was counted) followed by the number of entries that were
 * in each state at the time they were freed.
 */
void die_map_free(void)
{
	unsigned int per_state[DIE_LAST + 1] = { 0 };
	unsigned int lookups;
	struct hlist_node *next;
	struct die *entry;

	hash_for_each_safe(die_map, entry, next, hash) {
		/* Count before reset_die() puts the state back to its default. */
		per_state[entry->state]++;
		reset_die(entry);
		free(entry);
	}
	hash_init(die_map);

	lookups = map_hits + map_misses;
	if (lookups > 0)
		debug("hits %u, misses %u (hit rate %.02f%%)", map_hits,
		      map_misses, (100.0f * map_hits) / lookups);

	for (int state = 0; state <= DIE_LAST; state++)
		debug("%s: %u entries", die_state_name(state),
		      per_state[state]);
}
/*
 * Allocate a new fragment of type FRAGMENT_EMPTY, link it at the tail of
 * @cd's fragment list, and return it so the caller can fill in the
 * payload and the final type.
 */
static struct die_fragment *append_item(struct die *cd)
{
	struct die_fragment *frag = xmalloc(sizeof(struct die_fragment));

	frag->type = FRAGMENT_EMPTY;
	list_add_tail(&frag->list, &cd->fragments);

	return frag;
}
/*
 * Append a FRAGMENT_STRING fragment holding an xstrdup() copy of @str to
 * @cd. Does nothing when @cd is NULL.
 */
void die_map_add_string(struct die *cd, const char *str)
{
	struct die_fragment *frag;

	if (cd == NULL)
		return;

	frag = append_item(cd);
	frag->data.str = xstrdup(str);
	frag->type = FRAGMENT_STRING;
}
/*
 * Append a FRAGMENT_LINEBREAK fragment carrying the value @linebreak to
 * @cd. Does nothing when @cd is NULL.
 */
void die_map_add_linebreak(struct die *cd, int linebreak)
{
	struct die_fragment *frag;

	if (cd == NULL)
		return;

	frag = append_item(cd);
	frag->data.linebreak = linebreak;
	frag->type = FRAGMENT_LINEBREAK;
}
/*
 * Append a FRAGMENT_DIE fragment to @cd that refers to @child by its
 * address (child->addr) rather than by pointer. Does nothing when @cd is
 * NULL; @child is dereferenced unconditionally in that case's complement
 * and therefore must not be NULL when @cd is set.
 */
void die_map_add_die(struct die *cd, struct die *child)
{
	struct die_fragment *frag;

	if (cd == NULL)
		return;

	frag = append_item(cd);
	frag->data.addr = child->addr;
	frag->type = FRAGMENT_DIE;
}