Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-04 20:19:47 +08:00)

Using dwarf_getscopes_die to resolve fully-qualified names turns out to be rather slow, and also results in duplicate scopes being processed, which doesn't help. Simply adding an extra pass to resolve names for all DIEs before processing exports is noticeably faster.

For the object files with the most exports in a defconfig+Rust build, the performance improvement is consistently >50%:

rust/bindings.o: 1038 exports
  before: 9.5980 +- 0.0183 seconds time elapsed ( +- 0.19% )
  after:  4.3116 +- 0.0287 seconds time elapsed ( +- 0.67% )

rust/core.o: 424 exports
  before: 5.3584 +- 0.0204 seconds time elapsed ( +- 0.38% )
  after:  0.05348 +- 0.00129 seconds time elapsed ( +- 2.42% )
  ^ Not a mistake.

net/core/dev.o: 190 exports
  before: 9.0507 +- 0.0297 seconds time elapsed ( +- 0.33% )
  after:  3.2882 +- 0.0165 seconds time elapsed ( +- 0.50% )

rust/kernel.o: 129 exports
  before: 6.8571 +- 0.0317 seconds time elapsed ( +- 0.46% )
  after:  2.9096 +- 0.0316 seconds time elapsed ( +- 1.09% )

net/core/skbuff.o: 120 exports
  before: 5.4805 +- 0.0291 seconds time elapsed ( +- 0.53% )
  after:  2.0339 +- 0.0231 seconds time elapsed ( +- 1.14% )

drivers/gpu/drm/display/drm_dp_helper.o: 101 exports
  before: 1.7877 +- 0.0187 seconds time elapsed ( +- 1.05% )
  after:  0.69245 +- 0.00994 seconds time elapsed ( +- 1.44% )

net/core/sock.o: 97 exports
  before: 5.8327 +- 0.0653 seconds time elapsed ( +- 1.12% )
  after:  2.0784 +- 0.0291 seconds time elapsed ( +- 1.40% )

drivers/net/phy/phy_device.o: 95 exports
  before: 3.0671 +- 0.0371 seconds time elapsed ( +- 1.21% )
  after:  1.2127 +- 0.0207 seconds time elapsed ( +- 1.70% )

drivers/pci/pci.o: 93 exports
  before: 1.1130 +- 0.0113 seconds time elapsed ( +- 1.01% )
  after:  0.4848 +- 0.0127 seconds time elapsed ( +- 2.63% )

kernel/sched/core.o: 83 exports
  before: 3.5092 +- 0.0223 seconds time elapsed ( +- 0.64% )
  after:  1.1231 +- 0.0145 seconds time elapsed ( +- 1.29% )

Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8% faster with this patch applied on my test system. Without Rust, there's still a 10.4% improvement in build time when gendwarfksyms is used.

Note that symbol versions are unchanged with this patch.

Suggested-by: Giuliano Procida <gprocida@google.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
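The underlying idea is to stop asking libdw for the enclosing scopes of each export on demand (dwarf_getscopes_die) and instead walk the DIE tree once up front, recording a fully-qualified name for every DIE. The sketch below only illustrates that single-pass idea; it is not the gendwarfksyms implementation. cache_fqn() is a hypothetical stand-in for storing the name in a die_map-style table keyed by die->addr, and every named DIE contributes to the prefix here, whereas the real tool is more selective about which scope tags count.

/*
 * Minimal sketch of a single pre-pass that resolves fully-qualified
 * names for all DIEs before export processing, so later lookups are
 * plain table queries instead of dwarf_getscopes_die() calls.
 * cache_fqn() is a hypothetical helper, not part of gendwarfksyms.
 */
#define _GNU_SOURCE /* asprintf */
#include <stdio.h>
#include <stdlib.h>
#include <elfutils/libdw.h>

extern void cache_fqn(void *addr, char *fqn); /* hypothetical cache insert */

static void resolve_fqns(Dwarf_Die *die, const char *prefix)
{
	do {
		const char *name = dwarf_diename(die);
		char *fqn = NULL;
		Dwarf_Die child;

		/* Build "<prefix>::<name>" for named DIEs and cache it. */
		if (name) {
			if (asprintf(&fqn, "%s%s%s", prefix,
				     *prefix ? "::" : "", name) < 0)
				exit(1);
			cache_fqn(die->addr, fqn);
		}

		/* Visit each child exactly once, passing the prefix down. */
		if (dwarf_child(die, &child) == 0)
			resolve_fqns(&child, fqn ? fqn : prefix);
	} while (dwarf_siblingof(die, die) == 0);
}

Calling resolve_fqns() on each compile unit's first child with an empty prefix visits every DIE exactly once, which is where the >50% per-file improvements above come from.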
167 lines
3.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024 Google LLC
 */

#include <string.h>
#include "gendwarfksyms.h"

#define DIE_HASH_BITS 16

/* {die->addr, state} -> struct die * */
static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);

static unsigned int map_hits;
static unsigned int map_misses;

static inline unsigned int die_hash(uintptr_t addr, enum die_state state)
{
	return hash_32(addr_hash(addr) ^ (unsigned int)state);
}

static void init_die(struct die *cd)
{
	cd->state = DIE_INCOMPLETE;
	cd->mapped = false;
	cd->fqn = NULL;
	cd->tag = -1;
	cd->addr = 0;
	INIT_LIST_HEAD(&cd->fragments);
}

static struct die *create_die(Dwarf_Die *die, enum die_state state)
{
	struct die *cd;

	cd = xmalloc(sizeof(struct die));
	init_die(cd);
	cd->addr = (uintptr_t)die->addr;

	hash_add(die_map, &cd->hash, die_hash(cd->addr, state));
	return cd;
}

int __die_map_get(uintptr_t addr, enum die_state state, struct die **res)
{
	struct die *cd;

	hash_for_each_possible(die_map, cd, hash, die_hash(addr, state)) {
		if (cd->addr == addr && cd->state == state) {
			*res = cd;
			return 0;
		}
	}

	return -1;
}

struct die *die_map_get(Dwarf_Die *die, enum die_state state)
{
	struct die *cd;

	if (__die_map_get((uintptr_t)die->addr, state, &cd) == 0) {
		map_hits++;
		return cd;
	}

	map_misses++;
	return create_die(die, state);
}

static void reset_die(struct die *cd)
{
	struct die_fragment *tmp;
	struct die_fragment *df;

	list_for_each_entry_safe(df, tmp, &cd->fragments, list) {
		if (df->type == FRAGMENT_STRING)
			free(df->data.str);
		free(df);
	}

	if (cd->fqn && *cd->fqn)
		free(cd->fqn);
	init_die(cd);
}

void die_map_for_each(die_map_callback_t func, void *arg)
{
	struct hlist_node *tmp;
	struct die *cd;

	hash_for_each_safe(die_map, cd, tmp, hash) {
		func(cd, arg);
	}
}

void die_map_free(void)
{
	struct hlist_node *tmp;
	unsigned int stats[DIE_LAST + 1];
	struct die *cd;
	int i;

	memset(stats, 0, sizeof(stats));

	hash_for_each_safe(die_map, cd, tmp, hash) {
		stats[cd->state]++;
		reset_die(cd);
		free(cd);
	}
	hash_init(die_map);

	if (map_hits + map_misses > 0)
		debug("hits %u, misses %u (hit rate %.02f%%)", map_hits,
		      map_misses,
		      (100.0f * map_hits) / (map_hits + map_misses));

	for (i = 0; i <= DIE_LAST; i++)
		debug("%s: %u entries", die_state_name(i), stats[i]);
}

static struct die_fragment *append_item(struct die *cd)
{
	struct die_fragment *df;

	df = xmalloc(sizeof(struct die_fragment));
	df->type = FRAGMENT_EMPTY;
	list_add_tail(&df->list, &cd->fragments);
	return df;
}

void die_map_add_string(struct die *cd, const char *str)
{
	struct die_fragment *df;

	if (!cd)
		return;

	df = append_item(cd);
	df->data.str = xstrdup(str);
	df->type = FRAGMENT_STRING;
}

void die_map_add_linebreak(struct die *cd, int linebreak)
{
	struct die_fragment *df;

	if (!cd)
		return;

	df = append_item(cd);
	df->data.linebreak = linebreak;
	df->type = FRAGMENT_LINEBREAK;
}

void die_map_add_die(struct die *cd, struct die *child)
{
	struct die_fragment *df;

	if (!cd)
		return;

	df = append_item(cd);
	df->data.addr = child->addr;
	df->type = FRAGMENT_DIE;
}