2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00

irqchip/gic-v3-its: Refactor LPI allocator

Our current LPI allocator relies on a bitmap, each bit representing
a chunk of 32 LPIs, meaning that each device gets allocated LPIs
in multiples of 32. It has served us well so far, but new use cases now
require much finer-grained allocations, down to the individual
LPI.

Given the size of the IntID space (up to 32bit), it isn't practical
to continue using a bitmap, so let's use a different data structure
altogether.

We switch to a list, where each element represents a contiguous range
of LPIs. On allocation, we simply grab the first group big enough to
satisfy the allocation, and subtract what we need from it. If the
group becomes empty, we just remove it. On freeing interrupts, we
insert a new group of interrupts in the list, sort it and fuse the
adjacent groups.

This makes freeing interrupts much more expensive than allocating
them (an unusual behaviour), but that's fine as long as we consider
that freeing interrupts is an extremely rare event.

We still allocate interrupts in blocks of 32 for the time being,
but subsequent patches will relax this.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
This commit is contained in:
Marc Zyngier 2018-05-27 16:14:15 +01:00
parent 9d3cce1e8b
commit 880cb3cddd

View File

@ -23,6 +23,8 @@
#include <linux/dma-iommu.h> #include <linux/dma-iommu.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/irqdomain.h> #include <linux/irqdomain.h>
#include <linux/list.h>
#include <linux/list_sort.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/msi.h> #include <linux/msi.h>
@ -1421,112 +1423,177 @@ static struct irq_chip its_irq_chip = {
.irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity,
}; };
/* /*
* How we allocate LPIs: * How we allocate LPIs:
* *
* The GIC has id_bits bits for interrupt identifiers. From there, we * lpi_range_list contains ranges of LPIs that are available to
* must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as * allocate from. To allocate LPIs, just pick the first range that
* we allocate LPIs by chunks of 32, we can shift the whole thing by 5 * fits the required allocation, and reduce it by the required
* bits to the right. * amount. Once empty, remove the range from the list.
* *
* This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations. * To free a range of LPIs, add a free range to the list, sort it and
* merge the result if the new range happens to be adjacent to an
* already free block.
*
* The consequence of the above is that the cost of allocation is low, but
* freeing is expensive. We assume that freeing rarely occurs.
*/
/*
* Compatibility defines until we fully refactor the allocator
*/ */
#define IRQS_PER_CHUNK_SHIFT 5 #define IRQS_PER_CHUNK_SHIFT 5
#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT) #define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT)
#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ #define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */
/*
 * New allocator state (right-hand column): lpi_range_list holds the
 * ranges of LPIs available for allocation; lpi_range_lock is taken by
 * alloc_lpi_range() and free_lpi_range() around every walk or update
 * of that list.
 */
static unsigned long *lpi_bitmap; static DEFINE_MUTEX(lpi_range_lock);
static u32 lpi_chunks; static LIST_HEAD(lpi_range_list);
static DEFINE_SPINLOCK(lpi_lock);
static int its_lpi_to_chunk(int lpi) struct lpi_range {
/* One contiguous run of LPIs that is available for allocation. */
struct list_head entry; /* link in lpi_range_list */
u32 base_id; /* first LPI ID covered by this range */
u32 span; /* number of consecutive LPIs in this range */
};
/*
 * Allocate a range descriptor covering span LPIs starting at base.
 *
 * The descriptor is zero-allocated with GFP_KERNEL and its list entry
 * is initialised, but it is not linked into any list here; that is
 * left to the caller.
 *
 * Returns the new descriptor, or NULL if the allocation failed.
 */
static struct lpi_range *mk_lpi_range(u32 base, u32 span)
{ {
return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT; struct lpi_range *range;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (range) {
INIT_LIST_HEAD(&range->entry);
range->base_id = base;
range->span = span;
} }
static int its_chunk_to_lpi(int chunk) return range;
}
/*
 * list_sort() comparator ordering LPI ranges by ascending base_id.
 *
 * list_sort() places @a after @b when the comparator returns a value
 * greater than zero, so returning a positive value when @a has the
 * larger base yields an ascending list. merge_lpi_ranges() depends on
 * that order: it only fuses a range with its successor when the
 * successor starts exactly where the range ends.
 *
 * The result is computed with explicit comparisons rather than a
 * subtraction, because the difference of two u32 IDs does not
 * necessarily fit in an int with the right sign.
 *
 * @priv: unused.
 * @a, @b: list entries embedded in struct lpi_range.
 *
 * Returns a negative, zero or positive value when @a sorts before,
 * equal to, or after @b.
 */
static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct lpi_range *ra = container_of(a, struct lpi_range, entry);
	struct lpi_range *rb = container_of(b, struct lpi_range, entry);

	if (ra->base_id < rb->base_id)
		return -1;
	if (ra->base_id > rb->base_id)
		return 1;
	return 0;
}
/*
 * Walk lpi_range_list once and coalesce every range with its immediate
 * successor when the successor begins exactly where the range ends.
 * The successor absorbs the range (its base moves down, its span
 * grows) and the absorbed descriptor is unlinked and freed, so chains
 * of adjacent ranges collapse as the walk proceeds.
 *
 * Called from free_lpi_range() with lpi_range_lock held.
 */
static void merge_lpi_ranges(void)
{
	struct lpi_range *cur, *next;

	list_for_each_entry_safe(cur, next, &lpi_range_list, entry) {
		/* The tail has no successor; "next" is not a real range there. */
		if (list_is_last(&cur->entry, &lpi_range_list))
			continue;

		/* Not contiguous with the following range: leave both alone. */
		if (next->base_id != (cur->base_id + cur->span))
			continue;

		next->base_id = cur->base_id;
		next->span += cur->span;
		list_del(&cur->entry);
		kfree(cur);
	}
}
/*
 * Carve nr_lpis consecutive LPIs out of the first free range that is
 * large enough.
 *
 * @nr_lpis: number of LPIs requested.
 * @base:    on success, receives the first LPI ID of the allocation.
 *           Left untouched on failure.
 *
 * The chosen range is shrunk from its low end; a range that becomes
 * empty is unlinked and freed. The list is walked under
 * lpi_range_lock.
 *
 * Returns 0 on success, -ENOSPC if no range can satisfy the request.
 */
static int alloc_lpi_range(u32 nr_lpis, u32 *base)
{
	struct lpi_range *range, *tmp;
	int err = -ENOSPC;

	mutex_lock(&lpi_range_lock);

	list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
		if (range->span < nr_lpis)
			continue;

		*base = range->base_id;
		range->base_id += nr_lpis;
		range->span -= nr_lpis;

		if (range->span == 0) {
			list_del(&range->entry);
			kfree(range);
		}

		err = 0;
		break;
	}

	mutex_unlock(&lpi_range_lock);

	/*
	 * Only report allocations that actually happened: on failure *base
	 * was never written and must not be read.
	 */
	if (!err)
		pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis);

	return err;
}
/*
 * Give nr_lpis LPIs starting at base back to the allocator.
 *
 * A new range descriptor is added to lpi_range_list, the list is
 * re-sorted with lpi_range_cmp() and adjacent ranges are fused by
 * merge_lpi_ranges().
 *
 * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated
 * (in which case the list is left unchanged).
 */
static int free_lpi_range(u32 base, u32 nr_lpis)
{
	struct lpi_range *new;

	/*
	 * Allocate before taking the lock: the allocation does not touch
	 * the list, so there is no reason to hold lpi_range_lock across it.
	 */
	new = mk_lpi_range(base, nr_lpis);
	if (!new)
		return -ENOMEM;

	mutex_lock(&lpi_range_lock);
	list_add(&new->entry, &lpi_range_list);
	list_sort(NULL, &lpi_range_list, lpi_range_cmp);
	merge_lpi_ranges();
	mutex_unlock(&lpi_range_lock);

	return 0;
}
/*
 * Seed the LPI allocator for an interrupt ID space of id_bits bits
 * (this describes the right-hand, post-change column).
 *
 * The (1UL << id_bits) - 8192 IDs starting at 8192 are handed to
 * free_lpi_range() as one free range. Returns whatever
 * free_lpi_range() returns.
 *
 * NOTE(review): the subtraction assumes (1UL << id_bits) is at least
 * 8192 -- confirm that callers guarantee this.
 * NOTE(review): the pr_debug() below is emitted even when
 * free_lpi_range() has failed.
 */
static int __init its_lpi_init(u32 id_bits) static int __init its_lpi_init(u32 id_bits)
{ {
lpi_chunks = its_lpi_to_chunk(1UL << id_bits); u32 lpis = (1UL << id_bits) - 8192;
int err;
lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long), /*
GFP_KERNEL); * Initializing the allocator is just the same as freeing the
if (!lpi_bitmap) { * full range of LPIs.
lpi_chunks = 0; */
return -ENOMEM; err = free_lpi_range(8192, lpis);
pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis);
return err;
} }
/*
 * its_lpi_alloc_chunks(), right-hand (post-change) column:
 *
 * Round nr_irqs up to a multiple of IRQS_PER_CHUNK and ask
 * alloc_lpi_range() for that many LPIs. Each time the request fails it
 * is retried with IRQS_PER_CHUNK fewer LPIs, until it succeeds or the
 * size drops to zero.
 *
 * On success a zeroed bitmap with one bit per allocated LPI is
 * allocated (GFP_ATOMIC) and returned, *base holds the first LPI and
 * *nr_ids the number of LPIs actually obtained, which may be fewer than
 * requested. On any failure NULL is returned and both *base and
 * *nr_ids are set to 0.
 */
pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks); static unsigned long *its_lpi_alloc_chunks(int nr_irqs, u32 *base, int *nr_ids)
return 0;
}
static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids)
{ {
unsigned long *bitmap = NULL; unsigned long *bitmap = NULL;
int chunk_id; int err = 0;
int nr_chunks; int nr_lpis;
int i;
nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK); nr_lpis = round_up(nr_irqs, IRQS_PER_CHUNK);
spin_lock(&lpi_lock);
do { do {
chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks, err = alloc_lpi_range(nr_lpis, base);
0, nr_chunks, 0); if (!err)
if (chunk_id < lpi_chunks)
break; break;
nr_chunks--; nr_lpis -= IRQS_PER_CHUNK;
} while (nr_chunks > 0); } while (nr_lpis > 0);
if (!nr_chunks) if (err)
goto out; goto out;
bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK), bitmap = kcalloc(BITS_TO_LONGS(nr_lpis), sizeof (long), GFP_ATOMIC);
sizeof(long),
GFP_ATOMIC);
/*
 * NOTE(review): in the new column, if this bitmap allocation fails the
 * range obtained from alloc_lpi_range() is not handed back through
 * free_lpi_range(); those LPIs appear to be leaked -- confirm.
 */
if (!bitmap) if (!bitmap)
goto out; goto out;
for (i = 0; i < nr_chunks; i++) *nr_ids = nr_lpis;
set_bit(chunk_id + i, lpi_bitmap);
*base = its_chunk_to_lpi(chunk_id);
*nr_ids = nr_chunks * IRQS_PER_CHUNK;
out: out:
spin_unlock(&lpi_lock);
if (!bitmap) if (!bitmap)
*base = *nr_ids = 0; *base = *nr_ids = 0;
return bitmap; return bitmap;
} }
/*
 * its_lpi_free_chunks(), right-hand (post-change) column:
 *
 * Hand the nr_ids LPIs starting at base back to the allocator through
 * free_lpi_range(), warning (WARN_ON) if that fails, then release the
 * bitmap with kfree().
 */
static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids) static void its_lpi_free_chunks(unsigned long *bitmap, u32 base, u32 nr_ids)
{ {
int lpi; WARN_ON(free_lpi_range(base, nr_ids));
spin_lock(&lpi_lock);
for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) {
int chunk = its_lpi_to_chunk(lpi);
BUG_ON(chunk > lpi_chunks);
if (test_bit(chunk, lpi_bitmap)) {
clear_bit(chunk, lpi_bitmap);
} else {
pr_err("Bad LPI chunk %d\n", chunk);
}
}
spin_unlock(&lpi_lock);
kfree(bitmap); kfree(bitmap);
} }