2
0
mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-09-04 20:19:47 +08:00
linux/drivers/net/ethernet/sfc/efx_channels.c
Ignat Korchagin 99ba0ea616 sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues
efx->xdp_tx_queue_count is initially initialized to num_possible_cpus() and is
later used to allocate and traverse efx->xdp_tx_queues lookup array. However,
we may end up not initializing all the array slots with real queues during
probing. This results, for example, in a NULL pointer dereference, when running
"# ethtool -S <iface>", similar to below

[2570283.664955][T4126959] BUG: kernel NULL pointer dereference, address: 00000000000000f8
[2570283.681283][T4126959] #PF: supervisor read access in kernel mode
[2570283.695678][T4126959] #PF: error_code(0x0000) - not-present page
[2570283.710013][T4126959] PGD 0 P4D 0
[2570283.721649][T4126959] Oops: 0000 [#1] SMP PTI
[2570283.734108][T4126959] CPU: 23 PID: 4126959 Comm: ethtool Tainted: G           O      5.10.20-cloudflare-2021.3.1 #1
[2570283.752641][T4126959] Hardware name: <redacted>
[2570283.781408][T4126959] RIP: 0010:efx_ethtool_get_stats+0x2ca/0x330 [sfc]
[2570283.796073][T4126959] Code: 00 85 c0 74 39 48 8b 95 a8 0f 00 00 48 85 d2 74 2d 31 c0 eb 07 48 8b 95 a8 0f 00 00 48 63 c8 49 83 c4 08 83 c0 01 48 8b 14 ca <48> 8b 92 f8 00 00 00 49 89 54 24 f8 39 85 a0 0f 00 00 77 d7 48 8b
[2570283.831259][T4126959] RSP: 0018:ffffb79a77657ce8 EFLAGS: 00010202
[2570283.845121][T4126959] RAX: 0000000000000019 RBX: ffffb799cd0c9280 RCX: 0000000000000018
[2570283.860872][T4126959] RDX: 0000000000000000 RSI: ffff96dd970ce000 RDI: 0000000000000005
[2570283.876525][T4126959] RBP: ffff96dd86f0a000 R08: ffff96dd970ce480 R09: 000000000000005f
[2570283.892014][T4126959] R10: ffffb799cd0c9fff R11: ffffb799cd0c9000 R12: ffffb799cd0c94f8
[2570283.907406][T4126959] R13: ffffffffc11b1090 R14: ffff96dd970ce000 R15: ffffffffc11cd66c
[2570283.922705][T4126959] FS:  00007fa7723f8740(0000) GS:ffff96f51fac0000(0000) knlGS:0000000000000000
[2570283.938848][T4126959] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[2570283.952524][T4126959] CR2: 00000000000000f8 CR3: 0000001a73e6e006 CR4: 00000000007706e0
[2570283.967529][T4126959] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[2570283.982400][T4126959] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[2570283.997308][T4126959] PKRU: 55555554
[2570284.007649][T4126959] Call Trace:
[2570284.017598][T4126959]  dev_ethtool+0x1832/0x2830

Fix this by adjusting efx->xdp_tx_queue_count after probing to reflect the true
value of initialized slots in efx->xdp_tx_queues.

Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Fixes: e26ca4b535 ("sfc: reduce the number of requested xdp ev queues")
Cc: <stable@vger.kernel.org> # 5.12.x
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-04-27 15:38:29 -07:00

1284 lines
32 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
* Driver for Solarflare network controllers and boards
* Copyright 2018 Solarflare Communications Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation, incorporated herein by reference.
*/
#include "net_driver.h"
#include <linux/module.h>
#include "efx_channels.h"
#include "efx.h"
#include "efx_common.h"
#include "tx_common.h"
#include "rx_common.h"
#include "nic.h"
#include "sriov.h"
#include "workarounds.h"
/* This is the first interrupt mode to try out of:
* 0 => MSI-X
* 1 => MSI
* 2 => legacy
*/
unsigned int efx_interrupt_mode = EFX_INT_MODE_MSIX;
/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
* i.e. the number of CPUs among which we may distribute simultaneous
* interrupt handling.
*
* Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
* The default (0) means to assign an interrupt to each core.
*/
unsigned int rss_cpus;
static unsigned int irq_adapt_low_thresh = 8000;
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
"Threshold score for reducing IRQ moderation");
static unsigned int irq_adapt_high_thresh = 16000;
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
"Threshold score for increasing IRQ moderation");
/* This is the weight assigned to each of the (per-channel) virtual
* NAPI devices.
*/
static int napi_weight = 64;
/***************
* Housekeeping
***************/
int efx_channel_dummy_op_int(struct efx_channel *channel)
{
return 0;
}
void efx_channel_dummy_op_void(struct efx_channel *channel)
{
}
static const struct efx_channel_type efx_default_channel_type = {
.pre_probe = efx_channel_dummy_op_int,
.post_remove = efx_channel_dummy_op_void,
.get_name = efx_get_channel_name,
.copy = efx_copy_channel,
.want_txqs = efx_default_channel_want_txqs,
.keep_eventq = false,
.want_pio = true,
};
/*************
* INTERRUPTS
*************/
static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
{
cpumask_var_t thread_mask;
unsigned int count;
int cpu;
if (rss_cpus) {
count = rss_cpus;
} else {
if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
netif_warn(efx, probe, efx->net_dev,
"RSS disabled due to allocation failure\n");
return 1;
}
count = 0;
for_each_online_cpu(cpu) {
if (!cpumask_test_cpu(cpu, thread_mask)) {
++count;
cpumask_or(thread_mask, thread_mask,
topology_sibling_cpumask(cpu));
}
}
free_cpumask_var(thread_mask);
}
if (count > EFX_MAX_RX_QUEUES) {
netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
"Reducing number of rx queues from %u to %u.\n",
count, EFX_MAX_RX_QUEUES);
count = EFX_MAX_RX_QUEUES;
}
/* If RSS is requested for the PF *and* VFs then we can't write RSS
* table entries that are inaccessible to VFs
*/
#ifdef CONFIG_SFC_SRIOV
if (efx->type->sriov_wanted) {
if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
count > efx_vf_size(efx)) {
netif_warn(efx, probe, efx->net_dev,
"Reducing number of RSS channels from %u to %u for "
"VF support. Increase vf-msix-limit to use more "
"channels on the PF.\n",
count, efx_vf_size(efx));
count = efx_vf_size(efx);
}
}
#endif
return count;
}
static int efx_allocate_msix_channels(struct efx_nic *efx,
unsigned int max_channels,
unsigned int extra_channels,
unsigned int parallelism)
{
unsigned int n_channels = parallelism;
int vec_count;
int tx_per_ev;
int n_xdp_tx;
int n_xdp_ev;
if (efx_separate_tx_channels)
n_channels *= 2;
n_channels += extra_channels;
/* To allow XDP transmit to happen from arbitrary NAPI contexts
* we allocate a TX queue per CPU. We share event queues across
* multiple tx queues, assuming tx and ev queues are both
* maximum size.
*/
tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx);
n_xdp_tx = num_possible_cpus();
n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);
vec_count = pci_msix_vec_count(efx->pci_dev);
if (vec_count < 0)
return vec_count;
max_channels = min_t(unsigned int, vec_count, max_channels);
/* Check resources.
* We need a channel per event queue, plus a VI per tx queue.
* This may be more pessimistic than it needs to be.
*/
if (n_channels + n_xdp_ev > max_channels) {
netif_err(efx, drv, efx->net_dev,
"Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
n_xdp_ev, n_channels, max_channels);
efx->n_xdp_channels = 0;
efx->xdp_tx_per_channel = 0;
efx->xdp_tx_queue_count = 0;
} else if (n_channels + n_xdp_tx > efx->max_vis) {
netif_err(efx, drv, efx->net_dev,
"Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
n_xdp_tx, n_channels, efx->max_vis);
efx->n_xdp_channels = 0;
efx->xdp_tx_per_channel = 0;
efx->xdp_tx_queue_count = 0;
} else {
efx->n_xdp_channels = n_xdp_ev;
efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL;
efx->xdp_tx_queue_count = n_xdp_tx;
n_channels += n_xdp_ev;
netif_dbg(efx, drv, efx->net_dev,
"Allocating %d TX and %d event queues for XDP\n",
n_xdp_tx, n_xdp_ev);
}
if (vec_count < n_channels) {
netif_err(efx, drv, efx->net_dev,
"WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
vec_count, n_channels);
netif_err(efx, drv, efx->net_dev,
"WARNING: Performance may be reduced.\n");
n_channels = vec_count;
}
n_channels = min(n_channels, max_channels);
efx->n_channels = n_channels;
/* Ignore XDP tx channels when creating rx channels. */
n_channels -= efx->n_xdp_channels;
if (efx_separate_tx_channels) {
efx->n_tx_channels =
min(max(n_channels / 2, 1U),
efx->max_tx_channels);
efx->tx_channel_offset =
n_channels - efx->n_tx_channels;
efx->n_rx_channels =
max(n_channels -
efx->n_tx_channels, 1U);
} else {
efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
efx->tx_channel_offset = 0;
efx->n_rx_channels = n_channels;
}
efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
efx->xdp_channel_offset = n_channels;
netif_dbg(efx, drv, efx->net_dev,
"Allocating %u RX channels\n",
efx->n_rx_channels);
return efx->n_channels;
}
/* Probe the number and type of interrupts we are able to obtain, and
* the resulting numbers of channels and RX queues.
*/
int efx_probe_interrupts(struct efx_nic *efx)
{
unsigned int extra_channels = 0;
unsigned int rss_spread;
unsigned int i, j;
int rc;
for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
if (efx->extra_channel_type[i])
++extra_channels;
if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
unsigned int parallelism = efx_wanted_parallelism(efx);
struct msix_entry xentries[EFX_MAX_CHANNELS];
unsigned int n_channels;
rc = efx_allocate_msix_channels(efx, efx->max_channels,
extra_channels, parallelism);
if (rc >= 0) {
n_channels = rc;
for (i = 0; i < n_channels; i++)
xentries[i].entry = i;
rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
n_channels);
}
if (rc < 0) {
/* Fall back to single channel MSI */
netif_err(efx, drv, efx->net_dev,
"could not enable MSI-X\n");
if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
efx->interrupt_mode = EFX_INT_MODE_MSI;
else
return rc;
} else if (rc < n_channels) {
netif_err(efx, drv, efx->net_dev,
"WARNING: Insufficient MSI-X vectors"
" available (%d < %u).\n", rc, n_channels);
netif_err(efx, drv, efx->net_dev,
"WARNING: Performance may be reduced.\n");
n_channels = rc;
}
if (rc > 0) {
for (i = 0; i < efx->n_channels; i++)
efx_get_channel(efx, i)->irq =
xentries[i].vector;
}
}
/* Try single interrupt MSI */
if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
efx->n_channels = 1;
efx->n_rx_channels = 1;
efx->n_tx_channels = 1;
efx->n_xdp_channels = 0;
efx->xdp_channel_offset = efx->n_channels;
rc = pci_enable_msi(efx->pci_dev);
if (rc == 0) {
efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
} else {
netif_err(efx, drv, efx->net_dev,
"could not enable MSI\n");
if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
efx->interrupt_mode = EFX_INT_MODE_LEGACY;
else
return rc;
}
}
/* Assume legacy interrupts */
if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
efx->n_rx_channels = 1;
efx->n_tx_channels = 1;
efx->n_xdp_channels = 0;
efx->xdp_channel_offset = efx->n_channels;
efx->legacy_irq = efx->pci_dev->irq;
}
/* Assign extra channels if possible, before XDP channels */
efx->n_extra_tx_channels = 0;
j = efx->xdp_channel_offset;
for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
if (!efx->extra_channel_type[i])
continue;
if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
efx->extra_channel_type[i]->handle_no_channel(efx);
} else {
--j;
efx_get_channel(efx, j)->type =
efx->extra_channel_type[i];
if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
efx->n_extra_tx_channels++;
}
}
rss_spread = efx->n_rx_channels;
/* RSS might be usable on VFs even if it is disabled on the PF */
#ifdef CONFIG_SFC_SRIOV
if (efx->type->sriov_wanted) {
efx->rss_spread = ((rss_spread > 1 ||
!efx->type->sriov_wanted(efx)) ?
rss_spread : efx_vf_size(efx));
return 0;
}
#endif
efx->rss_spread = rss_spread;
return 0;
}
#if defined(CONFIG_SMP)
void efx_set_interrupt_affinity(struct efx_nic *efx)
{
struct efx_channel *channel;
unsigned int cpu;
efx_for_each_channel(channel, efx) {
cpu = cpumask_local_spread(channel->channel,
pcibus_to_node(efx->pci_dev->bus));
irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
}
}
void efx_clear_interrupt_affinity(struct efx_nic *efx)
{
struct efx_channel *channel;
efx_for_each_channel(channel, efx)
irq_set_affinity_hint(channel->irq, NULL);
}
#else
void
efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}
void
efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}
#endif /* CONFIG_SMP */
void efx_remove_interrupts(struct efx_nic *efx)
{
struct efx_channel *channel;
/* Remove MSI/MSI-X interrupts */
efx_for_each_channel(channel, efx)
channel->irq = 0;
pci_disable_msi(efx->pci_dev);
pci_disable_msix(efx->pci_dev);
/* Remove legacy interrupt */
efx->legacy_irq = 0;
}
/***************
* EVENT QUEUES
***************/
/* Create event queue
* Event queue memory allocations are done only once. If the channel
* is reset, the memory buffer will be reused; this guards against
* errors during channel reset and also simplifies interrupt handling.
*/
int efx_probe_eventq(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
unsigned long entries;
netif_dbg(efx, probe, efx->net_dev,
"chan %d create event queue\n", channel->channel);
/* Build an event queue with room for one event per tx and rx buffer,
* plus some extra for link state events and MCDI completions.
*/
entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
return efx_nic_probe_eventq(channel);
}
/* Prepare channel's event queue */
int efx_init_eventq(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
int rc;
EFX_WARN_ON_PARANOID(channel->eventq_init);
netif_dbg(efx, drv, efx->net_dev,
"chan %d init event queue\n", channel->channel);
rc = efx_nic_init_eventq(channel);
if (rc == 0) {
efx->type->push_irq_moderation(channel);
channel->eventq_read_ptr = 0;
channel->eventq_init = true;
}
return rc;
}
/* Enable event queue processing and NAPI */
void efx_start_eventq(struct efx_channel *channel)
{
netif_dbg(channel->efx, ifup, channel->efx->net_dev,
"chan %d start event queue\n", channel->channel);
/* Make sure the NAPI handler sees the enabled flag set */
channel->enabled = true;
smp_wmb();
napi_enable(&channel->napi_str);
efx_nic_eventq_read_ack(channel);
}
/* Disable event queue processing and NAPI */
void efx_stop_eventq(struct efx_channel *channel)
{
if (!channel->enabled)
return;
napi_disable(&channel->napi_str);
channel->enabled = false;
}
void efx_fini_eventq(struct efx_channel *channel)
{
if (!channel->eventq_init)
return;
netif_dbg(channel->efx, drv, channel->efx->net_dev,
"chan %d fini event queue\n", channel->channel);
efx_nic_fini_eventq(channel);
channel->eventq_init = false;
}
void efx_remove_eventq(struct efx_channel *channel)
{
netif_dbg(channel->efx, drv, channel->efx->net_dev,
"chan %d remove event queue\n", channel->channel);
efx_nic_remove_eventq(channel);
}
/**************************************************************************
*
* Channel handling
*
*************************************************************************/
#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_expire(struct work_struct *data)
{
struct delayed_work *dwork = to_delayed_work(data);
struct efx_channel *channel;
unsigned int time, quota;
channel = container_of(dwork, struct efx_channel, filter_work);
time = jiffies - channel->rfs_last_expiry;
quota = channel->rfs_filter_count * time / (30 * HZ);
if (quota >= 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota)))
channel->rfs_last_expiry += time;
/* Ensure we do more work eventually even if NAPI poll is not happening */
schedule_delayed_work(dwork, 30 * HZ);
}
#endif
/* Allocate and initialise a channel structure. */
static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
{
struct efx_rx_queue *rx_queue;
struct efx_tx_queue *tx_queue;
struct efx_channel *channel;
int j;
channel = kzalloc(sizeof(*channel), GFP_KERNEL);
if (!channel)
return NULL;
channel->efx = efx;
channel->channel = i;
channel->type = &efx_default_channel_type;
for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
tx_queue = &channel->tx_queue[j];
tx_queue->efx = efx;
tx_queue->queue = -1;
tx_queue->label = j;
tx_queue->channel = channel;
}
#ifdef CONFIG_RFS_ACCEL
INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif
rx_queue = &channel->rx_queue;
rx_queue->efx = efx;
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
return channel;
}
int efx_init_channels(struct efx_nic *efx)
{
unsigned int i;
for (i = 0; i < EFX_MAX_CHANNELS; i++) {
efx->channel[i] = efx_alloc_channel(efx, i);
if (!efx->channel[i])
return -ENOMEM;
efx->msi_context[i].efx = efx;
efx->msi_context[i].index = i;
}
/* Higher numbered interrupt modes are less capable! */
efx->interrupt_mode = min(efx->type->min_interrupt_mode,
efx_interrupt_mode);
efx->max_channels = EFX_MAX_CHANNELS;
efx->max_tx_channels = EFX_MAX_CHANNELS;
return 0;
}
void efx_fini_channels(struct efx_nic *efx)
{
unsigned int i;
for (i = 0; i < EFX_MAX_CHANNELS; i++)
if (efx->channel[i]) {
kfree(efx->channel[i]);
efx->channel[i] = NULL;
}
}
/* Allocate and initialise a channel structure, copying parameters
* (but not resources) from an old channel structure.
*/
struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
{
struct efx_rx_queue *rx_queue;
struct efx_tx_queue *tx_queue;
struct efx_channel *channel;
int j;
channel = kmalloc(sizeof(*channel), GFP_KERNEL);
if (!channel)
return NULL;
*channel = *old_channel;
channel->napi_dev = NULL;
INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
channel->napi_str.napi_id = 0;
channel->napi_str.state = 0;
memset(&channel->eventq, 0, sizeof(channel->eventq));
for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
tx_queue = &channel->tx_queue[j];
if (tx_queue->channel)
tx_queue->channel = channel;
tx_queue->buffer = NULL;
tx_queue->cb_page = NULL;
memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
}
rx_queue = &channel->rx_queue;
rx_queue->buffer = NULL;
memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
#ifdef CONFIG_RFS_ACCEL
INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif
return channel;
}
static int efx_probe_channel(struct efx_channel *channel)
{
struct efx_tx_queue *tx_queue;
struct efx_rx_queue *rx_queue;
int rc;
netif_dbg(channel->efx, probe, channel->efx->net_dev,
"creating channel %d\n", channel->channel);
rc = channel->type->pre_probe(channel);
if (rc)
goto fail;
rc = efx_probe_eventq(channel);
if (rc)
goto fail;
efx_for_each_channel_tx_queue(tx_queue, channel) {
rc = efx_probe_tx_queue(tx_queue);
if (rc)
goto fail;
}
efx_for_each_channel_rx_queue(rx_queue, channel) {
rc = efx_probe_rx_queue(rx_queue);
if (rc)
goto fail;
}
channel->rx_list = NULL;
return 0;
fail:
efx_remove_channel(channel);
return rc;
}
void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
{
struct efx_nic *efx = channel->efx;
const char *type;
int number;
number = channel->channel;
if (number >= efx->xdp_channel_offset &&
!WARN_ON_ONCE(!efx->n_xdp_channels)) {
type = "-xdp";
number -= efx->xdp_channel_offset;
} else if (efx->tx_channel_offset == 0) {
type = "";
} else if (number < efx->tx_channel_offset) {
type = "-rx";
} else {
type = "-tx";
number -= efx->tx_channel_offset;
}
snprintf(buf, len, "%s%s-%d", efx->name, type, number);
}
void efx_set_channel_names(struct efx_nic *efx)
{
struct efx_channel *channel;
efx_for_each_channel(channel, efx)
channel->type->get_name(channel,
efx->msi_context[channel->channel].name,
sizeof(efx->msi_context[0].name));
}
int efx_probe_channels(struct efx_nic *efx)
{
struct efx_channel *channel;
int rc;
/* Restart special buffer allocation */
efx->next_buffer_table = 0;
/* Probe channels in reverse, so that any 'extra' channels
* use the start of the buffer table. This allows the traffic
* channels to be resized without moving them or wasting the
* entries before them.
*/
efx_for_each_channel_rev(channel, efx) {
rc = efx_probe_channel(channel);
if (rc) {
netif_err(efx, probe, efx->net_dev,
"failed to create channel %d\n",
channel->channel);
goto fail;
}
}
efx_set_channel_names(efx);
return 0;
fail:
efx_remove_channels(efx);
return rc;
}
void efx_remove_channel(struct efx_channel *channel)
{
struct efx_tx_queue *tx_queue;
struct efx_rx_queue *rx_queue;
netif_dbg(channel->efx, drv, channel->efx->net_dev,
"destroy chan %d\n", channel->channel);
efx_for_each_channel_rx_queue(rx_queue, channel)
efx_remove_rx_queue(rx_queue);
efx_for_each_channel_tx_queue(tx_queue, channel)
efx_remove_tx_queue(tx_queue);
efx_remove_eventq(channel);
channel->type->post_remove(channel);
}
void efx_remove_channels(struct efx_nic *efx)
{
struct efx_channel *channel;
efx_for_each_channel(channel, efx)
efx_remove_channel(channel);
kfree(efx->xdp_tx_queues);
}
int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
unsigned int i, next_buffer_table = 0;
u32 old_rxq_entries, old_txq_entries;
int rc, rc2;
rc = efx_check_disabled(efx);
if (rc)
return rc;
/* Not all channels should be reallocated. We must avoid
* reallocating their buffer table entries.
*/
efx_for_each_channel(channel, efx) {
struct efx_rx_queue *rx_queue;
struct efx_tx_queue *tx_queue;
if (channel->type->copy)
continue;
next_buffer_table = max(next_buffer_table,
channel->eventq.index +
channel->eventq.entries);
efx_for_each_channel_rx_queue(rx_queue, channel)
next_buffer_table = max(next_buffer_table,
rx_queue->rxd.index +
rx_queue->rxd.entries);
efx_for_each_channel_tx_queue(tx_queue, channel)
next_buffer_table = max(next_buffer_table,
tx_queue->txd.index +
tx_queue->txd.entries);
}
efx_device_detach_sync(efx);
efx_stop_all(efx);
efx_soft_disable_interrupts(efx);
/* Clone channels (where possible) */
memset(other_channel, 0, sizeof(other_channel));
for (i = 0; i < efx->n_channels; i++) {
channel = efx->channel[i];
if (channel->type->copy)
channel = channel->type->copy(channel);
if (!channel) {
rc = -ENOMEM;
goto out;
}
other_channel[i] = channel;
}
/* Swap entry counts and channel pointers */
old_rxq_entries = efx->rxq_entries;
old_txq_entries = efx->txq_entries;
efx->rxq_entries = rxq_entries;
efx->txq_entries = txq_entries;
for (i = 0; i < efx->n_channels; i++) {
channel = efx->channel[i];
efx->channel[i] = other_channel[i];
other_channel[i] = channel;
}
/* Restart buffer table allocation */
efx->next_buffer_table = next_buffer_table;
for (i = 0; i < efx->n_channels; i++) {
channel = efx->channel[i];
if (!channel->type->copy)
continue;
rc = efx_probe_channel(channel);
if (rc)
goto rollback;
efx_init_napi_channel(efx->channel[i]);
}
out:
/* Destroy unused channel structures */
for (i = 0; i < efx->n_channels; i++) {
channel = other_channel[i];
if (channel && channel->type->copy) {
efx_fini_napi_channel(channel);
efx_remove_channel(channel);
kfree(channel);
}
}
rc2 = efx_soft_enable_interrupts(efx);
if (rc2) {
rc = rc ? rc : rc2;
netif_err(efx, drv, efx->net_dev,
"unable to restart interrupts on channel reallocation\n");
efx_schedule_reset(efx, RESET_TYPE_DISABLE);
} else {
efx_start_all(efx);
efx_device_attach_if_not_resetting(efx);
}
return rc;
rollback:
/* Swap back */
efx->rxq_entries = old_rxq_entries;
efx->txq_entries = old_txq_entries;
for (i = 0; i < efx->n_channels; i++) {
channel = efx->channel[i];
efx->channel[i] = other_channel[i];
other_channel[i] = channel;
}
goto out;
}
int efx_set_channels(struct efx_nic *efx)
{
struct efx_tx_queue *tx_queue;
struct efx_channel *channel;
unsigned int next_queue = 0;
int xdp_queue_number;
int rc;
efx->tx_channel_offset =
efx_separate_tx_channels ?
efx->n_channels - efx->n_tx_channels : 0;
if (efx->xdp_tx_queue_count) {
EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
/* Allocate array for XDP TX queue lookup. */
efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
sizeof(*efx->xdp_tx_queues),
GFP_KERNEL);
if (!efx->xdp_tx_queues)
return -ENOMEM;
}
/* We need to mark which channels really have RX and TX
* queues, and adjust the TX queue numbers if we have separate
* RX-only and TX-only channels.
*/
xdp_queue_number = 0;
efx_for_each_channel(channel, efx) {
if (channel->channel < efx->n_rx_channels)
channel->rx_queue.core_index = channel->channel;
else
channel->rx_queue.core_index = -1;
if (channel->channel >= efx->tx_channel_offset) {
if (efx_channel_is_xdp_tx(channel)) {
efx_for_each_channel_tx_queue(tx_queue, channel) {
tx_queue->queue = next_queue++;
netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
channel->channel, tx_queue->label,
xdp_queue_number, tx_queue->queue);
/* We may have a few left-over XDP TX
* queues owing to xdp_tx_queue_count
* not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL.
* We still allocate and probe those
* TXQs, but never use them.
*/
if (xdp_queue_number < efx->xdp_tx_queue_count)
efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
xdp_queue_number++;
}
} else {
efx_for_each_channel_tx_queue(tx_queue, channel) {
tx_queue->queue = next_queue++;
netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
channel->channel, tx_queue->label,
tx_queue->queue);
}
}
}
}
if (xdp_queue_number)
efx->xdp_tx_queue_count = xdp_queue_number;
rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
if (rc)
return rc;
return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
}
bool efx_default_channel_want_txqs(struct efx_channel *channel)
{
return channel->channel - channel->efx->tx_channel_offset <
channel->efx->n_tx_channels;
}
/*************
* START/STOP
*************/
int efx_soft_enable_interrupts(struct efx_nic *efx)
{
struct efx_channel *channel, *end_channel;
int rc;
BUG_ON(efx->state == STATE_DISABLED);
efx->irq_soft_enabled = true;
smp_wmb();
efx_for_each_channel(channel, efx) {
if (!channel->type->keep_eventq) {
rc = efx_init_eventq(channel);
if (rc)
goto fail;
}
efx_start_eventq(channel);
}
efx_mcdi_mode_event(efx);
return 0;
fail:
end_channel = channel;
efx_for_each_channel(channel, efx) {
if (channel == end_channel)
break;
efx_stop_eventq(channel);
if (!channel->type->keep_eventq)
efx_fini_eventq(channel);
}
return rc;
}
void efx_soft_disable_interrupts(struct efx_nic *efx)
{
struct efx_channel *channel;
if (efx->state == STATE_DISABLED)
return;
efx_mcdi_mode_poll(efx);
efx->irq_soft_enabled = false;
smp_wmb();
if (efx->legacy_irq)
synchronize_irq(efx->legacy_irq);
efx_for_each_channel(channel, efx) {
if (channel->irq)
synchronize_irq(channel->irq);
efx_stop_eventq(channel);
if (!channel->type->keep_eventq)
efx_fini_eventq(channel);
}
/* Flush the asynchronous MCDI request queue */
efx_mcdi_flush_async(efx);
}
int efx_enable_interrupts(struct efx_nic *efx)
{
struct efx_channel *channel, *end_channel;
int rc;
/* TODO: Is this really a bug? */
BUG_ON(efx->state == STATE_DISABLED);
if (efx->eeh_disabled_legacy_irq) {
enable_irq(efx->legacy_irq);
efx->eeh_disabled_legacy_irq = false;
}
efx->type->irq_enable_master(efx);
efx_for_each_channel(channel, efx) {
if (channel->type->keep_eventq) {
rc = efx_init_eventq(channel);
if (rc)
goto fail;
}
}
rc = efx_soft_enable_interrupts(efx);
if (rc)
goto fail;
return 0;
fail:
end_channel = channel;
efx_for_each_channel(channel, efx) {
if (channel == end_channel)
break;
if (channel->type->keep_eventq)
efx_fini_eventq(channel);
}
efx->type->irq_disable_non_ev(efx);
return rc;
}
void efx_disable_interrupts(struct efx_nic *efx)
{
struct efx_channel *channel;
efx_soft_disable_interrupts(efx);
efx_for_each_channel(channel, efx) {
if (channel->type->keep_eventq)
efx_fini_eventq(channel);
}
efx->type->irq_disable_non_ev(efx);
}
void efx_start_channels(struct efx_nic *efx)
{
struct efx_tx_queue *tx_queue;
struct efx_rx_queue *rx_queue;
struct efx_channel *channel;
efx_for_each_channel(channel, efx) {
efx_for_each_channel_tx_queue(tx_queue, channel) {
efx_init_tx_queue(tx_queue);
atomic_inc(&efx->active_queues);
}
efx_for_each_channel_rx_queue(rx_queue, channel) {
efx_init_rx_queue(rx_queue);
atomic_inc(&efx->active_queues);
efx_stop_eventq(channel);
efx_fast_push_rx_descriptors(rx_queue, false);
efx_start_eventq(channel);
}
WARN_ON(channel->rx_pkt_n_frags);
}
}
void efx_stop_channels(struct efx_nic *efx)
{
struct efx_tx_queue *tx_queue;
struct efx_rx_queue *rx_queue;
struct efx_channel *channel;
int rc = 0;
/* Stop RX refill */
efx_for_each_channel(channel, efx) {
efx_for_each_channel_rx_queue(rx_queue, channel)
rx_queue->refill_enabled = false;
}
efx_for_each_channel(channel, efx) {
/* RX packet processing is pipelined, so wait for the
* NAPI handler to complete. At least event queue 0
* might be kept active by non-data events, so don't
* use napi_synchronize() but actually disable NAPI
* temporarily.
*/
if (efx_channel_has_rx_queue(channel)) {
efx_stop_eventq(channel);
efx_start_eventq(channel);
}
}
if (efx->type->fini_dmaq)
rc = efx->type->fini_dmaq(efx);
if (rc) {
netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
} else {
netif_dbg(efx, drv, efx->net_dev,
"successfully flushed all queues\n");
}
efx_for_each_channel(channel, efx) {
efx_for_each_channel_rx_queue(rx_queue, channel)
efx_fini_rx_queue(rx_queue);
efx_for_each_channel_tx_queue(tx_queue, channel)
efx_fini_tx_queue(tx_queue);
}
}
/**************************************************************************
*
* NAPI interface
*
*************************************************************************/
/* Process channel's event queue
*
* This function is responsible for processing the event queue of a
* single channel. The caller must guarantee that this function will
* never be concurrently called more than once on the same channel,
* though different channels may be being processed concurrently.
*/
static int efx_process_channel(struct efx_channel *channel, int budget)
{
struct efx_tx_queue *tx_queue;
struct list_head rx_list;
int spent;
if (unlikely(!channel->enabled))
return 0;
/* Prepare the batch receive list */
EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
INIT_LIST_HEAD(&rx_list);
channel->rx_list = &rx_list;
efx_for_each_channel_tx_queue(tx_queue, channel) {
tx_queue->pkts_compl = 0;
tx_queue->bytes_compl = 0;
}
spent = efx_nic_process_eventq(channel, budget);
if (spent && efx_channel_has_rx_queue(channel)) {
struct efx_rx_queue *rx_queue =
efx_channel_get_rx_queue(channel);
efx_rx_flush_packet(channel);
efx_fast_push_rx_descriptors(rx_queue, true);
}
/* Update BQL */
efx_for_each_channel_tx_queue(tx_queue, channel) {
if (tx_queue->bytes_compl) {
netdev_tx_completed_queue(tx_queue->core_txq,
tx_queue->pkts_compl,
tx_queue->bytes_compl);
}
}
/* Receive any packets we queued up */
netif_receive_skb_list(channel->rx_list);
channel->rx_list = NULL;
return spent;
}
static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
{
int step = efx->irq_mod_step_us;
if (channel->irq_mod_score < irq_adapt_low_thresh) {
if (channel->irq_moderation_us > step) {
channel->irq_moderation_us -= step;
efx->type->push_irq_moderation(channel);
}
} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
if (channel->irq_moderation_us <
efx->irq_rx_moderation_us) {
channel->irq_moderation_us += step;
efx->type->push_irq_moderation(channel);
}
}
channel->irq_count = 0;
channel->irq_mod_score = 0;
}
/* NAPI poll handler
*
* NAPI guarantees serialisation of polls of the same device, which
* provides the guarantee required by efx_process_channel().
*/
static int efx_poll(struct napi_struct *napi, int budget)
{
struct efx_channel *channel =
container_of(napi, struct efx_channel, napi_str);
struct efx_nic *efx = channel->efx;
#ifdef CONFIG_RFS_ACCEL
unsigned int time;
#endif
int spent;
netif_vdbg(efx, intr, efx->net_dev,
"channel %d NAPI poll executing on CPU %d\n",
channel->channel, raw_smp_processor_id());
spent = efx_process_channel(channel, budget);
xdp_do_flush_map();
if (spent < budget) {
if (efx_channel_has_rx_queue(channel) &&
efx->irq_rx_adaptive &&
unlikely(++channel->irq_count == 1000)) {
efx_update_irq_mod(efx, channel);
}
#ifdef CONFIG_RFS_ACCEL
/* Perhaps expire some ARFS filters */
time = jiffies - channel->rfs_last_expiry;
/* Would our quota be >= 20? */
if (channel->rfs_filter_count * time >= 600 * HZ)
mod_delayed_work(system_wq, &channel->filter_work, 0);
#endif
/* There is no race here; although napi_disable() will
* only wait for napi_complete(), this isn't a problem
* since efx_nic_eventq_read_ack() will have no effect if
* interrupts have already been disabled.
*/
if (napi_complete_done(napi, spent))
efx_nic_eventq_read_ack(channel);
}
return spent;
}
void efx_init_napi_channel(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
channel->napi_dev = efx->net_dev;
netif_napi_add(channel->napi_dev, &channel->napi_str,
efx_poll, napi_weight);
}
void efx_init_napi(struct efx_nic *efx)
{
struct efx_channel *channel;
efx_for_each_channel(channel, efx)
efx_init_napi_channel(channel);
}
void efx_fini_napi_channel(struct efx_channel *channel)
{
if (channel->napi_dev)
netif_napi_del(&channel->napi_str);
channel->napi_dev = NULL;
}
void efx_fini_napi(struct efx_nic *efx)
{
struct efx_channel *channel;
efx_for_each_channel(channel, efx)
efx_fini_napi_channel(channel);
}