Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
idpf: add XDP support

Alexander Lobakin says:

Add XDP support (w/o XSk for now) to the idpf driver using the libeth_xdp
sublib. All possible verdicts, .ndo_xdp_xmit(), multi-buffer etc. are here.
In general, nothing outstanding compared to ice, except performance --
let's say, up to 2x for .ndo_xdp_xmit() on certain platforms and
scenarios.
idpf doesn't support VLAN Rx offload, so only the hash hint is
available for now.

Patches 1-7 are prereqs, without which XDP would either not work at all
or work slower/worse/...

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  idpf: add XDP RSS hash hint
  idpf: add support for .ndo_xdp_xmit()
  idpf: add support for XDP on Rx
  idpf: use generic functions to build xdp_buff and skb
  idpf: implement XDP_SETUP_PROG in ndo_bpf for splitq
  idpf: prepare structures to support XDP
  idpf: add support for nointerrupt queues
  idpf: remove SW marker handling from NAPI
  idpf: add 4-byte completion descriptor definition
  idpf: link NAPIs to queues
  idpf: use a saner limit for default number of queues to allocate
  idpf: fix Rx descriptor ready check barrier in splitq
  xdp, libeth: make the xdp_init_buff() micro-optimization generic
====================

Link: https://patch.msgid.link/20250908195748.1707057-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-09-09 18:44:07 -07:00
17 changed files with 1219 additions and 429 deletions

View File

@@ -6,7 +6,7 @@ config IDPF
depends on PCI_MSI
depends on PTP_1588_CLOCK_OPTIONAL
select DIMLIB
select LIBETH
select LIBETH_XDP
help
This driver supports Intel(R) Infrastructure Data Path Function
devices.

View File

@@ -21,3 +21,5 @@ idpf-$(CONFIG_IDPF_SINGLEQ) += idpf_singleq_txrx.o
idpf-$(CONFIG_PTP_1588_CLOCK) += idpf_ptp.o
idpf-$(CONFIG_PTP_1588_CLOCK) += idpf_virtchnl_ptp.o
idpf-y += xdp.o

View File

@@ -40,6 +40,7 @@ struct idpf_vport_max_q;
#define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \
((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz))
#define IDPF_WAIT_FOR_MARKER_TIMEO 500
#define IDPF_MAX_WAIT 500
/* available message levels */
@@ -248,13 +249,10 @@ enum idpf_vport_reset_cause {
/**
* enum idpf_vport_flags - Vport flags
* @IDPF_VPORT_DEL_QUEUES: To send delete queues message
* @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets
* processing is done
* @IDPF_VPORT_FLAGS_NBITS: Must be last
*/
enum idpf_vport_flags {
IDPF_VPORT_DEL_QUEUES,
IDPF_VPORT_SW_MARKER,
IDPF_VPORT_FLAGS_NBITS,
};
@@ -289,6 +287,10 @@ struct idpf_fsteer_fltr {
* @txq_model: Split queue or single queue queuing model
* @txqs: Used only in hotpath to get to the right queue very fast
* @crc_enable: Enable CRC insertion offload
* @xdpsq_share: whether XDPSQ sharing is enabled
* @num_xdp_txq: number of XDPSQs
* @xdp_txq_offset: index of the first XDPSQ (== number of regular SQs)
* @xdp_prog: installed XDP program
* @num_rxq: Number of allocated RX queues
* @num_bufq: Number of allocated buffer queues
* @rxq_desc_count: RX queue descriptor count. *MUST* have enough descriptors
@@ -314,13 +316,15 @@ struct idpf_fsteer_fltr {
* @num_q_vectors: Number of IRQ vectors allocated
* @q_vectors: Array of queue vectors
* @q_vector_idxs: Starting index of queue vectors
* @noirq_dyn_ctl: register to enable/disable the vector for NOIRQ queues
* @noirq_dyn_ctl_ena: value to write to the above to enable it
* @noirq_v_idx: ID of the NOIRQ vector
* @max_mtu: device given max possible MTU
* @default_mac_addr: device will give a default MAC to use
* @rx_itr_profile: RX profiles for Dynamic Interrupt Moderation
* @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation
* @port_stats: per port csum, header split, and other offload stats
* @link_up: True if link is up
* @sw_marker_wq: workqueue for marker packets
* @tx_tstamp_caps: Capabilities negotiated for Tx timestamping
* @tstamp_config: The Tx tstamp config
* @tstamp_task: Tx timestamping task
@@ -337,6 +341,11 @@ struct idpf_vport {
struct idpf_tx_queue **txqs;
bool crc_enable;
bool xdpsq_share;
u16 num_xdp_txq;
u16 xdp_txq_offset;
struct bpf_prog *xdp_prog;
u16 num_rxq;
u16 num_bufq;
u32 rxq_desc_count;
@@ -361,6 +370,11 @@ struct idpf_vport {
u16 num_q_vectors;
struct idpf_q_vector *q_vectors;
u16 *q_vector_idxs;
void __iomem *noirq_dyn_ctl;
u32 noirq_dyn_ctl_ena;
u16 noirq_v_idx;
u16 max_mtu;
u8 default_mac_addr[ETH_ALEN];
u16 rx_itr_profile[IDPF_DIM_PROFILE_SLOTS];
@@ -369,8 +383,6 @@ struct idpf_vport {
bool link_up;
wait_queue_head_t sw_marker_wq;
struct idpf_ptp_vport_tx_tstamp_caps *tx_tstamp_caps;
struct kernel_hwtstamp_config tstamp_config;
struct work_struct tstamp_task;
@@ -435,6 +447,7 @@ struct idpf_q_coalesce {
* ethtool
* @num_req_rxq_desc: Number of user requested RX queue descriptors through
* ethtool
* @xdp_prog: requested XDP program to install
* @user_flags: User toggled config flags
* @mac_filter_list: List of MAC filters
* @num_fsteer_fltrs: number of flow steering filters
@@ -449,6 +462,7 @@ struct idpf_vport_user_config_data {
u16 num_req_rx_qs;
u32 num_req_txq_desc;
u32 num_req_rxq_desc;
struct bpf_prog *xdp_prog;
DECLARE_BITMAP(user_flags, __IDPF_USER_FLAGS_NBITS);
struct list_head mac_filter_list;
u32 num_fsteer_fltrs;
@@ -678,6 +692,11 @@ static inline int idpf_is_queue_model_split(u16 q_model)
q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT;
}
static inline bool idpf_xdp_enabled(const struct idpf_vport *vport)
{
return vport->adapter && vport->xdp_prog;
}
#define idpf_is_cap_ena(adapter, field, flag) \
idpf_is_capability_ena(adapter, false, field, flag)
#define idpf_is_cap_ena_all(adapter, field, flag) \

View File

@@ -77,7 +77,7 @@ static int idpf_intr_reg_init(struct idpf_vport *vport)
int num_vecs = vport->num_q_vectors;
struct idpf_vec_regs *reg_vals;
int num_regs, i, err = 0;
u32 rx_itr, tx_itr;
u32 rx_itr, tx_itr, val;
u16 total_vecs;
total_vecs = idpf_get_reserved_vecs(vport->adapter);
@@ -121,6 +121,15 @@ static int idpf_intr_reg_init(struct idpf_vport *vport)
intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr);
}
/* Data vector for NOIRQ queues */
val = reg_vals[vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC].dyn_ctl_reg;
vport->noirq_dyn_ctl = idpf_get_reg_addr(adapter, val);
val = PF_GLINT_DYN_CTL_WB_ON_ITR_M | PF_GLINT_DYN_CTL_INTENA_MSK_M |
FIELD_PREP(PF_GLINT_DYN_CTL_ITR_INDX_M, IDPF_NO_ITR_UPDATE_IDX);
vport->noirq_dyn_ctl_ena = val;
free_reg_vals:
kfree(reg_vals);

View File

@@ -186,13 +186,17 @@ struct idpf_base_tx_desc {
__le64 qw1; /* type_cmd_offset_bsz_l2tag1 */
}; /* read used with buffer queues */
struct idpf_splitq_tx_compl_desc {
struct idpf_splitq_4b_tx_compl_desc {
/* qid=[10:0] comptype=[13:11] rsvd=[14] gen=[15] */
__le16 qid_comptype_gen;
union {
__le16 q_head; /* Queue head */
__le16 compl_tag; /* Completion tag */
} q_head_compl_tag;
}; /* writeback used with completion queues */
struct idpf_splitq_tx_compl_desc {
struct idpf_splitq_4b_tx_compl_desc common;
u8 ts[3];
u8 rsvd; /* Reserved */
}; /* writeback used with completion queues */

View File

@@ -4,6 +4,7 @@
#include "idpf.h"
#include "idpf_virtchnl.h"
#include "idpf_ptp.h"
#include "xdp.h"
static const struct net_device_ops idpf_netdev_ops;
@@ -835,6 +836,8 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
netdev->hw_features |= netdev->features | other_offloads;
netdev->vlan_features |= netdev->features | other_offloads;
netdev->hw_enc_features |= dflt_features | other_offloads;
idpf_xdp_set_features(vport);
idpf_set_ethtool_ops(netdev);
netif_set_affinity_auto(netdev);
SET_NETDEV_DEV(netdev, &adapter->pdev->dev);
@@ -884,14 +887,18 @@ static void idpf_remove_features(struct idpf_vport *vport)
/**
* idpf_vport_stop - Disable a vport
* @vport: vport to disable
* @rtnl: whether to take RTNL lock
*/
static void idpf_vport_stop(struct idpf_vport *vport)
static void idpf_vport_stop(struct idpf_vport *vport, bool rtnl)
{
struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
if (np->state <= __IDPF_VPORT_DOWN)
return;
if (rtnl)
rtnl_lock();
netif_carrier_off(vport->netdev);
netif_tx_disable(vport->netdev);
@@ -910,9 +917,13 @@ static void idpf_vport_stop(struct idpf_vport *vport)
vport->link_up = false;
idpf_vport_intr_deinit(vport);
idpf_xdp_rxq_info_deinit_all(vport);
idpf_vport_queues_rel(vport);
idpf_vport_intr_rel(vport);
np->state = __IDPF_VPORT_DOWN;
if (rtnl)
rtnl_unlock();
}
/**
@@ -936,7 +947,7 @@ static int idpf_stop(struct net_device *netdev)
idpf_vport_ctrl_lock(netdev);
vport = idpf_netdev_to_vport(netdev);
idpf_vport_stop(vport);
idpf_vport_stop(vport, false);
idpf_vport_ctrl_unlock(netdev);
@@ -1029,7 +1040,7 @@ static void idpf_vport_dealloc(struct idpf_vport *vport)
idpf_idc_deinit_vport_aux_device(vport->vdev_info);
idpf_deinit_mac_addr(vport);
idpf_vport_stop(vport);
idpf_vport_stop(vport, true);
if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags))
idpf_decfg_netdev(vport);
@@ -1135,7 +1146,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
if (!vport)
return vport;
num_max_q = max(max_q->max_txq, max_q->max_rxq);
num_max_q = max(max_q->max_txq, max_q->max_rxq) + IDPF_RESERVED_VECS;
if (!adapter->vport_config[idx]) {
struct idpf_vport_config *vport_config;
struct idpf_q_coalesce *q_coal;
@@ -1309,13 +1320,13 @@ static void idpf_restore_features(struct idpf_vport *vport)
*/
static int idpf_set_real_num_queues(struct idpf_vport *vport)
{
int err;
int err, txq = vport->num_txq - vport->num_xdp_txq;
err = netif_set_real_num_rx_queues(vport->netdev, vport->num_rxq);
if (err)
return err;
return netif_set_real_num_tx_queues(vport->netdev, vport->num_txq);
return netif_set_real_num_tx_queues(vport->netdev, txq);
}
/**
@@ -1370,8 +1381,9 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport)
/**
* idpf_vport_open - Bring up a vport
* @vport: vport to bring up
* @rtnl: whether to take RTNL lock
*/
static int idpf_vport_open(struct idpf_vport *vport)
static int idpf_vport_open(struct idpf_vport *vport, bool rtnl)
{
struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
struct idpf_adapter *adapter = vport->adapter;
@@ -1381,6 +1393,9 @@ static int idpf_vport_open(struct idpf_vport *vport)
if (np->state != __IDPF_VPORT_DOWN)
return -EBUSY;
if (rtnl)
rtnl_lock();
/* we do not allow interface up just yet */
netif_carrier_off(vport->netdev);
@@ -1388,7 +1403,7 @@ static int idpf_vport_open(struct idpf_vport *vport)
if (err) {
dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n",
vport->vport_id, err);
return err;
goto err_rtnl_unlock;
}
err = idpf_vport_queues_alloc(vport);
@@ -1424,20 +1439,29 @@ static int idpf_vport_open(struct idpf_vport *vport)
}
idpf_rx_init_buf_tail(vport);
err = idpf_xdp_rxq_info_init_all(vport);
if (err) {
netdev_err(vport->netdev,
"Failed to initialize XDP RxQ info for vport %u: %pe\n",
vport->vport_id, ERR_PTR(err));
goto intr_deinit;
}
idpf_vport_intr_ena(vport);
err = idpf_send_config_queues_msg(vport);
if (err) {
dev_err(&adapter->pdev->dev, "Failed to configure queues for vport %u, %d\n",
vport->vport_id, err);
goto intr_deinit;
goto rxq_deinit;
}
err = idpf_send_map_unmap_queue_vector_msg(vport, true);
if (err) {
dev_err(&adapter->pdev->dev, "Failed to map queue vectors for vport %u: %d\n",
vport->vport_id, err);
goto intr_deinit;
goto rxq_deinit;
}
err = idpf_send_enable_queues_msg(vport);
@@ -1475,6 +1499,9 @@ static int idpf_vport_open(struct idpf_vport *vport)
goto deinit_rss;
}
if (rtnl)
rtnl_unlock();
return 0;
deinit_rss:
@@ -1485,6 +1512,8 @@ disable_queues:
idpf_send_disable_queues_msg(vport);
unmap_queue_vectors:
idpf_send_map_unmap_queue_vector_msg(vport, false);
rxq_deinit:
idpf_xdp_rxq_info_deinit_all(vport);
intr_deinit:
idpf_vport_intr_deinit(vport);
queues_rel:
@@ -1492,6 +1521,10 @@ queues_rel:
intr_rel:
idpf_vport_intr_rel(vport);
err_rtnl_unlock:
if (rtnl)
rtnl_unlock();
return err;
}
@@ -1548,8 +1581,6 @@ void idpf_init_task(struct work_struct *work)
index = vport->idx;
vport_config = adapter->vport_config[index];
init_waitqueue_head(&vport->sw_marker_wq);
spin_lock_init(&vport_config->mac_filter_list_lock);
INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
@@ -1572,7 +1603,7 @@ void idpf_init_task(struct work_struct *work)
np = netdev_priv(vport->netdev);
np->state = __IDPF_VPORT_DOWN;
if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags))
idpf_vport_open(vport);
idpf_vport_open(vport, true);
/* Spawn and return 'idpf_init_task' work queue until all the
* default vports are created
@@ -1962,7 +1993,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
idpf_send_delete_queues_msg(vport);
} else {
set_bit(IDPF_VPORT_DEL_QUEUES, vport->flags);
idpf_vport_stop(vport);
idpf_vport_stop(vport, false);
}
idpf_deinit_rss(vport);
@@ -1992,7 +2023,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
goto err_open;
if (current_state == __IDPF_VPORT_UP)
err = idpf_vport_open(vport);
err = idpf_vport_open(vport, false);
goto free_vport;
@@ -2002,7 +2033,7 @@ err_reset:
err_open:
if (current_state == __IDPF_VPORT_UP)
idpf_vport_open(vport);
idpf_vport_open(vport, false);
free_vport:
kfree(new_vport);
@@ -2240,7 +2271,7 @@ static int idpf_open(struct net_device *netdev)
if (err)
goto unlock;
err = idpf_vport_open(vport);
err = idpf_vport_open(vport, false);
unlock:
idpf_vport_ctrl_unlock(netdev);
@@ -2585,4 +2616,6 @@ static const struct net_device_ops idpf_netdev_ops = {
.ndo_tx_timeout = idpf_tx_timeout,
.ndo_hwtstamp_get = idpf_hwtstamp_get,
.ndo_hwtstamp_set = idpf_hwtstamp_set,
.ndo_bpf = idpf_xdp,
.ndo_xdp_xmit = idpf_xdp_xmit,
};

View File

@@ -9,6 +9,7 @@
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_IMPORT_NS("LIBETH");
MODULE_IMPORT_NS("LIBETH_XDP");
MODULE_LICENSE("GPL");
/**

View File

@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */
#include <net/libeth/rx.h>
#include <net/libeth/tx.h>
#include <net/libeth/xdp.h>
#include "idpf.h"
@@ -655,7 +654,7 @@ static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq,
bool ipv4, ipv6;
/* check if Rx checksum is enabled */
if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded))
return;
/* check if HW has decoded the packet and checksum */
@@ -794,7 +793,7 @@ static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
{
u64 mask, qw1;
if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded))
return;
mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
@@ -822,7 +821,7 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
const union virtchnl2_rx_desc *rx_desc,
struct libeth_rx_pt decoded)
{
if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded))
if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded))
return;
if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
@@ -834,7 +833,7 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
}
/**
* idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
* __idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx
* descriptor
* @rx_q: Rx ring being processed
* @skb: pointer to current skb being populated
@@ -846,17 +845,14 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
* other fields within the skb.
*/
static void
idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
struct sk_buff *skb,
const union virtchnl2_rx_desc *rx_desc,
u16 ptype)
__idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
struct sk_buff *skb,
const union virtchnl2_rx_desc *rx_desc,
u16 ptype)
{
struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype];
struct libeth_rx_csum csum_bits;
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_q->netdev);
/* Check if we're using base mode descriptor IDs */
if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded);
@@ -867,7 +863,6 @@ idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
}
idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded);
skb_record_rx_queue(skb, rx_q->idx);
}
/**
@@ -1003,6 +998,32 @@ idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
}
static bool
idpf_rx_singleq_process_skb_fields(struct sk_buff *skb,
const struct libeth_xdp_buff *xdp,
struct libeth_rq_napi_stats *rs)
{
struct libeth_rqe_info fields;
struct idpf_rx_queue *rxq;
rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq);
idpf_rx_singleq_extract_fields(rxq, xdp->desc, &fields);
__idpf_rx_singleq_process_skb_fields(rxq, skb, xdp->desc,
fields.ptype);
return true;
}
static void idpf_xdp_run_pass(struct libeth_xdp_buff *xdp,
struct napi_struct *napi,
struct libeth_rq_napi_stats *rs,
const union virtchnl2_rx_desc *desc)
{
libeth_xdp_run_pass(xdp, NULL, napi, rs, desc, NULL,
idpf_rx_singleq_process_skb_fields);
}
/**
* idpf_rx_singleq_clean - Reclaim resources after receive completes
* @rx_q: rx queue to clean
@@ -1012,14 +1033,15 @@ idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
*/
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
struct sk_buff *skb = rx_q->skb;
struct libeth_rq_napi_stats rs = { };
u16 ntc = rx_q->next_to_clean;
LIBETH_XDP_ONSTACK_BUFF(xdp);
u16 cleaned_count = 0;
bool failure = false;
libeth_xdp_init_buff(xdp, &rx_q->xdp, &rx_q->xdp_rxq);
/* Process Rx packets bounded by budget */
while (likely(total_rx_pkts < (unsigned int)budget)) {
while (likely(rs.packets < budget)) {
struct libeth_rqe_info fields = { };
union virtchnl2_rx_desc *rx_desc;
struct idpf_rx_buf *rx_buf;
@@ -1046,73 +1068,41 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
rx_buf = &rx_q->rx_buf[ntc];
if (!libeth_rx_sync_for_cpu(rx_buf, fields.len))
goto skip_data;
if (skb)
idpf_rx_add_frag(rx_buf, skb, fields.len);
else
skb = idpf_rx_build_skb(rx_buf, fields.len);
/* exit if we failed to retrieve a buffer */
if (!skb)
break;
skip_data:
libeth_xdp_process_buff(xdp, rx_buf, fields.len);
rx_buf->netmem = 0;
IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
cleaned_count++;
/* skip if it is non EOP desc */
if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
if (idpf_rx_singleq_is_non_eop(rx_desc) ||
unlikely(!xdp->data))
continue;
#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M)
if (unlikely(idpf_rx_singleq_test_staterr(rx_desc,
IDPF_RXD_ERR_S))) {
dev_kfree_skb_any(skb);
skb = NULL;
libeth_xdp_return_buff_slow(xdp);
continue;
}
/* pad skb if needed (to make valid ethernet frame) */
if (eth_skb_pad(skb)) {
skb = NULL;
continue;
}
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
/* protocol */
idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc,
fields.ptype);
/* send completed skb up the stack */
napi_gro_receive(rx_q->pp->p.napi, skb);
skb = NULL;
/* update budget accounting */
total_rx_pkts++;
idpf_xdp_run_pass(xdp, rx_q->pp->p.napi, &rs, rx_desc);
}
rx_q->skb = skb;
rx_q->next_to_clean = ntc;
libeth_xdp_save_buff(&rx_q->xdp, xdp);
page_pool_nid_changed(rx_q->pp, numa_mem_id());
if (cleaned_count)
failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
u64_stats_update_begin(&rx_q->stats_sync);
u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
u64_stats_add(&rx_q->q_stats.packets, rs.packets);
u64_stats_add(&rx_q->q_stats.bytes, rs.bytes);
u64_stats_update_end(&rx_q->stats_sync);
/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_pkts;
return rs.packets;
}
/**

View File

@@ -1,12 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2023 Intel Corporation */
#include <net/libeth/rx.h>
#include <net/libeth/tx.h>
#include "idpf.h"
#include "idpf_ptp.h"
#include "idpf_virtchnl.h"
#include "xdp.h"
#define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv)
LIBETH_SQE_CHECK_PRIV(u32);
@@ -62,8 +60,10 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq)
{
struct libeth_sq_napi_stats ss = { };
struct xdp_frame_bulk bq;
struct libeth_cq_pp cp = {
.dev = txq->dev,
.bq = &bq,
.ss = &ss,
};
u32 i;
@@ -72,9 +72,13 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq)
if (!txq->tx_buf)
return;
xdp_frame_bulk_init(&bq);
/* Free all the Tx buffer sk_buffs */
for (i = 0; i < txq->buf_pool_size; i++)
libeth_tx_complete(&txq->tx_buf[i], &cp);
libeth_tx_complete_any(&txq->tx_buf[i], &cp);
xdp_flush_frame_bulk(&bq);
kfree(txq->tx_buf);
txq->tx_buf = NULL;
@@ -88,13 +92,20 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq)
*/
static void idpf_tx_desc_rel(struct idpf_tx_queue *txq)
{
bool xdp = idpf_queue_has(XDP, txq);
if (xdp)
libeth_xdpsq_deinit_timer(txq->timer);
idpf_tx_buf_rel_all(txq);
netdev_tx_reset_subqueue(txq->netdev, txq->idx);
if (!xdp)
netdev_tx_reset_subqueue(txq->netdev, txq->idx);
if (!txq->desc_ring)
return;
if (txq->refillq)
if (!xdp && txq->refillq)
kfree(txq->refillq->ring);
dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma);
@@ -115,8 +126,8 @@ static void idpf_compl_desc_rel(struct idpf_compl_queue *complq)
return;
dma_free_coherent(complq->netdev->dev.parent, complq->size,
complq->comp, complq->dma);
complq->comp = NULL;
complq->desc_ring, complq->dma);
complq->desc_ring = NULL;
complq->next_to_use = 0;
complq->next_to_clean = 0;
}
@@ -246,12 +257,16 @@ err_alloc:
static int idpf_compl_desc_alloc(const struct idpf_vport *vport,
struct idpf_compl_queue *complq)
{
complq->size = array_size(complq->desc_count, sizeof(*complq->comp));
u32 desc_size;
complq->comp = dma_alloc_coherent(complq->netdev->dev.parent,
complq->size, &complq->dma,
GFP_KERNEL);
if (!complq->comp)
desc_size = idpf_queue_has(FLOW_SCH_EN, complq) ?
sizeof(*complq->comp) : sizeof(*complq->comp_4b);
complq->size = array_size(complq->desc_count, desc_size);
complq->desc_ring = dma_alloc_coherent(complq->netdev->dev.parent,
complq->size, &complq->dma,
GFP_KERNEL);
if (!complq->desc_ring)
return -ENOMEM;
complq->next_to_use = 0;
@@ -405,10 +420,7 @@ static void idpf_rx_desc_rel(struct idpf_rx_queue *rxq, struct device *dev,
if (!rxq)
return;
if (rxq->skb) {
dev_kfree_skb_any(rxq->skb);
rxq->skb = NULL;
}
libeth_xdp_return_stash(&rxq->xdp);
if (!idpf_is_queue_model_split(model))
idpf_rx_buf_rel_all(rxq);
@@ -517,6 +529,7 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq)
struct libeth_fq fq = {
.count = bufq->desc_count,
.type = LIBETH_FQE_HDR,
.xdp = idpf_xdp_enabled(bufq->q_vector->vport),
.nid = idpf_q_vector_to_mem(bufq->q_vector),
};
int ret;
@@ -716,6 +729,7 @@ static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq,
.count = bufq->desc_count,
.type = type,
.hsplit = idpf_queue_has(HSPLIT_EN, bufq),
.xdp = idpf_xdp_enabled(bufq->q_vector->vport),
.nid = idpf_q_vector_to_mem(bufq->q_vector),
};
int ret;
@@ -743,6 +757,8 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport)
bool split = idpf_is_queue_model_split(vport->rxq_model);
int i, j, err;
idpf_xdp_copy_prog_to_rqs(vport, vport->xdp_prog);
for (i = 0; i < vport->num_rxq_grp; i++) {
struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
u32 truesize = 0;
@@ -1021,8 +1037,12 @@ static void idpf_vport_queue_grp_rel_all(struct idpf_vport *vport)
*/
void idpf_vport_queues_rel(struct idpf_vport *vport)
{
idpf_xdp_copy_prog_to_rqs(vport, NULL);
idpf_tx_desc_rel_all(vport);
idpf_rx_desc_rel_all(vport);
idpf_xdpsqs_put(vport);
idpf_vport_queue_grp_rel_all(vport);
kfree(vport->txqs);
@@ -1096,6 +1116,18 @@ void idpf_vport_init_num_qs(struct idpf_vport *vport,
if (idpf_is_queue_model_split(vport->rxq_model))
vport->num_bufq = le16_to_cpu(vport_msg->num_rx_bufq);
vport->xdp_prog = config_data->xdp_prog;
if (idpf_xdp_enabled(vport)) {
vport->xdp_txq_offset = config_data->num_req_tx_qs;
vport->num_xdp_txq = le16_to_cpu(vport_msg->num_tx_q) -
vport->xdp_txq_offset;
vport->xdpsq_share = libeth_xdpsq_shared(vport->num_xdp_txq);
} else {
vport->xdp_txq_offset = 0;
vport->num_xdp_txq = 0;
vport->xdpsq_share = false;
}
/* Adjust number of buffer queues per Rx queue group. */
if (!idpf_is_queue_model_split(vport->rxq_model)) {
vport->num_bufqs_per_qgrp = 0;
@@ -1167,22 +1199,17 @@ int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx,
int dflt_splitq_txq_grps = 0, dflt_singleq_txqs = 0;
int dflt_splitq_rxq_grps = 0, dflt_singleq_rxqs = 0;
u16 num_req_tx_qs = 0, num_req_rx_qs = 0;
struct idpf_vport_user_config_data *user;
struct idpf_vport_config *vport_config;
u16 num_txq_grps, num_rxq_grps;
u32 num_qs;
u32 num_qs, num_xdpsq;
vport_config = adapter->vport_config[vport_idx];
if (vport_config) {
num_req_tx_qs = vport_config->user_config.num_req_tx_qs;
num_req_rx_qs = vport_config->user_config.num_req_rx_qs;
} else {
int num_cpus;
/* Restrict num of queues to cpus online as a default
* configuration to give best performance. User can always
* override to a max number of queues via ethtool.
*/
num_cpus = num_online_cpus();
u32 num_cpus = netif_get_num_default_rss_queues();
dflt_splitq_txq_grps = min_t(int, max_q->max_txq, num_cpus);
dflt_singleq_txqs = min_t(int, max_q->max_txq, num_cpus);
@@ -1217,6 +1244,24 @@ int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx,
vport_msg->num_rx_bufq = 0;
}
if (!vport_config)
return 0;
user = &vport_config->user_config;
user->num_req_rx_qs = le16_to_cpu(vport_msg->num_rx_q);
user->num_req_tx_qs = le16_to_cpu(vport_msg->num_tx_q);
if (vport_config->user_config.xdp_prog)
num_xdpsq = libeth_xdpsq_num(user->num_req_rx_qs,
user->num_req_tx_qs,
vport_config->max_q.max_txq);
else
num_xdpsq = 0;
vport_msg->num_tx_q = cpu_to_le16(user->num_req_tx_qs + num_xdpsq);
if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->txq_model)))
vport_msg->num_tx_complq = vport_msg->num_tx_q;
return 0;
}
@@ -1266,14 +1311,13 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport,
static void idpf_rxq_set_descids(const struct idpf_vport *vport,
struct idpf_rx_queue *q)
{
if (idpf_is_queue_model_split(vport->rxq_model)) {
q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
} else {
if (vport->base_rxd)
q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M;
else
q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M;
}
if (idpf_is_queue_model_split(vport->rxq_model))
return;
if (vport->base_rxd)
q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M;
else
q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M;
}
/**
@@ -1481,7 +1525,6 @@ skip_splitq_rx_init:
setup_rxq:
q->desc_count = vport->rxq_desc_count;
q->rx_ptype_lkup = vport->rx_ptype_lkup;
q->netdev = vport->netdev;
q->bufq_sets = rx_qgrp->splitq.bufq_sets;
q->idx = (i * num_rxq) + j;
q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK;
@@ -1542,6 +1585,14 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport)
if (err)
goto err_out;
err = idpf_vport_init_fast_path_txqs(vport);
if (err)
goto err_out;
err = idpf_xdpsqs_get(vport);
if (err)
goto err_out;
err = idpf_tx_desc_alloc_all(vport);
if (err)
goto err_out;
@@ -1550,10 +1601,6 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport)
if (err)
goto err_out;
err = idpf_vport_init_fast_path_txqs(vport);
if (err)
goto err_out;
return 0;
err_out:
@@ -1562,32 +1609,6 @@ err_out:
return err;
}
/**
* idpf_tx_handle_sw_marker - Handle queue marker packet
* @tx_q: tx queue to handle software marker
*/
static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q)
{
struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev);
struct idpf_vport *vport = priv->vport;
int i;
idpf_queue_clear(SW_MARKER, tx_q);
/* Hardware must write marker packets to all queues associated with
* completion queues. So check if all queues received marker packets
*/
for (i = 0; i < vport->num_txq; i++)
/* If we're still waiting on any other TXQ marker completions,
* just return now since we cannot wake up the marker_wq yet.
*/
if (idpf_queue_has(SW_MARKER, vport->txqs[i]))
return;
/* Drain complete */
set_bit(IDPF_VPORT_SW_MARKER, vport->flags);
wake_up(&vport->sw_marker_wq);
}
/**
* idpf_tx_read_tstamp - schedule a work to read Tx timestamp value
* @txq: queue to read the timestamp from
@@ -1765,7 +1786,7 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq,
/* RS completion contains queue head for queue based scheduling or
* completion tag for flow based scheduling.
*/
u16 rs_compl_val = le16_to_cpu(desc->q_head_compl_tag.q_head);
u16 rs_compl_val = le16_to_cpu(desc->common.q_head_compl_tag.q_head);
if (!idpf_queue_has(FLOW_SCH_EN, txq)) {
idpf_tx_splitq_clean(txq, rs_compl_val, budget, cleaned, false);
@@ -1800,19 +1821,19 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget,
do {
struct libeth_sq_napi_stats cleaned_stats = { };
struct idpf_tx_queue *tx_q;
__le16 hw_head;
int rel_tx_qid;
u16 hw_head;
u8 ctype; /* completion type */
u16 gen;
/* if the descriptor isn't done, no work yet to do */
gen = le16_get_bits(tx_desc->qid_comptype_gen,
gen = le16_get_bits(tx_desc->common.qid_comptype_gen,
IDPF_TXD_COMPLQ_GEN_M);
if (idpf_queue_has(GEN_CHK, complq) != gen)
break;
/* Find necessary info of TX queue to clean buffers */
rel_tx_qid = le16_get_bits(tx_desc->qid_comptype_gen,
rel_tx_qid = le16_get_bits(tx_desc->common.qid_comptype_gen,
IDPF_TXD_COMPLQ_QID_M);
if (rel_tx_qid >= complq->txq_grp->num_txq ||
!complq->txq_grp->txqs[rel_tx_qid]) {
@@ -1822,22 +1843,19 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget,
tx_q = complq->txq_grp->txqs[rel_tx_qid];
/* Determine completion type */
ctype = le16_get_bits(tx_desc->qid_comptype_gen,
ctype = le16_get_bits(tx_desc->common.qid_comptype_gen,
IDPF_TXD_COMPLQ_COMPL_TYPE_M);
switch (ctype) {
case IDPF_TXD_COMPLT_RE:
hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head);
hw_head = tx_desc->common.q_head_compl_tag.q_head;
idpf_tx_splitq_clean(tx_q, hw_head, budget,
&cleaned_stats, true);
idpf_tx_splitq_clean(tx_q, le16_to_cpu(hw_head),
budget, &cleaned_stats, true);
break;
case IDPF_TXD_COMPLT_RS:
idpf_tx_handle_rs_completion(tx_q, tx_desc,
&cleaned_stats, budget);
break;
case IDPF_TXD_COMPLT_SW_MARKER:
idpf_tx_handle_sw_marker(tx_q);
break;
default:
netdev_err(tx_q->netdev,
"Unknown TX completion type: %d\n", ctype);
@@ -1909,6 +1927,69 @@ fetch_next_desc:
return !!complq_budget;
}
/**
* idpf_wait_for_sw_marker_completion - wait for SW marker of disabled Tx queue
* @txq: disabled Tx queue
*
* When Tx queue is requested for disabling, the CP sends a special completion
* descriptor called "SW marker", meaning the queue is ready to be destroyed.
* If, for some reason, the marker is not received within 500 ms, break the
* polling to not hang the driver.
*/
void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq)
{
struct idpf_compl_queue *complq;
unsigned long timeout;
bool flow, gen_flag;
u32 ntc;
if (!idpf_queue_has(SW_MARKER, txq))
return;
complq = idpf_queue_has(XDP, txq) ? txq->complq : txq->txq_grp->complq;
ntc = complq->next_to_clean;
flow = idpf_queue_has(FLOW_SCH_EN, complq);
gen_flag = idpf_queue_has(GEN_CHK, complq);
timeout = jiffies + msecs_to_jiffies(IDPF_WAIT_FOR_MARKER_TIMEO);
do {
struct idpf_splitq_4b_tx_compl_desc *tx_desc;
struct idpf_tx_queue *target;
u32 ctype_gen, id;
tx_desc = flow ? &complq->comp[ntc].common :
&complq->comp_4b[ntc];
ctype_gen = le16_to_cpu(tx_desc->qid_comptype_gen);
if (!!(ctype_gen & IDPF_TXD_COMPLQ_GEN_M) != gen_flag) {
usleep_range(500, 1000);
continue;
}
if (FIELD_GET(IDPF_TXD_COMPLQ_COMPL_TYPE_M, ctype_gen) !=
IDPF_TXD_COMPLT_SW_MARKER)
goto next;
id = FIELD_GET(IDPF_TXD_COMPLQ_QID_M, ctype_gen);
target = complq->txq_grp->txqs[id];
idpf_queue_clear(SW_MARKER, target);
if (target == txq)
break;
next:
if (unlikely(++ntc == complq->desc_count)) {
ntc = 0;
gen_flag = !gen_flag;
}
} while (time_before(jiffies, timeout));
idpf_queue_assign(GEN_CHK, complq, gen_flag);
complq->next_to_clean = ntc;
}
/**
* idpf_tx_splitq_build_ctb - populate command tag and size for queue
* based scheduling descriptors
@@ -2673,10 +2754,11 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
*/
netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev)
{
struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
const struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
struct idpf_tx_queue *tx_q;
if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) {
if (unlikely(skb_get_queue_mapping(skb) >=
vport->num_txq - vport->num_xdp_txq)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
@@ -2713,7 +2795,7 @@ idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb,
{
u32 hash;
if (!libeth_rx_pt_has_hash(rxq->netdev, decoded))
if (!libeth_rx_pt_has_hash(rxq->xdp_rxq.dev, decoded))
return;
hash = le16_to_cpu(rx_desc->hash1) |
@@ -2739,7 +2821,7 @@ static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb,
bool ipv4, ipv6;
/* check if Rx checksum is enabled */
if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded))
if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded))
return;
/* check if HW has decoded the packet and checksum */
@@ -2911,7 +2993,7 @@ idpf_rx_hwtstamp(const struct idpf_rx_queue *rxq,
}
/**
* idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor
* __idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor
* @rxq: Rx descriptor ring packet is being transacted on
* @skb: pointer to current skb being populated
* @rx_desc: Receive descriptor
@@ -2921,8 +3003,8 @@ idpf_rx_hwtstamp(const struct idpf_rx_queue *rxq,
* other fields within the skb.
*/
static int
idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
__idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
{
struct libeth_rx_csum csum_bits;
struct libeth_rx_pt decoded;
@@ -2938,9 +3020,6 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
if (idpf_queue_has(PTP, rxq))
idpf_rx_hwtstamp(rxq, rx_desc, skb);
skb->protocol = eth_type_trans(skb, rxq->netdev);
skb_record_rx_queue(skb, rxq->idx);
if (le16_get_bits(rx_desc->hdrlen_flags,
VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M))
return idpf_rx_rsc(rxq, skb, rx_desc, decoded);
@@ -2951,25 +3030,24 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb,
return 0;
}
/**
* idpf_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag
* @rx_buf: buffer containing page to add
* @skb: sk_buff to place the data into
* @size: packet length from rx_desc
*
* This function will add the data contained in rx_buf->page to the skb.
* It will just attach the page as a frag to the skb.
* The function will then update the page offset.
*/
void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int size)
static bool idpf_rx_process_skb_fields(struct sk_buff *skb,
const struct libeth_xdp_buff *xdp,
struct libeth_rq_napi_stats *rs)
{
u32 hr = netmem_get_pp(rx_buf->netmem)->p.offset;
struct idpf_rx_queue *rxq;
skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, rx_buf->netmem,
rx_buf->offset + hr, size, rx_buf->truesize);
rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq);
return !__idpf_rx_process_skb_fields(rxq, skb, xdp->desc);
}
LIBETH_XDP_DEFINE_START();
LIBETH_XDP_DEFINE_RUN(static idpf_xdp_run_pass, idpf_xdp_run_prog,
idpf_xdp_tx_flush_bulk, idpf_rx_process_skb_fields);
LIBETH_XDP_DEFINE_FINALIZE(static idpf_xdp_finalize_rx, idpf_xdp_tx_flush_bulk,
idpf_xdp_tx_finalize);
LIBETH_XDP_DEFINE_END();
/**
* idpf_rx_hsplit_wa - handle header buffer overflows and split errors
* @hdr: Rx buffer for the headers
@@ -3011,36 +3089,6 @@ static u32 idpf_rx_hsplit_wa(const struct libeth_fqe *hdr,
return copy;
}
/**
* idpf_rx_build_skb - Allocate skb and populate it from header buffer
* @buf: Rx buffer to pull data from
* @size: the length of the packet
*
* This function allocates an skb. It then populates it with the page data from
* the current receive descriptor, taking care to set up the skb correctly.
*/
struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size)
{
struct page *buf_page = __netmem_to_page(buf->netmem);
u32 hr = pp_page_to_nmdesc(buf_page)->pp->p.offset;
struct sk_buff *skb;
void *va;
va = page_address(buf_page) + buf->offset;
prefetch(va + hr);
skb = napi_build_skb(va, buf->truesize);
if (unlikely(!skb))
return NULL;
skb_mark_for_recycle(skb);
skb_reserve(skb, hr);
__skb_put(skb, size);
return skb;
}
/**
* idpf_rx_splitq_test_staterr - tests bits in Rx descriptor
* status and error fields
@@ -3082,13 +3130,18 @@ static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_de
*/
static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget)
{
int total_rx_bytes = 0, total_rx_pkts = 0;
struct idpf_buf_queue *rx_bufq = NULL;
struct sk_buff *skb = rxq->skb;
struct libeth_rq_napi_stats rs = { };
u16 ntc = rxq->next_to_clean;
LIBETH_XDP_ONSTACK_BUFF(xdp);
LIBETH_XDP_ONSTACK_BULK(bq);
libeth_xdp_tx_init_bulk(&bq, rxq->xdp_prog, rxq->xdp_rxq.dev,
rxq->xdpsqs, rxq->num_xdp_txq);
libeth_xdp_init_buff(xdp, &rxq->xdp, &rxq->xdp_rxq);
/* Process Rx packets bounded by budget */
while (likely(total_rx_pkts < budget)) {
while (likely(rs.packets < budget)) {
struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc;
struct libeth_fqe *hdr, *rx_buf = NULL;
struct idpf_sw_queue *refillq = NULL;
@@ -3102,18 +3155,14 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget)
/* get the Rx desc from Rx queue based on 'next_to_clean' */
rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc
*/
dma_rmb();
/* if the descriptor isn't done, no work yet to do */
gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id,
VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M);
if (idpf_queue_has(GEN_CHK, rxq) != gen_id)
break;
dma_rmb();
rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M,
rx_desc->rxdid_ucast);
if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) {
@@ -3158,7 +3207,7 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget)
hdr = &rx_bufq->hdr_buf[buf_id];
if (unlikely(!hdr_len && !skb)) {
if (unlikely(!hdr_len && !xdp->data)) {
hdr_len = idpf_rx_hsplit_wa(hdr, rx_buf, pkt_len);
/* If failed, drop both buffers by setting len to 0 */
pkt_len -= hdr_len ? : pkt_len;
@@ -3168,75 +3217,37 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget)
u64_stats_update_end(&rxq->stats_sync);
}
if (libeth_rx_sync_for_cpu(hdr, hdr_len)) {
skb = idpf_rx_build_skb(hdr, hdr_len);
if (!skb)
break;
u64_stats_update_begin(&rxq->stats_sync);
u64_stats_inc(&rxq->q_stats.hsplit_pkts);
u64_stats_update_end(&rxq->stats_sync);
}
if (libeth_xdp_process_buff(xdp, hdr, hdr_len))
rs.hsplit++;
hdr->netmem = 0;
payload:
if (!libeth_rx_sync_for_cpu(rx_buf, pkt_len))
goto skip_data;
if (skb)
idpf_rx_add_frag(rx_buf, skb, pkt_len);
else
skb = idpf_rx_build_skb(rx_buf, pkt_len);
/* exit if we failed to retrieve a buffer */
if (!skb)
break;
skip_data:
libeth_xdp_process_buff(xdp, rx_buf, pkt_len);
rx_buf->netmem = 0;
idpf_post_buf_refill(refillq, buf_id);
IDPF_RX_BUMP_NTC(rxq, ntc);
/* skip if it is non EOP desc */
if (!idpf_rx_splitq_is_eop(rx_desc) || unlikely(!skb))
if (!idpf_rx_splitq_is_eop(rx_desc) || unlikely(!xdp->data))
continue;
/* pad skb if needed (to make valid ethernet frame) */
if (eth_skb_pad(skb)) {
skb = NULL;
continue;
}
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
/* protocol */
if (unlikely(idpf_rx_process_skb_fields(rxq, skb, rx_desc))) {
dev_kfree_skb_any(skb);
skb = NULL;
continue;
}
/* send completed skb up the stack */
napi_gro_receive(rxq->napi, skb);
skb = NULL;
/* update budget accounting */
total_rx_pkts++;
idpf_xdp_run_pass(xdp, &bq, rxq->napi, &rs, rx_desc);
}
rxq->next_to_clean = ntc;
idpf_xdp_finalize_rx(&bq);
rxq->next_to_clean = ntc;
libeth_xdp_save_buff(&rxq->xdp, xdp);
rxq->skb = skb;
u64_stats_update_begin(&rxq->stats_sync);
u64_stats_add(&rxq->q_stats.packets, total_rx_pkts);
u64_stats_add(&rxq->q_stats.bytes, total_rx_bytes);
u64_stats_add(&rxq->q_stats.packets, rs.packets);
u64_stats_add(&rxq->q_stats.bytes, rs.bytes);
u64_stats_add(&rxq->q_stats.hsplit_pkts, rs.hsplit);
u64_stats_update_end(&rxq->stats_sync);
/* guarantee a trip back through this routine if there was a failure */
return total_rx_pkts;
return rs.packets;
}
/**
@@ -3434,6 +3445,20 @@ void idpf_vport_intr_rel(struct idpf_vport *vport)
vport->q_vectors = NULL;
}
/**
 * idpf_q_vector_set_napi - (un)link a vector's NAPI to its Rx/Tx queues
 * @q_vector: queue vector to process
 * @link: true to link the queues to &q_vector->napi, false to unlink them
 *
 * Tell the core stack which NAPI instance services each Rx and Tx queue
 * owned by this vector (or clear the association when @link is false).
 */
static void idpf_q_vector_set_napi(struct idpf_q_vector *q_vector, bool link)
{
	struct napi_struct *napi = link ? &q_vector->napi : NULL;
	struct net_device *dev = q_vector->vport->netdev;

	for (u32 i = 0; i < q_vector->num_rxq; i++)
		netif_queue_set_napi(dev, q_vector->rx[i]->idx,
				     NETDEV_QUEUE_TYPE_RX, napi);

	for (u32 i = 0; i < q_vector->num_txq; i++)
		netif_queue_set_napi(dev, q_vector->tx[i]->idx,
				     NETDEV_QUEUE_TYPE_TX, napi);
}
/**
* idpf_vport_intr_rel_irq - Free the IRQ association with the OS
* @vport: main vport structure
@@ -3454,6 +3479,7 @@ static void idpf_vport_intr_rel_irq(struct idpf_vport *vport)
vidx = vport->q_vector_idxs[vector];
irq_num = adapter->msix_entries[vidx].vector;
idpf_q_vector_set_napi(q_vector, false);
kfree(free_irq(irq_num, q_vector));
}
}
@@ -3467,6 +3493,8 @@ static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport)
struct idpf_q_vector *q_vector = vport->q_vectors;
int q_idx;
writel(0, vport->noirq_dyn_ctl);
for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++)
writel(0, q_vector[q_idx].intr_reg.dyn_ctl);
}
@@ -3641,6 +3669,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport)
"Request_irq failed, error: %d\n", err);
goto free_q_irqs;
}
idpf_q_vector_set_napi(q_vector, true);
}
return 0;
@@ -3708,6 +3738,8 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport)
if (qv->num_txq || qv->num_rxq)
idpf_vport_intr_update_itr_ena_irq(qv);
}
writel(vport->noirq_dyn_ctl_ena, vport->noirq_dyn_ctl);
}
/**
@@ -3901,14 +3933,6 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
return budget;
}
/* Switch to poll mode in the tear-down path after sending disable
* queues virtchnl message, as the interrupts will be disabled after
* that.
*/
if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
q_vector->tx[0])))
return budget;
work_done = min_t(int, work_done, budget - 1);
/* Exit the polling mode, but don't re-enable interrupts if stack might
@@ -3930,8 +3954,8 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
*/
static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport)
{
u16 num_txq_grp = vport->num_txq_grp - vport->num_xdp_txq;
bool split = idpf_is_queue_model_split(vport->rxq_model);
u16 num_txq_grp = vport->num_txq_grp;
struct idpf_rxq_group *rx_qgrp;
struct idpf_txq_group *tx_qgrp;
u32 i, qv_idx, q_index;
@@ -4027,6 +4051,8 @@ static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport)
for (i = 0; i < vport->num_q_vectors; i++)
vport->q_vectors[i].v_idx = vport->q_vector_idxs[i];
vport->noirq_v_idx = vport->q_vector_idxs[i];
return 0;
}
@@ -4040,6 +4066,8 @@ static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport)
for (i = 0; i < vport->num_q_vectors; i++)
vport->q_vectors[i].v_idx = vecids[vport->q_vector_idxs[i]];
vport->noirq_v_idx = vecids[vport->q_vector_idxs[i]];
kfree(vecids);
return 0;

View File

@@ -7,8 +7,10 @@
#include <linux/dim.h>
#include <net/libeth/cache.h>
#include <net/tcp.h>
#include <net/libeth/types.h>
#include <net/netdev_queues.h>
#include <net/tcp.h>
#include <net/xdp.h>
#include "idpf_lan_txrx.h"
#include "virtchnl2_lan_desc.h"
@@ -58,6 +60,8 @@
#define IDPF_MBX_Q_VEC 1
#define IDPF_MIN_Q_VEC 1
#define IDPF_MIN_RDMA_VEC 2
/* Data vector for NOIRQ queues */
#define IDPF_RESERVED_VECS 1
#define IDPF_DFLT_TX_Q_DESC_COUNT 512
#define IDPF_DFLT_TX_COMPLQ_DESC_COUNT 512
@@ -275,11 +279,12 @@ struct idpf_ptype_state {
* bit and Q_RFL_GEN is the SW bit.
* @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
* @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
* @__IDPF_Q_POLL_MODE: Enable poll mode
* @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode
* @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq)
* @__IDPF_Q_PTP: indicates whether the Rx timestamping is enabled for the
* queue
* @__IDPF_Q_NOIRQ: queue is polling-driven and has no interrupt
* @__IDPF_Q_XDP: this is an XDP queue
* @__IDPF_Q_FLAGS_NBITS: Must be last
*/
enum idpf_queue_flags_t {
@@ -287,10 +292,11 @@ enum idpf_queue_flags_t {
__IDPF_Q_RFL_GEN_CHK,
__IDPF_Q_FLOW_SCH_EN,
__IDPF_Q_SW_MARKER,
__IDPF_Q_POLL_MODE,
__IDPF_Q_CRC_EN,
__IDPF_Q_HSPLIT_EN,
__IDPF_Q_PTP,
__IDPF_Q_NOIRQ,
__IDPF_Q_XDP,
__IDPF_Q_FLAGS_NBITS,
};
@@ -461,21 +467,24 @@ struct idpf_tx_queue_stats {
* @desc_ring: virtual descriptor ring address
* @bufq_sets: Pointer to the array of buffer queues in splitq mode
* @napi: NAPI instance corresponding to this queue (splitq)
* @xdp_prog: attached XDP program
* @rx_buf: See struct &libeth_fqe
* @pp: Page pool pointer in singleq mode
* @netdev: &net_device corresponding to this queue
* @tail: Tail offset. Used for both queue models single and split.
* @flags: See enum idpf_queue_flags_t
* @idx: For RX queue, it is used to index to total RX queue across groups and
* used for skb reporting.
* @desc_count: Number of descriptors
* @num_xdp_txq: total number of XDP Tx queues
* @xdpsqs: shortcut for XDP Tx queues array
* @rxdids: Supported RX descriptor ids
* @truesize: data buffer truesize in singleq
* @rx_ptype_lkup: LUT of Rx ptypes
* @xdp_rxq: XDP queue info
* @next_to_use: Next descriptor to use
* @next_to_clean: Next descriptor to clean
* @next_to_alloc: RX buffer to allocate at
* @skb: Pointer to the skb
* @truesize: data buffer truesize in singleq
* @xdp: XDP buffer with the current frame
* @cached_phc_time: Cached PHC time for the Rx queue
* @stats_sync: See struct u64_stats_sync
* @q_stats: See union idpf_rx_queue_stats
@@ -500,30 +509,38 @@ struct idpf_rx_queue {
struct {
struct idpf_bufq_set *bufq_sets;
struct napi_struct *napi;
struct bpf_prog __rcu *xdp_prog;
};
struct {
struct libeth_fqe *rx_buf;
struct page_pool *pp;
void __iomem *tail;
};
};
struct net_device *netdev;
void __iomem *tail;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u16 idx;
u16 desc_count;
u32 rxdids;
u32 num_xdp_txq;
union {
struct idpf_tx_queue **xdpsqs;
struct {
u32 rxdids;
u32 truesize;
};
};
const struct libeth_rx_pt *rx_ptype_lkup;
struct xdp_rxq_info xdp_rxq;
__cacheline_group_end_aligned(read_mostly);
__cacheline_group_begin_aligned(read_write);
u16 next_to_use;
u16 next_to_clean;
u16 next_to_alloc;
u32 next_to_use;
u32 next_to_clean;
u32 next_to_alloc;
struct sk_buff *skb;
u32 truesize;
struct libeth_xdp_buff_stash xdp;
u64 cached_phc_time;
struct u64_stats_sync stats_sync;
@@ -543,8 +560,11 @@ struct idpf_rx_queue {
u16 rx_max_pkt_size;
__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_rx_queue, 64,
88 + sizeof(struct u64_stats_sync),
libeth_cacheline_set_assert(struct idpf_rx_queue,
ALIGN(64, __alignof(struct xdp_rxq_info)) +
sizeof(struct xdp_rxq_info),
96 + offsetof(struct idpf_rx_queue, q_stats) -
offsetofend(struct idpf_rx_queue, cached_phc_time),
32);
/**
@@ -556,6 +576,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64,
* @desc_ring: virtual descriptor ring address
* @tx_buf: See struct idpf_tx_buf
* @txq_grp: See struct idpf_txq_group
* @complq: corresponding completion queue in XDP mode
* @dev: Device back pointer for DMA mapping
* @tail: Tail offset. Used for both queue models single and split
* @flags: See enum idpf_queue_flags_t
@@ -563,26 +584,7 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64,
* hot path TX pointers stored in vport. Used in both singleq/splitq.
* @desc_count: Number of descriptors
* @tx_min_pkt_len: Min supported packet length
* @compl_tag_gen_s: Completion tag generation bit
* The format of the completion tag will change based on the TXQ
* descriptor ring size so that we can maintain roughly the same level
* of "uniqueness" across all descriptor sizes. For example, if the
* TXQ descriptor ring size is 64 (the minimum size supported), the
* completion tag will be formatted as below:
* 15 6 5 0
* --------------------------------
* | GEN=0-1023 |IDX = 0-63|
* --------------------------------
*
* This gives us 64*1024 = 65536 possible unique values. Similarly, if
* the TXQ descriptor ring size is 8160 (the maximum size supported),
* the completion tag will be formatted as below:
* 15 13 12 0
* --------------------------------
* |GEN | IDX = 0-8159 |
* --------------------------------
*
* This gives us 8*8160 = 65280 possible unique values.
* @thresh: XDP queue cleaning threshold
* @netdev: &net_device corresponding to this queue
* @next_to_use: Next descriptor to use
* @next_to_clean: Next descriptor to clean
@@ -599,6 +601,10 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64,
* @clean_budget: singleq only, queue cleaning budget
* @cleaned_pkts: Number of packets cleaned for the above said case
* @refillq: Pointer to refill queue
* @pending: number of pending descriptors to send in QB
* @xdp_tx: number of pending &xdp_buff or &xdp_frame buffers
* @timer: timer for XDP Tx queue cleanup
* @xdp_lock: lock for XDP Tx queues sharing
* @cached_tstamp_caps: Tx timestamp capabilities negotiated with the CP
* @tstamp_task: Work that handles Tx timestamp read
* @stats_sync: See struct u64_stats_sync
@@ -620,7 +626,10 @@ struct idpf_tx_queue {
void *desc_ring;
};
struct libeth_sqe *tx_buf;
struct idpf_txq_group *txq_grp;
union {
struct idpf_txq_group *txq_grp;
struct idpf_compl_queue *complq;
};
struct device *dev;
void __iomem *tail;
@@ -628,24 +637,39 @@ struct idpf_tx_queue {
u16 idx;
u16 desc_count;
u16 tx_min_pkt_len;
union {
u16 tx_min_pkt_len;
u32 thresh;
};
struct net_device *netdev;
__cacheline_group_end_aligned(read_mostly);
__cacheline_group_begin_aligned(read_write);
u16 next_to_use;
u16 next_to_clean;
u16 last_re;
u16 tx_max_bufs;
u32 next_to_use;
u32 next_to_clean;
union {
u32 cleaned_bytes;
u32 clean_budget;
};
u16 cleaned_pkts;
struct {
u16 last_re;
u16 tx_max_bufs;
struct idpf_sw_queue *refillq;
union {
u32 cleaned_bytes;
u32 clean_budget;
};
u16 cleaned_pkts;
struct idpf_sw_queue *refillq;
};
struct {
u32 pending;
u32 xdp_tx;
struct libeth_xdpsq_timer *timer;
struct libeth_xdpsq_lock xdp_lock;
};
};
struct idpf_ptp_vport_tx_tstamp_caps *cached_tstamp_caps;
struct work_struct *tstamp_task;
@@ -664,7 +688,11 @@ struct idpf_tx_queue {
__cacheline_group_end_aligned(cold);
};
libeth_cacheline_set_assert(struct idpf_tx_queue, 64,
104 + sizeof(struct u64_stats_sync),
104 +
offsetof(struct idpf_tx_queue, cached_tstamp_caps) -
offsetofend(struct idpf_tx_queue, timer) +
offsetof(struct idpf_tx_queue, q_stats) -
offsetofend(struct idpf_tx_queue, tstamp_task),
32);
/**
@@ -728,7 +756,9 @@ libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32);
/**
* struct idpf_compl_queue - software structure representing a completion queue
* @comp: completion descriptor array
* @comp: 8-byte completion descriptor array
* @comp_4b: 4-byte completion descriptor array
* @desc_ring: virtual descriptor ring address
* @txq_grp: See struct idpf_txq_group
* @flags: See enum idpf_queue_flags_t
* @desc_count: Number of descriptors
@@ -748,7 +778,12 @@ libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32);
*/
struct idpf_compl_queue {
__cacheline_group_begin_aligned(read_mostly);
struct idpf_splitq_tx_compl_desc *comp;
union {
struct idpf_splitq_tx_compl_desc *comp;
struct idpf_splitq_4b_tx_compl_desc *comp_4b;
void *desc_ring;
};
struct idpf_txq_group *txq_grp;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
@@ -1012,9 +1047,6 @@ int idpf_config_rss(struct idpf_vport *vport);
int idpf_init_rss(struct idpf_vport *vport);
void idpf_deinit_rss(struct idpf_vport *vport);
int idpf_rx_bufs_init_all(struct idpf_vport *vport);
void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int size);
struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size);
void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
bool xmit_more);
unsigned int idpf_size_to_txd_count(unsigned int size);
@@ -1029,4 +1061,6 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
u16 cleaned_count);
int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq);
#endif /* !_IDPF_TXRX_H_ */

View File

@@ -76,7 +76,7 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport)
int num_vecs = vport->num_q_vectors;
struct idpf_vec_regs *reg_vals;
int num_regs, i, err = 0;
u32 rx_itr, tx_itr;
u32 rx_itr, tx_itr, val;
u16 total_vecs;
total_vecs = idpf_get_reserved_vecs(vport->adapter);
@@ -120,6 +120,15 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport)
intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr);
}
/* Data vector for NOIRQ queues */
val = reg_vals[vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC].dyn_ctl_reg;
vport->noirq_dyn_ctl = idpf_get_reg_addr(adapter, val);
val = VF_INT_DYN_CTLN_WB_ON_ITR_M | VF_INT_DYN_CTLN_INTENA_MSK_M |
FIELD_PREP(VF_INT_DYN_CTLN_ITR_INDX_M, IDPF_NO_ITR_UPDATE_IDX);
vport->noirq_dyn_ctl_ena = val;
free_reg_vals:
kfree(reg_vals);

View File

@@ -724,21 +724,17 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter)
**/
static int idpf_wait_for_marker_event(struct idpf_vport *vport)
{
int event;
int i;
bool markers_rcvd = true;
for (i = 0; i < vport->num_txq; i++)
idpf_queue_set(SW_MARKER, vport->txqs[i]);
for (u32 i = 0; i < vport->num_txq; i++) {
struct idpf_tx_queue *txq = vport->txqs[i];
event = wait_event_timeout(vport->sw_marker_wq,
test_and_clear_bit(IDPF_VPORT_SW_MARKER,
vport->flags),
msecs_to_jiffies(500));
idpf_queue_set(SW_MARKER, txq);
idpf_wait_for_sw_marker_completion(txq);
markers_rcvd &= !idpf_queue_has(SW_MARKER, txq);
}
for (i = 0; i < vport->num_txq; i++)
idpf_queue_clear(POLL_MODE, vport->txqs[i]);
if (event)
if (markers_rcvd)
return 0;
dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n");
@@ -1061,21 +1057,35 @@ int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter,
struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues;
struct virtchnl2_get_capabilities *caps = &adapter->caps;
u16 default_vports = idpf_get_default_vports(adapter);
int max_rx_q, max_tx_q;
u32 max_rx_q, max_tx_q, max_buf_q, max_compl_q;
mutex_lock(&adapter->queue_lock);
/* Caps are device-wide. Give each vport an equal piece */
max_rx_q = le16_to_cpu(caps->max_rx_q) / default_vports;
max_tx_q = le16_to_cpu(caps->max_tx_q) / default_vports;
if (adapter->num_alloc_vports < default_vports) {
max_q->max_rxq = min_t(u16, max_rx_q, IDPF_MAX_Q);
max_q->max_txq = min_t(u16, max_tx_q, IDPF_MAX_Q);
} else {
max_q->max_rxq = IDPF_MIN_Q;
max_q->max_txq = IDPF_MIN_Q;
max_buf_q = le16_to_cpu(caps->max_rx_bufq) / default_vports;
max_compl_q = le16_to_cpu(caps->max_tx_complq) / default_vports;
if (adapter->num_alloc_vports >= default_vports) {
max_rx_q = IDPF_MIN_Q;
max_tx_q = IDPF_MIN_Q;
}
max_q->max_bufq = max_q->max_rxq * IDPF_MAX_BUFQS_PER_RXQ_GRP;
max_q->max_complq = max_q->max_txq;
/*
* Harmonize the numbers. The current implementation always creates
* `IDPF_MAX_BUFQS_PER_RXQ_GRP` buffer queues for each Rx queue and
* one completion queue for each Tx queue for best performance.
* If less buffer or completion queues is available, cap the number
* of the corresponding Rx/Tx queues.
*/
max_rx_q = min(max_rx_q, max_buf_q / IDPF_MAX_BUFQS_PER_RXQ_GRP);
max_tx_q = min(max_tx_q, max_compl_q);
max_q->max_rxq = max_rx_q;
max_q->max_txq = max_tx_q;
max_q->max_bufq = max_rx_q * IDPF_MAX_BUFQS_PER_RXQ_GRP;
max_q->max_complq = max_tx_q;
if (avail_queues->avail_rxq < max_q->max_rxq ||
avail_queues->avail_txq < max_q->max_txq ||
@@ -1506,7 +1516,7 @@ int idpf_send_destroy_vport_msg(struct idpf_vport *vport)
xn_params.vc_op = VIRTCHNL2_OP_DESTROY_VPORT;
xn_params.send_buf.iov_base = &v_id;
xn_params.send_buf.iov_len = sizeof(v_id);
xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
return reply_sz < 0 ? reply_sz : 0;
@@ -1554,7 +1564,7 @@ int idpf_send_disable_vport_msg(struct idpf_vport *vport)
xn_params.vc_op = VIRTCHNL2_OP_DISABLE_VPORT;
xn_params.send_buf.iov_base = &v_id;
xn_params.send_buf.iov_len = sizeof(v_id);
xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
return reply_sz < 0 ? reply_sz : 0;
@@ -1738,9 +1748,12 @@ setup_rxqs:
for (j = 0; j < num_rxq; j++, k++) {
const struct idpf_bufq_set *sets;
struct idpf_rx_queue *rxq;
u32 rxdids;
if (!idpf_is_queue_model_split(vport->rxq_model)) {
rxq = rx_qgrp->singleq.rxqs[j];
rxdids = rxq->rxdids;
goto common_qi_fields;
}
@@ -1773,6 +1786,8 @@ setup_rxqs:
cpu_to_le16(rxq->rx_hbuf_size);
}
rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
common_qi_fields:
qi[k].queue_id = cpu_to_le32(rxq->q_id);
qi[k].model = cpu_to_le16(vport->rxq_model);
@@ -1783,7 +1798,7 @@ common_qi_fields:
qi[k].data_buffer_size = cpu_to_le32(rxq->rx_buf_size);
qi[k].qflags |=
cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE);
qi[k].desc_ids = cpu_to_le64(rxq->rxdids);
qi[k].desc_ids = cpu_to_le64(rxdids);
}
}
@@ -1845,7 +1860,9 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL;
struct virtchnl2_queue_chunk *qc __free(kfree) = NULL;
u32 num_msgs, num_chunks, num_txq, num_rxq, num_q;
struct idpf_vc_xn_params xn_params = {};
struct idpf_vc_xn_params xn_params = {
.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC,
};
struct virtchnl2_queue_chunks *qcs;
u32 config_sz, chunk_sz, buf_sz;
ssize_t reply_sz;
@@ -1946,13 +1963,10 @@ send_msg:
if (!eq)
return -ENOMEM;
if (ena) {
if (ena)
xn_params.vc_op = VIRTCHNL2_OP_ENABLE_QUEUES;
xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
} else {
else
xn_params.vc_op = VIRTCHNL2_OP_DISABLE_QUEUES;
xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
}
for (i = 0, k = 0; i < num_msgs; i++) {
memset(eq, 0, buf_sz);
@@ -1990,7 +2004,9 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
{
struct virtchnl2_queue_vector_maps *vqvm __free(kfree) = NULL;
struct virtchnl2_queue_vector *vqv __free(kfree) = NULL;
struct idpf_vc_xn_params xn_params = {};
struct idpf_vc_xn_params xn_params = {
.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC,
};
u32 config_sz, chunk_sz, buf_sz;
u32 num_msgs, num_chunks, num_q;
ssize_t reply_sz;
@@ -2007,27 +2023,36 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
const struct idpf_tx_queue *txq = tx_qgrp->txqs[j];
const struct idpf_q_vector *vec;
u32 v_idx, tx_itr_idx;
vqv[k].queue_type =
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX);
vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
vqv[k].queue_id = cpu_to_le32(txq->q_id);
if (idpf_is_queue_model_split(vport->txq_model)) {
vqv[k].vector_id =
cpu_to_le16(tx_qgrp->complq->q_vector->v_idx);
vqv[k].itr_idx =
cpu_to_le32(tx_qgrp->complq->q_vector->tx_itr_idx);
if (idpf_queue_has(NOIRQ, txq))
vec = NULL;
else if (idpf_queue_has(XDP, txq))
vec = txq->complq->q_vector;
else if (idpf_is_queue_model_split(vport->txq_model))
vec = txq->txq_grp->complq->q_vector;
else
vec = txq->q_vector;
if (vec) {
v_idx = vec->v_idx;
tx_itr_idx = vec->tx_itr_idx;
} else {
vqv[k].vector_id =
cpu_to_le16(tx_qgrp->txqs[j]->q_vector->v_idx);
vqv[k].itr_idx =
cpu_to_le32(tx_qgrp->txqs[j]->q_vector->tx_itr_idx);
v_idx = vport->noirq_v_idx;
tx_itr_idx = VIRTCHNL2_ITR_IDX_1;
}
vqv[k].vector_id = cpu_to_le16(v_idx);
vqv[k].itr_idx = cpu_to_le32(tx_itr_idx);
}
}
if (vport->num_txq != k)
return -EINVAL;
for (i = 0; i < vport->num_rxq_grp; i++) {
struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
u16 num_rxq;
@@ -2039,6 +2064,7 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
for (j = 0; j < num_rxq; j++, k++) {
struct idpf_rx_queue *rxq;
u32 v_idx, rx_itr_idx;
if (idpf_is_queue_model_split(vport->rxq_model))
rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
@@ -2048,18 +2074,22 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
vqv[k].queue_type =
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
vqv[k].queue_id = cpu_to_le32(rxq->q_id);
vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx);
vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx);
if (idpf_queue_has(NOIRQ, rxq)) {
v_idx = vport->noirq_v_idx;
rx_itr_idx = VIRTCHNL2_ITR_IDX_0;
} else {
v_idx = rxq->q_vector->v_idx;
rx_itr_idx = rxq->q_vector->rx_itr_idx;
}
vqv[k].vector_id = cpu_to_le16(v_idx);
vqv[k].itr_idx = cpu_to_le32(rx_itr_idx);
}
}
if (idpf_is_queue_model_split(vport->txq_model)) {
if (vport->num_rxq != k - vport->num_complq)
return -EINVAL;
} else {
if (vport->num_rxq != k - vport->num_txq)
return -EINVAL;
}
if (k != num_q)
return -EINVAL;
/* Chunk up the vector info into multiple messages */
config_sz = sizeof(struct virtchnl2_queue_vector_maps);
@@ -2074,13 +2104,10 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
if (!vqvm)
return -ENOMEM;
if (map) {
if (map)
xn_params.vc_op = VIRTCHNL2_OP_MAP_QUEUE_VECTOR;
xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
} else {
else
xn_params.vc_op = VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR;
xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
}
for (i = 0, k = 0; i < num_msgs; i++) {
memset(vqvm, 0, buf_sz);
@@ -2125,24 +2152,12 @@ int idpf_send_enable_queues_msg(struct idpf_vport *vport)
*/
int idpf_send_disable_queues_msg(struct idpf_vport *vport)
{
int err, i;
int err;
err = idpf_send_ena_dis_queues_msg(vport, false);
if (err)
return err;
/* switch to poll mode as interrupts will be disabled after disable
* queues virtchnl message is sent
*/
for (i = 0; i < vport->num_txq; i++)
idpf_queue_set(POLL_MODE, vport->txqs[i]);
/* schedule the napi to receive all the marker packets */
local_bh_disable();
for (i = 0; i < vport->num_q_vectors; i++)
napi_schedule(&vport->q_vectors[i].napi);
local_bh_enable();
return idpf_wait_for_marker_event(vport);
}
@@ -2207,7 +2222,7 @@ int idpf_send_delete_queues_msg(struct idpf_vport *vport)
num_chunks);
xn_params.vc_op = VIRTCHNL2_OP_DEL_QUEUES;
xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
xn_params.send_buf.iov_base = eq;
xn_params.send_buf.iov_len = buf_size;
reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params);
@@ -2371,7 +2386,7 @@ int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter)
xn_params.vc_op = VIRTCHNL2_OP_DEALLOC_VECTORS;
xn_params.send_buf.iov_base = vcs;
xn_params.send_buf.iov_len = buf_size;
xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC;
xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;
reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
if (reply_sz < 0)
return reply_sz;
@@ -3285,9 +3300,17 @@ int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport)
{
struct idpf_vector_info vec_info;
int num_alloc_vecs;
u32 req;
vec_info.num_curr_vecs = vport->num_q_vectors;
vec_info.num_req_vecs = max(vport->num_txq, vport->num_rxq);
if (vec_info.num_curr_vecs)
vec_info.num_curr_vecs += IDPF_RESERVED_VECS;
/* XDPSQs are all bound to the NOIRQ vector from IDPF_RESERVED_VECS */
req = max(vport->num_txq - vport->num_xdp_txq, vport->num_rxq) +
IDPF_RESERVED_VECS;
vec_info.num_req_vecs = req;
vec_info.default_vport = vport->default_vport;
vec_info.index = vport->idx;
@@ -3300,7 +3323,7 @@ int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport)
return -EINVAL;
}
vport->num_q_vectors = num_alloc_vecs;
vport->num_q_vectors = num_alloc_vecs - IDPF_RESERVED_VECS;
return 0;
}

View File

@@ -4,7 +4,6 @@
#ifndef _IDPF_VIRTCHNL_H_
#define _IDPF_VIRTCHNL_H_
#define IDPF_VC_XN_MIN_TIMEOUT_MSEC 2000
#define IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC (60 * 1000)
#define IDPF_VC_XN_IDX_M GENMASK(7, 0)
#define IDPF_VC_XN_SALT_M GENMASK(15, 8)

View File

@@ -0,0 +1,454 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2025 Intel Corporation */
#include "idpf.h"
#include "idpf_virtchnl.h"
#include "xdp.h"
/**
 * idpf_rxq_for_each - invoke a callback on every Rx queue of a vport
 * @vport: vport whose Rx queues to walk
 * @fn: callback invoked once per queue
 * @arg: opaque argument passed through to @fn
 *
 * Walks both the splitq and singleq layouts of the Rx queue groups.
 *
 * Return: -ENETDOWN when the queue groups aren't allocated, the first
 * non-zero value returned by @fn (aborting the walk), 0 otherwise.
 */
static int idpf_rxq_for_each(const struct idpf_vport *vport,
			     int (*fn)(struct idpf_rx_queue *rxq, void *arg),
			     void *arg)
{
	bool split = idpf_is_queue_model_split(vport->rxq_model);

	if (!vport->rxq_grps)
		return -ENETDOWN;

	for (u32 g = 0; g < vport->num_rxq_grp; g++) {
		const struct idpf_rxq_group *grp = &vport->rxq_grps[g];
		u32 cnt = split ? grp->splitq.num_rxq_sets :
				  grp->singleq.num_rxq;

		for (u32 n = 0; n < cnt; n++) {
			struct idpf_rx_queue *rxq;
			int ret;

			rxq = split ? &grp->splitq.rxq_sets[n]->rxq :
				      grp->singleq.rxqs[n];

			ret = fn(rxq, arg);
			if (ret)
				return ret;
		}
	}

	return 0;
}
/**
 * __idpf_xdp_rxq_info_init - register XDP Rx queue info for one queue
 * @rxq: Rx queue to register
 * @arg: unused callback argument
 *
 * Registers the queue's &xdp_rxq_info with the stack, attaches the
 * backing page_pool as its memory model and, on splitq, points the queue
 * at the vport's XDPSQs for XDP_TX.
 *
 * Return: 0 on success, -errno from __xdp_rxq_info_reg() on failure.
 */
static int __idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq, void *arg)
{
	const struct idpf_vport *vport = rxq->q_vector->vport;
	bool split = idpf_is_queue_model_split(vport->rxq_model);
	const struct page_pool *pp;
	int err;

	err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx,
				 rxq->q_vector->napi.napi_id,
				 rxq->rx_buf_size);
	if (err)
		return err;

	/* On splitq, the buffers come from the buffer queue's page_pool */
	pp = split ? rxq->bufq_sets[0].bufq.pp : rxq->pp;
	xdp_rxq_info_attach_page_pool(&rxq->xdp_rxq, pp);

	if (!split)
		return 0;

	/* XDPSQs occupy the tail of the vport's Tx queue array */
	rxq->xdpsqs = &vport->txqs[vport->xdp_txq_offset];
	rxq->num_xdp_txq = vport->num_xdp_txq;

	return 0;
}
/**
 * idpf_xdp_rxq_info_init_all - register XDP Rx queue info for all Rx queues
 * @vport: vport to operate on
 *
 * Return: 0 on success, -errno on failure.
 */
int idpf_xdp_rxq_info_init_all(const struct idpf_vport *vport)
{
	return idpf_rxq_for_each(vport, __idpf_xdp_rxq_info_init, NULL);
}
/**
 * __idpf_xdp_rxq_info_deinit - unregister XDP Rx queue info for one queue
 * @rxq: Rx queue to unregister
 * @arg: the vport's Rx queue model passed as a casted integer
 *
 * Return: always 0 (matches the idpf_rxq_for_each() callback signature).
 */
static int __idpf_xdp_rxq_info_deinit(struct idpf_rx_queue *rxq, void *arg)
{
	/* Only splitq queues were given XDPSQ pointers at init time */
	if (idpf_is_queue_model_split((size_t)arg)) {
		rxq->xdpsqs = NULL;
		rxq->num_xdp_txq = 0;
	}

	xdp_rxq_info_detach_mem_model(&rxq->xdp_rxq);
	xdp_rxq_info_unreg(&rxq->xdp_rxq);

	return 0;
}
/**
 * idpf_xdp_rxq_info_deinit_all - unregister XDP info on all vport's Rx queues
 * @vport: vport to operate on
 */
void idpf_xdp_rxq_info_deinit_all(const struct idpf_vport *vport)
{
	idpf_rxq_for_each(vport, __idpf_xdp_rxq_info_deinit,
			  (void *)(size_t)vport->rxq_model);
}
/**
 * idpf_xdp_rxq_assign_prog - swap the XDP program on one Rx queue
 * @rxq: Rx queue to update
 * @arg: new &bpf_prog, or %NULL to remove the program
 *
 * Takes a reference on the new program for this queue and drops the one
 * held on the program being replaced. Relies on the RTNL lock for
 * serialization.
 *
 * Return: always 0 (matches the idpf_rxq_for_each() callback signature).
 */
static int idpf_xdp_rxq_assign_prog(struct idpf_rx_queue *rxq, void *arg)
{
	struct bpf_prog *new = arg, *prev;

	if (new)
		bpf_prog_inc(new);

	prev = rcu_replace_pointer(rxq->xdp_prog, new, lockdep_rtnl_is_held());
	if (prev)
		bpf_prog_put(prev);

	return 0;
}
/**
 * idpf_xdp_copy_prog_to_rqs - propagate an XDP program to all Rx queues
 * @vport: vport to operate on
 * @xdp_prog: program to install, or %NULL to remove it
 *
 * Each queue takes its own reference on @xdp_prog and releases the one
 * it previously held.
 */
void idpf_xdp_copy_prog_to_rqs(const struct idpf_vport *vport,
			       struct bpf_prog *xdp_prog)
{
	idpf_rxq_for_each(vport, idpf_xdp_rxq_assign_prog, xdp_prog);
}
/* Generated by LIBETH_XDP_DEFINE_TIMER() further down */
static void idpf_xdp_tx_timer(struct work_struct *work);

/**
 * idpf_xdpsqs_get - set up the vport's XDPSQs
 * @vport: vport to operate on
 *
 * Converts the Tx queues starting at @vport->xdp_txq_offset into XDPSQs:
 * allocates a completion timer per queue, switches the queues out of the
 * flow-scheduling mode and into the no-interrupt mode, and initializes
 * the libeth XDPSQ lock and timer machinery.
 *
 * Return: 0 on success (or when XDP is not enabled on the vport),
 * -ENOMEM on allocation failure.
 */
int idpf_xdpsqs_get(const struct idpf_vport *vport)
{
	/* Scoped array of timer pointers, kvfree()d automatically on return;
	 * the individual timers are handed over to the queues below.
	 */
	struct libeth_xdpsq_timer **timers __free(kvfree) = NULL;
	struct net_device *dev;
	u32 sqs;

	if (!idpf_xdp_enabled(vport))
		return 0;

	timers = kvcalloc(vport->num_xdp_txq, sizeof(*timers), GFP_KERNEL);
	if (!timers)
		return -ENOMEM;

	for (u32 i = 0; i < vport->num_xdp_txq; i++) {
		/* Allocate on the memory node closest to CPU @i */
		timers[i] = kzalloc_node(sizeof(*timers[i]), GFP_KERNEL,
					 cpu_to_mem(i));
		if (!timers[i]) {
			for (int j = i - 1; j >= 0; j--)
				kfree(timers[j]);

			return -ENOMEM;
		}
	}

	dev = vport->netdev;
	sqs = vport->xdp_txq_offset;

	for (u32 i = sqs; i < vport->num_txq; i++) {
		struct idpf_tx_queue *xdpsq = vport->txqs[i];

		xdpsq->complq = xdpsq->txq_grp->complq;
		/* XDPSQs don't use the refill queue allocated for skb Tx */
		kfree(xdpsq->refillq);
		xdpsq->refillq = NULL;

		idpf_queue_clear(FLOW_SCH_EN, xdpsq);
		idpf_queue_clear(FLOW_SCH_EN, xdpsq->complq);
		idpf_queue_set(NOIRQ, xdpsq);
		idpf_queue_set(XDP, xdpsq);
		idpf_queue_set(XDP, xdpsq->complq);

		xdpsq->timer = timers[i - sqs];
		libeth_xdpsq_get(&xdpsq->xdp_lock, dev, vport->xdpsq_share);
		libeth_xdpsq_init_timer(xdpsq->timer, xdpsq, &xdpsq->xdp_lock,
					idpf_xdp_tx_timer);

		xdpsq->pending = 0;
		xdpsq->xdp_tx = 0;
		xdpsq->thresh = libeth_xdp_queue_threshold(xdpsq->desc_count);
	}

	return 0;
}
/**
 * idpf_xdpsqs_put - tear down the vport's XDPSQs
 * @vport: vport to operate on
 *
 * Reverts idpf_xdpsqs_get(): stops and frees the completion timers,
 * releases the libeth XDPSQ locks and clears the no-interrupt mode.
 */
void idpf_xdpsqs_put(const struct idpf_vport *vport)
{
	struct net_device *dev;
	u32 sqs;

	if (!idpf_xdp_enabled(vport))
		return;

	dev = vport->netdev;
	sqs = vport->xdp_txq_offset;

	for (u32 i = sqs; i < vport->num_txq; i++) {
		struct idpf_tx_queue *xdpsq = vport->txqs[i];

		/* Test-and-clear XDP so each queue is torn down only once */
		if (!idpf_queue_has_clear(XDP, xdpsq))
			continue;

		libeth_xdpsq_deinit_timer(xdpsq->timer);
		libeth_xdpsq_put(&xdpsq->xdp_lock, dev);
		kfree(xdpsq->timer);

		xdpsq->refillq = NULL;
		idpf_queue_clear(NOIRQ, xdpsq);
	}
}
/**
 * idpf_xdp_parse_cqe - parse one 4-byte Tx completion descriptor
 * @desc: completion descriptor to parse
 * @gen: expected generation bit value for the current ring pass
 *
 * Return: the reported SQ head (index of the latest completed descriptor)
 * on success, -ENODATA when the descriptor hasn't been written by HW yet
 * (generation bit mismatch), -EINVAL on an unexpected completion type
 * (only "RS" completions are expected on XDPSQs).
 */
static int idpf_xdp_parse_cqe(const struct idpf_splitq_4b_tx_compl_desc *desc,
			      bool gen)
{
	u32 val;

#ifdef __LIBETH_WORD_ACCESS
	/* Read the whole 4-byte descriptor with one load on LE platforms */
	val = *(const u32 *)desc;
#else
	val = ((u32)le16_to_cpu(desc->q_head_compl_tag.q_head) << 16) |
	      le16_to_cpu(desc->qid_comptype_gen);
#endif
	if (!!(val & IDPF_TXD_COMPLQ_GEN_M) != gen)
		return -ENODATA;

	/* Everything below the GEN bit must be an RS completion type */
	if (unlikely((val & GENMASK(IDPF_TXD_COMPLQ_GEN_S - 1, 0)) !=
		     FIELD_PREP(IDPF_TXD_COMPLQ_COMPL_TYPE_M,
				IDPF_TXD_COMPLT_RS)))
		return -EINVAL;

	/* The queue head lives in the upper 16 bits */
	return upper_16_bits(val);
}
/**
 * idpf_xdpsq_poll - poll the completion queue for finished XDPSQ frames
 * @xdpsq: XDPSQ whose completions to process
 * @budget: maximum number of completed frames to account
 *
 * Walks the completion ring until the generation bit no longer matches
 * (no more completions written) or @budget is reached, toggling the
 * GEN_CHK flag on ring wraparound.
 *
 * Return: number of SQ descriptors completed since the last call.
 */
static u32 idpf_xdpsq_poll(struct idpf_tx_queue *xdpsq, u32 budget)
{
	struct idpf_compl_queue *cq = xdpsq->complq;
	u32 tx_ntc = xdpsq->next_to_clean;
	u32 tx_cnt = xdpsq->desc_count;
	u32 ntc = cq->next_to_clean;
	u32 cnt = cq->desc_count;
	u32 done_frames;
	bool gen;

	gen = idpf_queue_has(GEN_CHK, cq);

	for (done_frames = 0; done_frames < budget; ) {
		int ret;

		ret = idpf_xdp_parse_cqe(&cq->comp_4b[ntc], gen);
		if (ret >= 0) {
			/* @ret is the new SQ head; account for ring wrap */
			done_frames = ret > tx_ntc ? ret - tx_ntc :
				      ret + tx_cnt - tx_ntc;
			goto next;
		}

		switch (ret) {
		case -ENODATA:
			/* HW hasn't written any further completions */
			goto out;
		case -EINVAL:
			/* Unexpected completion type: skip the descriptor */
			break;
		}

next:
		if (unlikely(++ntc == cnt)) {
			ntc = 0;
			gen = !gen;
			idpf_queue_change(GEN_CHK, cq);
		}
	}

out:
	cq->next_to_clean = ntc;

	return done_frames;
}
/**
 * idpf_xdpsq_complete - clean finished descriptors from an XDPSQ
 * @_xdpsq: XDPSQ to clean (typeless to match the libeth callback signature)
 * @budget: maximum number of completions to process
 *
 * Polls the completion queue and releases the buffers attached to the
 * completed descriptors, bulk-recycling the XDP frames.
 *
 * Return: number of descriptors cleaned.
 */
static u32 idpf_xdpsq_complete(void *_xdpsq, u32 budget)
{
	struct libeth_xdpsq_napi_stats ss = { };
	struct idpf_tx_queue *xdpsq = _xdpsq;
	u32 tx_ntc = xdpsq->next_to_clean;
	u32 tx_cnt = xdpsq->desc_count;
	struct xdp_frame_bulk bq;
	struct libeth_cq_pp cp = {
		.dev = xdpsq->dev,
		.bq = &bq,
		.xss = &ss,
		.napi = true,
	};
	u32 done_frames;

	done_frames = idpf_xdpsq_poll(xdpsq, budget);
	if (unlikely(!done_frames))
		return 0;

	xdp_frame_bulk_init(&bq);

	for (u32 i = 0; likely(i < done_frames); i++) {
		libeth_xdp_complete_tx(&xdpsq->tx_buf[tx_ntc], &cp);

		if (unlikely(++tx_ntc == tx_cnt))
			tx_ntc = 0;
	}

	/* Flush any frames still queued in the recycling bulk */
	xdp_flush_frame_bulk(&bq);

	xdpsq->next_to_clean = tx_ntc;
	xdpsq->pending -= done_frames;
	/* cp.xdp_tx was updated by libeth_xdp_complete_tx() above */
	xdpsq->xdp_tx -= cp.xdp_tx;

	return done_frames;
}
/**
 * idpf_xdp_tx_prep - lock an XDPSQ and describe it for libeth
 * @_xdpsq: XDPSQ to prepare (typeless to match the libeth callback signature)
 * @sq: filled with the queue's ring and accounting pointers
 *
 * Tries to reclaim descriptors first when the number of free ones drops
 * below the cleaning threshold. Takes ::xdp_lock without releasing it
 * here; the libeth Tx bulk machinery unlocks it once the bulk is sent.
 *
 * Return: number of free descriptors on the queue.
 */
static u32 idpf_xdp_tx_prep(void *_xdpsq, struct libeth_xdpsq *sq)
{
	struct idpf_tx_queue *xdpsq = _xdpsq;
	u32 free;

	libeth_xdpsq_lock(&xdpsq->xdp_lock);

	free = xdpsq->desc_count - xdpsq->pending;
	if (free < xdpsq->thresh)
		free += idpf_xdpsq_complete(xdpsq, xdpsq->thresh);

	*sq = (struct libeth_xdpsq){
		.sqes = xdpsq->tx_buf,
		.descs = xdpsq->desc_ring,
		.count = xdpsq->desc_count,
		.lock = &xdpsq->xdp_lock,
		.ntu = &xdpsq->next_to_use,
		.pending = &xdpsq->pending,
		.xdp_tx = &xdpsq->xdp_tx,
	};

	return free;
}
/* Instantiate idpf_xdp_tx_timer(), idpf_xdp_tx_flush_bulk() (XDP_TX path)
 * and idpf_xdp_xmit_flush_bulk() (.ndo_xdp_xmit() path) from the libeth_xdp
 * templates, all built on idpf_xdp_tx_prep() and idpf_xdp_tx_xmit().
 */
LIBETH_XDP_DEFINE_START();
LIBETH_XDP_DEFINE_TIMER(static idpf_xdp_tx_timer, idpf_xdpsq_complete);
LIBETH_XDP_DEFINE_FLUSH_TX(idpf_xdp_tx_flush_bulk, idpf_xdp_tx_prep,
			   idpf_xdp_tx_xmit);
LIBETH_XDP_DEFINE_FLUSH_XMIT(static idpf_xdp_xmit_flush_bulk, idpf_xdp_tx_prep,
			     idpf_xdp_tx_xmit);
LIBETH_XDP_DEFINE_END();
/**
 * idpf_xdp_xmit - .ndo_xdp_xmit() implementation
 * @dev: netdev to transmit on
 * @n: number of frames in @frames
 * @frames: XDP frames to transmit
 * @flags: XDP_XMIT_* flags
 *
 * Return: -ENETDOWN when the carrier or the link is down, otherwise the
 * result of libeth_xdp_xmit_do_bulk().
 */
int idpf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		  u32 flags)
{
	const struct idpf_netdev_priv *np = netdev_priv(dev);
	const struct idpf_vport *vport = np->vport;

	if (unlikely(!netif_carrier_ok(dev) || !vport->link_up))
		return -ENETDOWN;

	return libeth_xdp_xmit_do_bulk(dev, n, frames, flags,
				       &vport->txqs[vport->xdp_txq_offset],
				       vport->num_xdp_txq,
				       idpf_xdp_xmit_flush_bulk,
				       idpf_xdp_tx_finalize);
}
/**
 * idpf_xdpmo_rx_hash - XDP Rx hints callback returning the RSS hash
 * @ctx: XDP buff (a &libeth_xdp_buff underneath)
 * @hash: filled with the hash value from the Rx descriptor
 * @rss_type: filled with the decoded hash type
 *
 * Return: 0 on success, -ENODATA when no hash is reported for the
 * packet's parsed type.
 */
static int idpf_xdpmo_rx_hash(const struct xdp_md *ctx, u32 *hash,
			      enum xdp_rss_hash_type *rss_type)
{
	const struct libeth_xdp_buff *xdp = (typeof(xdp))ctx;
	struct idpf_xdp_rx_desc desc __uninitialized;
	const struct idpf_rx_queue *rxq;
	struct libeth_rx_pt pt;

	rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq);

	/* QW0 carries the packet type id used for the ptype lookup */
	idpf_xdp_get_qw0(&desc, xdp->desc);

	pt = rxq->rx_ptype_lkup[idpf_xdp_rx_pt(&desc)];
	if (!libeth_rx_pt_has_hash(rxq->xdp_rxq.dev, pt))
		return -ENODATA;

	/* QW2 carries the hash value itself */
	idpf_xdp_get_qw2(&desc, xdp->desc);

	return libeth_xdpmo_rx_hash(hash, rss_type, idpf_xdp_rx_hash(&desc),
				    pt);
}
/* XDP Rx metadata (hints) ops; only the RSS hash hint is implemented */
static const struct xdp_metadata_ops idpf_xdpmo = {
	.xmo_rx_hash = idpf_xdpmo_rx_hash,
};
/**
 * idpf_xdp_set_features - advertise the netdev's XDP features
 * @vport: vport to operate on
 *
 * XDP is advertised only for the splitq Rx queue model; singleq vports
 * are left untouched.
 */
void idpf_xdp_set_features(const struct idpf_vport *vport)
{
	if (idpf_is_queue_model_split(vport->rxq_model))
		libeth_xdp_set_features_noredir(vport->netdev, &idpf_xdpmo);
}
/**
 * idpf_xdp_setup_prog - handle XDP_SETUP_PROG for a vport
 * @vport: vport to install the program on
 * @xdp: command buffer carrying the program and the extack
 *
 * When only swapping one program for another (or when the device is
 * being removed / has no registered netdev), the references are simply
 * exchanged. Otherwise the new program is saved in the persistent vport
 * config and a soft reset is initiated so the queues get reallocated
 * with (or without) XDPSQs.
 *
 * Return: 0 on success, -ENOSPC when no Tx queues can be spared for XDP,
 * -errno on soft reset failure.
 */
static int idpf_xdp_setup_prog(struct idpf_vport *vport,
			       const struct netdev_bpf *xdp)
{
	const struct idpf_netdev_priv *np = netdev_priv(vport->netdev);
	struct bpf_prog *old, *prog = xdp->prog;
	struct idpf_vport_config *cfg;
	int ret;

	cfg = vport->adapter->vport_config[vport->idx];

	/* No queue reconfiguration needed: prog -> prog, prog -> NULL on a
	 * dying device, or no netdev registered. Propagate the program to
	 * the Rx queues and transfer the vport-held reference.
	 */
	if (test_bit(IDPF_REMOVE_IN_PROG, vport->adapter->flags) ||
	    !test_bit(IDPF_VPORT_REG_NETDEV, cfg->flags) ||
	    !!vport->xdp_prog == !!prog) {
		if (np->state == __IDPF_VPORT_UP)
			idpf_xdp_copy_prog_to_rqs(vport, prog);

		old = xchg(&vport->xdp_prog, prog);
		if (old)
			bpf_prog_put(old);

		cfg->user_config.xdp_prog = prog;

		return 0;
	}

	if (!vport->num_xdp_txq && vport->num_txq == cfg->max_q.max_txq) {
		NL_SET_ERR_MSG_MOD(xdp->extack,
				   "No Tx queues available for XDP, please decrease the number of regular SQs");
		return -ENOSPC;
	}

	old = cfg->user_config.xdp_prog;
	cfg->user_config.xdp_prog = prog;

	ret = idpf_initiate_soft_reset(vport, IDPF_SR_Q_CHANGE);
	if (ret) {
		NL_SET_ERR_MSG_MOD(xdp->extack,
				   "Could not reopen the vport after XDP setup");

		/* Restore the previous program; drop the new one instead */
		cfg->user_config.xdp_prog = old;
		old = prog;
	}

	if (old)
		bpf_prog_put(old);

	libeth_xdp_set_redirect(vport->netdev, vport->xdp_prog);

	return ret;
}
/**
 * idpf_xdp - .ndo_bpf handler
 * @dev: netdev the command was issued on
 * @xdp: command buffer
 *
 * XDP is supported only with the splitq Tx model; anything else returns
 * -EOPNOTSUPP.
 *
 * Return: 0 on success, -errno on failure.
 */
int idpf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct idpf_vport *vport;
	int ret;

	idpf_vport_ctrl_lock(dev);
	vport = idpf_netdev_to_vport(dev);

	if (!idpf_is_queue_model_split(vport->txq_model))
		goto notsupp;

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		ret = idpf_xdp_setup_prog(vport, xdp);
		break;
	default:
notsupp:
		/* Also jumped to directly for non-splitq vports above */
		ret = -EOPNOTSUPP;
		break;
	}

	idpf_vport_ctrl_unlock(dev);

	return ret;
}

View File

@@ -0,0 +1,172 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2025 Intel Corporation */
#ifndef _IDPF_XDP_H_
#define _IDPF_XDP_H_
#include <net/libeth/xdp.h>
#include "idpf_txrx.h"
int idpf_xdp_rxq_info_init_all(const struct idpf_vport *vport);
void idpf_xdp_rxq_info_deinit_all(const struct idpf_vport *vport);
void idpf_xdp_copy_prog_to_rqs(const struct idpf_vport *vport,
struct bpf_prog *xdp_prog);
int idpf_xdpsqs_get(const struct idpf_vport *vport);
void idpf_xdpsqs_put(const struct idpf_vport *vport);
bool idpf_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags);
/**
* idpf_xdp_tx_xmit - produce a single HW Tx descriptor out of XDP desc
* @desc: XDP descriptor to pull the DMA address and length from
* @i: descriptor index on the queue to fill
* @sq: XDP queue to produce the HW Tx descriptor on
* @priv: &xsk_tx_metadata_ops on XSk xmit or %NULL
*/
static inline void idpf_xdp_tx_xmit(struct libeth_xdp_tx_desc desc, u32 i,
				    const struct libeth_xdpsq *sq, u64 priv)
{
	struct idpf_flex_tx_desc *tx_desc = sq->descs;
	u32 cmd;

	cmd = FIELD_PREP(IDPF_FLEX_TXD_QW1_DTYPE_M,
			 IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2);
	if (desc.flags & LIBETH_XDP_TX_LAST)
		cmd |= FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M,
				  IDPF_TX_DESC_CMD_EOP);
	/* Checksum is only requested when @priv (XSk meta ops) is set */
	if (priv && (desc.flags & LIBETH_XDP_TX_CSUM))
		cmd |= FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M,
				  IDPF_TX_FLEX_DESC_CMD_CS_EN);

	tx_desc = &tx_desc[i];
	tx_desc->buf_addr = cpu_to_le64(desc.addr);
#ifdef __LIBETH_WORD_ACCESS
	/* One 64-bit store covering buf_size + cmd_dtype on LE platforms */
	*(u64 *)&tx_desc->qw1 = ((u64)desc.len << 48) | cmd;
#else
	tx_desc->qw1.buf_size = cpu_to_le16(desc.len);
	tx_desc->qw1.cmd_dtype = cpu_to_le16(cmd);
#endif
}
/**
 * idpf_xdpsq_set_rs - request a completion on the last written descriptor
 * @xdpsq: XDPSQ to operate on
 *
 * Sets the RS bit on the most recently produced descriptor, so that HW
 * writes a completion carrying the queue head once it's processed.
 */
static inline void idpf_xdpsq_set_rs(const struct idpf_tx_queue *xdpsq)
{
	u32 ntu, cmd;

	ntu = xdpsq->next_to_use;
	/* next_to_use == 0 means the last write went to the ring's end */
	if (unlikely(!ntu))
		ntu = xdpsq->desc_count;

	cmd = FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M, IDPF_TX_DESC_CMD_RS);
#ifdef __LIBETH_WORD_ACCESS
	/* Single 64-bit RMW of qw1 on LE platforms */
	*(u64 *)&xdpsq->flex_tx[ntu - 1].q.qw1 |= cmd;
#else
	xdpsq->flex_tx[ntu - 1].q.qw1.cmd_dtype |= cpu_to_le16(cmd);
#endif
}
/**
 * idpf_xdpsq_update_tail - notify HW of newly produced descriptors
 * @xdpsq: XDPSQ to bump the tail register of
 *
 * dma_wmb() orders the descriptor writes before the tail update, so HW
 * never observes the new tail with stale descriptor contents.
 */
static inline void idpf_xdpsq_update_tail(const struct idpf_tx_queue *xdpsq)
{
	dma_wmb();
	writel_relaxed(xdpsq->next_to_use, xdpsq->tail);
}
/**
* idpf_xdp_tx_finalize - finalize sending over XDPSQ
* @_xdpsq: XDP Tx queue
* @sent: whether any frames were sent
* @flush: whether to update RS bit and the tail register
*
* Set the RS bit ("end of batch"), bump the tail, and queue the cleanup timer.
* To be called after a NAPI polling loop, at the end of .ndo_xdp_xmit() etc.
*/
static inline void idpf_xdp_tx_finalize(void *_xdpsq, bool sent, bool flush)
{
	struct idpf_tx_queue *xdpsq = _xdpsq;

	/* Nothing to do unless a flush with frames actually sent was
	 * requested, or the queue is one descriptor short of full.
	 */
	if ((!flush || unlikely(!sent)) &&
	    likely(xdpsq->desc_count - 1 != xdpsq->pending))
		return;

	libeth_xdpsq_lock(&xdpsq->xdp_lock);

	idpf_xdpsq_set_rs(xdpsq);
	idpf_xdpsq_update_tail(xdpsq);

	/* Arm the cleanup timer in case no further Tx triggers completion */
	libeth_xdpsq_queue_timer(xdpsq->timer);

	libeth_xdpsq_unlock(&xdpsq->xdp_lock);
}
/* Host-order overlay of &virtchnl2_rx_flex_desc_adv_nic_3, filled one
 * quad word at a time by the idpf_xdp_get_qw*() helpers below and
 * queried via the idpf_xdp_rx_*() field accessors.
 */
struct idpf_xdp_rx_desc {
	aligned_u64 qw0;
#define IDPF_XDP_RX_BUFQ	BIT_ULL(47)		/* source bufq id */
#define IDPF_XDP_RX_GEN		BIT_ULL(46)		/* generation bit */
#define IDPF_XDP_RX_LEN		GENMASK_ULL(45, 32)	/* packet length */
#define IDPF_XDP_RX_PT		GENMASK_ULL(25, 16)	/* packet type id */
	aligned_u64 qw1;
#define IDPF_XDP_RX_BUF		GENMASK_ULL(47, 32)	/* buffer id */
#define IDPF_XDP_RX_EOP		BIT_ULL(1)		/* end of packet */
	aligned_u64 qw2;
#define IDPF_XDP_RX_HASH	GENMASK_ULL(31, 0)	/* RSS hash */
	aligned_u64 qw3;
} __aligned(4 * sizeof(u64));
/* The overlay must exactly match the HW descriptor size */
static_assert(sizeof(struct idpf_xdp_rx_desc) ==
	      sizeof(struct virtchnl2_rx_flex_desc_adv_nic_3));

#define idpf_xdp_rx_bufq(desc)	!!((desc)->qw0 & IDPF_XDP_RX_BUFQ)
#define idpf_xdp_rx_gen(desc)	!!((desc)->qw0 & IDPF_XDP_RX_GEN)
#define idpf_xdp_rx_len(desc)	FIELD_GET(IDPF_XDP_RX_LEN, (desc)->qw0)
#define idpf_xdp_rx_pt(desc)	FIELD_GET(IDPF_XDP_RX_PT, (desc)->qw0)
#define idpf_xdp_rx_buf(desc)	FIELD_GET(IDPF_XDP_RX_BUF, (desc)->qw1)
#define idpf_xdp_rx_eop(desc)	!!((desc)->qw1 & IDPF_XDP_RX_EOP)
#define idpf_xdp_rx_hash(desc)	FIELD_GET(IDPF_XDP_RX_HASH, (desc)->qw2)
/* Fill @desc->qw0 (length/gen/bufq + packet type) from @rxd. On LE with
 * word access, a single 64-bit copy replaces the per-field extraction.
 */
static inline void
idpf_xdp_get_qw0(struct idpf_xdp_rx_desc *desc,
		 const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd)
{
#ifdef __LIBETH_WORD_ACCESS
	desc->qw0 = ((const typeof(desc))rxd)->qw0;
#else
	desc->qw0 = ((u64)le16_to_cpu(rxd->pktlen_gen_bufq_id) << 32) |
		    ((u64)le16_to_cpu(rxd->ptype_err_fflags0) << 16);
#endif
}
/* Fill @desc->qw1 (buffer id + status, incl. EOP) from @rxd. On LE with
 * word access, a single 64-bit copy replaces the per-field extraction.
 */
static inline void
idpf_xdp_get_qw1(struct idpf_xdp_rx_desc *desc,
		 const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd)
{
#ifdef __LIBETH_WORD_ACCESS
	desc->qw1 = ((const typeof(desc))rxd)->qw1;
#else
	desc->qw1 = ((u64)le16_to_cpu(rxd->buf_id) << 32) |
		    rxd->status_err0_qw1;
#endif
}
/* Fill @desc->qw2 (the RSS hash assembled from the hash1/2/3 bytes) from
 * @rxd. On LE with word access, a single 64-bit copy is used instead.
 */
static inline void
idpf_xdp_get_qw2(struct idpf_xdp_rx_desc *desc,
		 const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd)
{
#ifdef __LIBETH_WORD_ACCESS
	desc->qw2 = ((const typeof(desc))rxd)->qw2;
#else
	desc->qw2 = ((u64)rxd->hash3 << 24) |
		    ((u64)rxd->ff2_mirrid_hash2.hash2 << 16) |
		    le16_to_cpu(rxd->hash1);
#endif
}
void idpf_xdp_set_features(const struct idpf_vport *vport);
int idpf_xdp(struct net_device *dev, struct netdev_bpf *xdp);
int idpf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
#endif /* _IDPF_XDP_H_ */

View File

@@ -1274,7 +1274,6 @@ bool libeth_xdp_buff_add_frag(struct libeth_xdp_buff *xdp,
* Internal, use libeth_xdp_process_buff() instead. Initializes XDP buffer
* head with the Rx buffer data: data pointer, length, headroom, and
* truesize/tailroom. Zeroes the flags.
* Uses faster single u64 write instead of per-field access.
*/
static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
const struct libeth_fqe *fqe,
@@ -1282,17 +1281,9 @@ static inline void libeth_xdp_prepare_buff(struct libeth_xdp_buff *xdp,
{
const struct page *page = __netmem_to_page(fqe->netmem);
#ifdef __LIBETH_WORD_ACCESS
static_assert(offsetofend(typeof(xdp->base), flags) -
offsetof(typeof(xdp->base), frame_sz) ==
sizeof(u64));
*(u64 *)&xdp->base.frame_sz = fqe->truesize;
#else
xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
#endif
xdp_prepare_buff(&xdp->base, page_address(page) + fqe->offset,
pp_page_to_nmdesc(page)->pp->p.offset, len, true);
xdp_init_buff(&xdp->base, fqe->truesize, xdp->base.rxq);
}
/**

View File

@@ -85,8 +85,20 @@ struct xdp_buff {
void *data_hard_start;
struct xdp_rxq_info *rxq;
struct xdp_txq_info *txq;
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
u32 flags; /* supported values defined in xdp_buff_flags */
union {
struct {
/* frame size to deduce data_hard_end/tailroom */
u32 frame_sz;
/* supported values defined in xdp_buff_flags */
u32 flags;
};
#ifdef __LITTLE_ENDIAN
/* Used to micro-optimize xdp_init_buff(), don't use directly */
u64 frame_sz_flags_init;
#endif
};
};
static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp)
@@ -118,9 +130,19 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
xdp->frame_sz = frame_sz;
xdp->rxq = rxq;
#ifdef __LITTLE_ENDIAN
/*
* Force the compilers to initialize ::flags and assign ::frame_sz with
* one write on 64-bit LE architectures as they're often unable to do
* it themselves.
*/
xdp->frame_sz_flags_init = frame_sz;
#else
xdp->frame_sz = frame_sz;
xdp->flags = 0;
#endif
}
static __always_inline void