
The ice driver's Rx data path needs a bit of rework in order to support multi-buffer XDP. For the skb path, it currently works in a way that the Rx ring carries a pointer to the skb, so if the driver did not manage to assemble a fragmented frame within the current NAPI instance, it can restore the state on the next instance and keep looking for the last fragment (the descriptor with the EOP bit set). What needs to be achieved instead is that the xdp_buff is assembled that way (linear part + frags) in the first place. The skb is then ready to go in case of XDP_PASS or when no BPF program is present on the interface; if a BPF program is there, it operates on multi-buffer XDP. At this point the xdp_buff resides directly on the Rx ring, so given that the skb will be built straight from the xdp_buff, there is no further need to carry an skb pointer on the Rx ring.

Besides removing the skb pointer from the Rx ring, lots of members have been moved around within ice_rx_ring. The first and foremost reason was to place rx_buf and xdp_buff on the same cacheline, so that once rx_buf is touched (which precedes touching xdp_buff), xdp_buff is already hot in cache. The second was that xdp_rxq is used rather rarely and occupies a separate cacheline, so it is better placed at the end of ice_rx_ring.

Another change that affects ice_rx_ring is the introduction of ice_rx_ring::first_desc. Its purpose is twofold. The first is to propagate rx_buf->act to all parts of the current xdp_buff after running the XDP program, so that ice_put_rx_buf(), which got moved out of the main Rx processing loop, is able to take an appropriate action on each buffer. The second is for ice_construct_skb().

ice_construct_skb() has a copybreak mechanism which has an explicit impact on the xdp_buff->skb conversion in the new approach when the legacy-rx flag is toggled. It works in a way that the linear part is 256 bytes long; if the frame is bigger than that, the remaining bytes go as a frag to skb_shared_info. This means that while memcpying frags from the xdp_buff to the newly allocated skb, care needs to be taken when picking the destination frag array entry. By the time ice_construct_skb() is called for a fragmented frame, the current rx_buf points to the *last* fragment, but copybreak needs to be done against the first one. That is where ice_rx_ring::first_desc helps.

When frame building spans NAPI polls (the DD bit is not set on the current descriptor and xdp->data is not NULL), the current Rx buffer handling state could cause problems. The calls to ice_put_rx_buf() were pulled out of the main Rx processing loop and scoped from cached_ntc to the current ntc, but that function relies on rx_buf->act, which is set within ice_run_xdp(), and ice_run_xdp() is called only once the EOP bit is found. So currently we could put an Rx buffer with rx_buf->act being *uninitialized*. To address this, change the scoping to rely on first_desc on both boundaries instead.

This also implies that cleaned_count, which is used as an input to ice_alloc_rx_buffers() and tells how many new buffers should be refilled, has to be adjusted. If it stayed as is, ntc could end up going past ntu. Therefore, remove cleaned_count altogether and have the allocation routine use the newly introduced ICE_RX_DESC_UNUSED() macro, which is an equivalent of ICE_DESC_UNUSED() dedicated to the Rx side and based on ice_rx_ring::first_desc instead of next_to_clean.
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Link: https://lore.kernel.org/bpf/20230131204506.219292-11-maciej.fijalkowski@intel.com
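
For reference, a sketch of what the ICE_RX_DESC_UNUSED() helper described above boils down to, modelled on the existing ICE_DESC_UNUSED() but keyed off first_desc; the authoritative definition is the one in the patch itself:

/* Unused Rx descriptors counted up to first_desc rather than next_to_clean,
 * so buffers belonging to an in-progress multi-buffer frame are never
 * handed back to the allocation routine (sketch, assumed placement in
 * ice_txrx.h next to ICE_DESC_UNUSED()).
 */
#define ICE_RX_DESC_UNUSED(R)	\
	((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \
	 (R)->first_desc - (R)->next_to_use - 1)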
136 lines
4.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019, Intel Corporation. */

#ifndef _ICE_TXRX_LIB_H_
#define _ICE_TXRX_LIB_H_
#include "ice.h"

/**
 * ice_set_rx_bufs_act - propagate Rx buffer action to frags
 * @xdp: XDP buffer representing frame (linear and frags part)
 * @rx_ring: Rx ring struct
 * @act: action to store onto Rx buffers related to XDP buffer parts
 *
 * Set action that should be taken before putting Rx buffer from first frag
 * to one before last. Last one is handled by caller of this function as it
 * is the EOP frag that is currently being processed. This function is
 * supposed to be called only when XDP buffer contains frags.
 */
static inline void
ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
                    const unsigned int act)
{
        const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
        u32 first = rx_ring->first_desc;
        u32 nr_frags = sinfo->nr_frags;
        u32 cnt = rx_ring->count;
        struct ice_rx_buf *buf;

        for (int i = 0; i < nr_frags; i++) {
                buf = &rx_ring->rx_buf[first];
                buf->act = act;

                if (++first == cnt)
                        first = 0;
        }
}
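
/* Illustrative only (not part of this header): once the verdict for a
 * fragmented frame is known, a caller such as ice_run_xdp() is expected to
 * fan it out to the non-EOP frags roughly like so:
 *
 *	if (unlikely(xdp_buff_has_frags(xdp)))
 *		ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
 *
 * The EOP buffer itself keeps the act already assigned by the caller.
 */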

/**
 * ice_test_staterr - tests bits in Rx descriptor status and error fields
 * @status_err_n: Rx descriptor status_error0 or status_error1 bits
 * @stat_err_bits: value to mask
 *
 * This function does some fast chicanery in order to return the
 * value of the mask which is really only used for boolean tests.
 * The status_error_len doesn't need to be shifted because it begins
 * at offset zero.
 */
static inline bool
ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
{
        return !!(status_err_n & cpu_to_le16(stat_err_bits));
}

/**
 * ice_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 *
 * If the buffer is an EOP buffer, this function exits returning false,
 * otherwise return true indicating that this is in fact a non-EOP buffer.
 */
static inline bool
ice_is_non_eop(const struct ice_rx_ring *rx_ring,
               const union ice_32b_rx_flex_desc *rx_desc)
{
        /* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
        if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
                return false;

        rx_ring->ring_stats->rx_stats.non_eop_descs++;

        return true;
}
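
/* Illustrative only (not part of this header): in the Rx cleaning loop a
 * non-EOP descriptor just adds its buffer as a frag to the ring's xdp_buff
 * and moves on to the next descriptor, roughly:
 *
 *	if (ice_is_non_eop(rx_ring, rx_desc))
 *		continue;
 *
 * The XDP program is run and the skb (if any) is built only once the EOP
 * descriptor has been reached.
 */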

static inline __le64
ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
        return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
                           (td_cmd << ICE_TXD_QW1_CMD_S) |
                           (td_offset << ICE_TXD_QW1_OFFSET_S) |
                           ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
                           (td_tag << ICE_TXD_QW1_L2TAG1_S));
}
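
/* ice_build_ctob() packs the command bits, header offsets, buffer size and
 * L2 tag into the 64-bit cmd_type_offset_bsz quad-word of a data Tx
 * descriptor; a typical caller does something along the lines of:
 *
 *	tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset,
 *						      size, td_tag);
 */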

/**
 * ice_get_vlan_tag_from_rx_desc - get VLAN from Rx flex descriptor
 * @rx_desc: Rx 32b flex descriptor with RXDID=2
 *
 * The OS and current PF implementation only support stripping a single VLAN tag
 * at a time, so there should only ever be 0 or 1 tags in the l2tag* fields. If
 * one is found return the tag, else return 0 to mean no VLAN tag was found.
 */
static inline u16
ice_get_vlan_tag_from_rx_desc(union ice_32b_rx_flex_desc *rx_desc)
{
        u16 stat_err_bits;

        stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
        if (ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
                return le16_to_cpu(rx_desc->wb.l2tag1);

        stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S);
        if (ice_test_staterr(rx_desc->wb.status_error1, stat_err_bits))
                return le16_to_cpu(rx_desc->wb.l2tag2_2nd);

        return 0;
}

/**
 * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
 * @xdp_ring: XDP Tx ring
 *
 * This function updates the XDP Tx ring tail register.
 */
static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring)
{
        /* Force memory writes to complete before letting h/w
         * know there are new descriptors to fetch.
         */
        wmb();
        writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}
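
/* Note: the tail bump is batched, e.g. ice_finalize_xdp_rx() calls this once
 * per NAPI poll after all XDP_TX/XDP_REDIRECT work has been queued, rather
 * than once per transmitted frame, keeping MMIO writes to a minimum.
 */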

void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res);
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring);
void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
void
ice_process_skb_fields(struct ice_rx_ring *rx_ring,
                       union ice_32b_rx_flex_desc *rx_desc,
                       struct sk_buff *skb, u16 ptype);
void
ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
#endif /* !_ICE_TXRX_LIB_H_ */