Merge tag 'nf-next-25-07-25' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following series contains Netfilter/IPVS updates for net-next:

1) Display netns inode in conntrack table full log, from lvxiafei.

2) Autoload nf_log_syslog in case no logging backend is available,
   from Lance Yang.

3) Three patches to remove unused functions in x_tables, nf_tables and
   conntrack. From Yue Haibing.

4) Exclude LEGACY TABLES on PREEMPT_RT: Add NETFILTER_XTABLES_LEGACY
   to exclude xtables legacy infrastructure.

5) Restore selftests by toggling NETFILTER_XTABLES_LEGACY where needed.
   From Florian Westphal.

6) Use CONFIG_INET_SCTP_DIAG in tools/testing/selftests/net/netfilter/config,
   from Sebastian Andrzej Siewior.

7) Use timer_delete in comment in IPVS codebase, from WangYuli.

8) Dump flowtable information in nfnetlink_hook, this includes an initial
   patch to consolidate common code in helper function, from Phil Sutter.

9) Remove unused arguments in nft_pipapo set backend, from Florian Westphal.

10) Return nft_set_ext instead of boolean in set lookup function,
    from Florian Westphal.

11) Remove indirection in dynamic set infrastructure, also from Florian.

12) Consolidate pipapo_get/lookup, from Florian.

13) Use kvmalloc in nft_pipapo, from Florian Westphal.

14) syzbot reports slab-out-of-bounds in xt_nfacct log message,
    fix from Florian Westphal.

15) Ignore tainted kernels in selftest nft_interface_stress.sh,
    from Phil Sutter.

16) Fix IPVS selftest by disabling rp_filter with ipip tunnel device,
    from Yi Chen.

* tag 'nf-next-25-07-25' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  selftests: netfilter: ipvs.sh: Explicity disable rp_filter on interface tunl0
  selftests: netfilter: Ignore tainted kernels in interface stress test
  netfilter: xt_nfacct: don't assume acct name is null-terminated
  netfilter: nft_set_pipapo: prefer kvmalloc for scratch maps
  netfilter: nft_set_pipapo: merge pipapo_get/lookup
  netfilter: nft_set: remove indirection from update API call
  netfilter: nft_set: remove one argument from lookup and update functions
  netfilter: nft_set_pipapo: remove unused arguments
  netfilter: nfnetlink_hook: Dump flowtable info
  netfilter: nfnetlink: New NFNLA_HOOK_INFO_DESC helper
  ipvs: Rename del_timer in comment in ip_vs_conn_expire_now()
  selftests: netfilter: Enable CONFIG_INET_SCTP_DIAG
  selftests: net: Enable legacy netfilter legacy options.
  netfilter: Exclude LEGACY TABLES on PREEMPT_RT.
  netfilter: conntrack: Remove unused net in nf_conntrack_double_lock()
  netfilter: nf_tables: Remove unused nft_reduce_is_readonly()
  netfilter: x_tables: Remove unused functions xt_{in|out}name()
  netfilter: load nf_log_syslog on enabling nf_conntrack_log_invalid
  netfilter: conntrack: table full detailed log
====================

Link: https://patch.msgid.link/20250725170340.21327-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2025-07-25 16:37:54 -07:00
34 changed files with 405 additions and 351 deletions

View File

@@ -92,6 +92,7 @@ enum nf_hook_ops_type {
NF_HOOK_OP_UNDEFINED,
NF_HOOK_OP_NF_TABLES,
NF_HOOK_OP_BPF,
NF_HOOK_OP_NFT_FT,
};
struct nf_hook_ops {

View File

@@ -51,21 +51,11 @@ static inline struct net_device *xt_in(const struct xt_action_param *par)
return par->state->in;
}
static inline const char *xt_inname(const struct xt_action_param *par)
{
return par->state->in->name;
}
static inline struct net_device *xt_out(const struct xt_action_param *par)
{
return par->state->out;
}
static inline const char *xt_outname(const struct xt_action_param *par)
{
return par->state->out->name;
}
static inline unsigned int xt_hooknum(const struct xt_action_param *par)
{
return par->state->hook;

View File

@@ -59,6 +59,9 @@ extern int sysctl_nf_log_all_netns;
int nf_log_register(u_int8_t pf, struct nf_logger *logger);
void nf_log_unregister(struct nf_logger *logger);
/* Check if any logger is registered for a given protocol family. */
bool nf_log_is_registered(u_int8_t pf);
int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger);
void nf_log_unset(struct net *net, const struct nf_logger *logger);

View File

@@ -459,19 +459,13 @@ struct nft_set_ext;
* control plane functions.
*/
struct nft_set_ops {
bool (*lookup)(const struct net *net,
const struct nft_set_ext * (*lookup)(const struct net *net,
const struct nft_set *set,
const u32 *key);
const struct nft_set_ext * (*update)(struct nft_set *set,
const u32 *key,
const struct nft_set_ext **ext);
bool (*update)(struct nft_set *set,
const u32 *key,
struct nft_elem_priv *
(*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *),
const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_set_ext **ext);
struct nft_regs *regs);
bool (*delete)(const struct nft_set *set,
const u32 *key);
@@ -1939,11 +1933,6 @@ static inline u64 nft_net_tstamp(const struct net *net)
#define __NFT_REDUCE_READONLY 1UL
#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
static inline bool nft_reduce_is_readonly(const struct nft_expr *expr)
{
return expr->ops->reduce == NFT_REDUCE_READONLY;
}
void nft_reg_track_update(struct nft_regs_track *track,
const struct nft_expr *expr, u8 dreg, u8 len);
void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len);

View File

@@ -94,34 +94,41 @@ extern const struct nft_set_type nft_set_pipapo_type;
extern const struct nft_set_type nft_set_pipapo_avx2_type;
#ifdef CONFIG_MITIGATION_RETPOLINE
bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_hash_lookup_fast(const struct net *net,
const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
#else
static inline bool
const struct nft_set_ext *
nft_rhash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
const struct nft_set_ext *
nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
const struct nft_set_ext *
nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
const struct nft_set_ext *
nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
const u32 *key);
const struct nft_set_ext *
nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
const struct nft_set_ext *
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const u32 *key);
#else
static inline const struct nft_set_ext *
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
return set->ops->lookup(net, set, key, ext);
return set->ops->lookup(net, set, key);
}
#endif
/* called from nft_pipapo_avx2.c */
bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
const struct nft_set_ext *
nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
/* called from nft_set_pipapo.c */
bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext);
const struct nft_set_ext *
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
void nft_counter_init_seqcount(void);
@@ -181,4 +188,7 @@ void nft_objref_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt);
void nft_objref_map_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt);
struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
const struct nft_expr *expr,
struct nft_regs *regs);
#endif /* _NET_NF_TABLES_CORE_H */

View File

@@ -61,10 +61,12 @@ enum nfnl_hook_chain_desc_attributes {
*
* @NFNL_HOOK_TYPE_NFTABLES: nf_tables base chain
* @NFNL_HOOK_TYPE_BPF: bpf program
* @NFNL_HOOK_TYPE_NFT_FLOWTABLE: nf_tables flowtable
*/
enum nfnl_hook_chaintype {
NFNL_HOOK_TYPE_NFTABLES = 0x1,
NFNL_HOOK_TYPE_BPF,
NFNL_HOOK_TYPE_NFT_FLOWTABLE,
};
/**

View File

@@ -42,8 +42,8 @@ config NF_CONNTRACK_BRIDGE
# old sockopt interface and eval loop
config BRIDGE_NF_EBTABLES_LEGACY
tristate "Legacy EBTABLES support"
depends on BRIDGE && NETFILTER_XTABLES
default n
depends on BRIDGE && NETFILTER_XTABLES_LEGACY
default n
help
Legacy ebtables packet/frame classifier.
This is not needed if you are using ebtables over nftables
@@ -65,7 +65,7 @@ if BRIDGE_NF_EBTABLES
#
config BRIDGE_EBT_BROUTE
tristate "ebt: broute table support"
select BRIDGE_NF_EBTABLES_LEGACY
depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables broute table is used to define rules that decide between
bridging and routing frames, giving Linux the functionality of a
@@ -76,7 +76,7 @@ config BRIDGE_EBT_BROUTE
config BRIDGE_EBT_T_FILTER
tristate "ebt: filter table support"
select BRIDGE_NF_EBTABLES_LEGACY
depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables filter table is used to define frame filtering rules at
local input, forwarding and local output. See the man page for
@@ -86,7 +86,7 @@ config BRIDGE_EBT_T_FILTER
config BRIDGE_EBT_T_NAT
tristate "ebt: nat table support"
select BRIDGE_NF_EBTABLES_LEGACY
depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables nat table is used to define rules that alter the MAC
source address (MAC SNAT) or the MAC destination address (MAC DNAT).

View File

@@ -13,8 +13,8 @@ config NF_DEFRAG_IPV4
# old sockopt interface and eval loop
config IP_NF_IPTABLES_LEGACY
tristate "Legacy IP tables support"
default n
select NETFILTER_XTABLES
depends on NETFILTER_XTABLES_LEGACY
default m if NETFILTER_XTABLES_LEGACY
help
iptables is a legacy packet classifier.
This is not needed if you are using iptables over nftables
@@ -182,8 +182,8 @@ config IP_NF_MATCH_TTL
# `filter', generic and specific targets
config IP_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
select IP_NF_IPTABLES_LEGACY
default m if NETFILTER_ADVANCED=n || IP_NF_IPTABLES_LEGACY
depends on IP_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -220,10 +220,10 @@ config IP_NF_TARGET_SYNPROXY
config IP_NF_NAT
tristate "iptables NAT support"
depends on NF_CONNTRACK
depends on IP_NF_IPTABLES_LEGACY
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NETFILTER_XT_NAT
select IP_NF_IPTABLES_LEGACY
help
This enables the `nat' table in iptables. This allows masquerading,
port forwarding and other forms of full Network Address Port
@@ -263,8 +263,8 @@ endif # IP_NF_NAT
# mangle + specific targets
config IP_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
select IP_NF_IPTABLES_LEGACY
default m if NETFILTER_ADVANCED=n || IP_NF_IPTABLES_LEGACY
depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -299,7 +299,7 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
select IP_NF_IPTABLES_LEGACY
depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -313,7 +313,7 @@ config IP_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
select IP_NF_IPTABLES_LEGACY
depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -325,8 +325,8 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
tristate "Legacy ARPTABLES support"
depends on NETFILTER_XTABLES
default n
depends on NETFILTER_XTABLES_LEGACY
default n
help
arptables is a legacy packet classifier.
This is not needed if you are using arptables over nftables
@@ -342,7 +342,7 @@ config IP_NF_ARPFILTER
tristate "arptables-legacy packet filtering support"
select IP_NF_ARPTABLES
select NETFILTER_FAMILY_ARP
depends on NETFILTER_XTABLES
depends on NETFILTER_XTABLES_LEGACY
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and

View File

@@ -9,9 +9,8 @@ menu "IPv6: Netfilter Configuration"
# old sockopt interface and eval loop
config IP6_NF_IPTABLES_LEGACY
tristate "Legacy IP6 tables support"
depends on INET && IPV6
select NETFILTER_XTABLES
default n
depends on INET && IPV6 && NETFILTER_XTABLES_LEGACY
default m if NETFILTER_XTABLES_LEGACY
help
ip6tables is a legacy packet classifier.
This is not needed if you are using iptables over nftables
@@ -196,8 +195,8 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
select IP6_NF_IPTABLES_LEGACY
default m if NETFILTER_ADVANCED=n || IP6_NF_IPTABLES_LEGACY
depends on IP6_NF_IPTABLES_LEGACY
tristate
help
Packet filtering defines a table `filter', which has a series of
@@ -233,8 +232,8 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
select IP6_NF_IPTABLES_LEGACY
default m if NETFILTER_ADVANCED=n || IP6_NF_IPTABLES_LEGACY
depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -244,7 +243,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
select IP6_NF_IPTABLES_LEGACY
depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -258,7 +257,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
select IP6_NF_IPTABLES_LEGACY
depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -269,8 +268,8 @@ config IP6_NF_NAT
tristate "ip6tables NAT support"
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
depends on IP6_NF_IPTABLES_LEGACY
select NF_NAT
select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,

View File

@@ -758,6 +758,16 @@ config NETFILTER_XTABLES_COMPAT
If unsure, say N.
config NETFILTER_XTABLES_LEGACY
bool "Netfilter legacy tables support"
depends on !PREEMPT_RT
help
Say Y here if you still require support for legacy tables. This is
required by the legacy tools (iptables-legacy) and is not needed if
you use iptables over nftables (iptables-nft).
Legacy support is not limited to IP, it also includes EBTABLES and
ARPTABLES.
comment "Xtables combined modules"
config NETFILTER_XT_MARK

View File

@@ -926,7 +926,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
/* Using mod_timer_pending will ensure the timer is not
* modified after the final del_timer in ip_vs_conn_expire.
* modified after the final timer_delete in ip_vs_conn_expire.
*/
if (timer_pending(&cp->timer) &&
time_after(cp->timer.expires, jiffies))

View File

@@ -136,8 +136,8 @@ static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
}
/* return true if we need to recompute hashes (in case hash table was resized) */
static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
unsigned int h2, unsigned int sequence)
static bool nf_conntrack_double_lock(unsigned int h1, unsigned int h2,
unsigned int sequence)
{
h1 %= CONNTRACK_LOCKS;
h2 %= CONNTRACK_LOCKS;
@@ -613,7 +613,7 @@ static void __nf_ct_delete_from_lists(struct nf_conn *ct)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
} while (nf_conntrack_double_lock(hash, reply_hash, sequence));
clean_from_lists(ct);
nf_conntrack_double_unlock(hash, reply_hash);
@@ -890,7 +890,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
} while (nf_conntrack_double_lock(hash, reply_hash, sequence));
max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN);
@@ -1234,7 +1234,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
} while (nf_conntrack_double_lock(hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
* connections for unconfirmed conns. But packet copies and
@@ -1673,7 +1673,11 @@ __nf_conntrack_alloc(struct net *net,
if (!conntrack_gc_work.early_drop)
conntrack_gc_work.early_drop = true;
atomic_dec(&cnet->count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
if (net == &init_net)
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
else
net_warn_ratelimited("nf_conntrack: table full in netns %u, dropping packet\n",
net->ns.inum);
return ERR_PTR(-ENOMEM);
}
}

View File

@@ -14,6 +14,7 @@
#include <linux/sysctl.h>
#endif
#include <net/netfilter/nf_log.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -555,6 +556,29 @@ nf_conntrack_hash_sysctl(const struct ctl_table *table, int write,
return ret;
}
/*
 * sysctl handler for nf_conntrack_log_invalid.
 *
 * Parsing and storing the value is delegated to proc_dou8vec_minmax().
 * After a successful write of a non-zero value, every protocol family
 * below NFPROTO_NUMPROTO is checked for a registered logger; when none
 * is found, the nf_log_syslog module is requested.
 *
 * Returns the result of proc_dou8vec_minmax(). The outcome of the module
 * request does not influence the return value.
 */
static int
nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int pf, err;

	err = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
	/* Nothing more to do for reads or for failed writes. */
	if (err < 0 || !write)
		return err;

	/* A stored value of zero never triggers a module load. */
	if (*(u8 *)table->data != 0) {
		/* Load nf_log_syslog only if no logger is currently registered */
		for (pf = 0; pf < NFPROTO_NUMPROTO; pf++) {
			if (nf_log_is_registered(pf))
				return err;
		}

		request_module("%s", "nf_log_syslog");
	}

	return err;
}
static struct ctl_table_header *nf_ct_netfilter_header;
enum nf_ct_sysctl_index {
@@ -651,7 +675,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.proc_handler = nf_conntrack_log_invalid_sysctl,
},
[NF_SYSCTL_CT_EXPECT_MAX] = {
.procname = "nf_conntrack_expect_max",

View File

@@ -125,6 +125,32 @@ void nf_log_unregister(struct nf_logger *logger)
}
EXPORT_SYMBOL(nf_log_unregister);
/**
 * nf_log_is_registered - Tell whether a protocol family has any logger.
 *
 * @pf: Protocol family. Values at or above NFPROTO_NUMPROTO raise a
 *      one-time warning and are reported as having no logger.
 *
 * Walks every logger type slot of @pf in the loggers table.
 *
 * Returns: true if at least one slot for @pf is non-NULL, false otherwise.
 */
bool nf_log_is_registered(u_int8_t pf)
{
	int type = 0;

	if (pf >= NFPROTO_NUMPROTO) {
		WARN_ON_ONCE(1);
		return false;
	}

	while (type < NF_LOG_TYPE_MAX) {
		if (rcu_access_pointer(loggers[pf][type]))
			return true;
		type++;
	}

	return false;
}
EXPORT_SYMBOL(nf_log_is_registered);
int nf_log_bind_pf(struct net *net, u_int8_t pf,
const struct nf_logger *logger)
{

View File

@@ -8895,11 +8895,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
list_for_each_entry(hook, &flowtable_hook->list, list) {
list_for_each_entry(ops, &hook->ops_list, list) {
ops->pf = NFPROTO_NETDEV;
ops->hooknum = flowtable_hook->num;
ops->priority = flowtable_hook->priority;
ops->priv = &flowtable->data;
ops->hook = flowtable->data.type->hook;
ops->pf = NFPROTO_NETDEV;
ops->hooknum = flowtable_hook->num;
ops->priority = flowtable_hook->priority;
ops->priv = &flowtable->data;
ops->hook = flowtable->data.type->hook;
ops->hook_ops_type = NF_HOOK_OP_NFT_FT;
}
}
@@ -9727,12 +9728,13 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev,
if (!ops)
return 1;
ops->pf = NFPROTO_NETDEV;
ops->hooknum = flowtable->hooknum;
ops->priority = flowtable->data.priority;
ops->priv = &flowtable->data;
ops->hook = flowtable->data.type->hook;
ops->dev = dev;
ops->pf = NFPROTO_NETDEV;
ops->hooknum = flowtable->hooknum;
ops->priority = flowtable->data.priority;
ops->priv = &flowtable->data;
ops->hook = flowtable->data.type->hook;
ops->hook_ops_type = NF_HOOK_OP_NFT_FT;
ops->dev = dev;
if (nft_register_flowtable_ops(dev_net(dev),
flowtable, ops)) {
kfree(ops);

View File

@@ -109,13 +109,30 @@ cancel_nest:
return -EMSGSIZE;
}
/*
 * Append an NFNLA_HOOK_INFO_DESC nest to @nlskb holding three attributes:
 * the table name (@tname), the object name (@name) and the @family.
 *
 * Returns 0 once the nest has been closed. If the nest cannot be started
 * or any attribute cannot be added, the nest is cancelled and -EMSGSIZE
 * is returned.
 */
static int nfnl_hook_put_nft_info_desc(struct sk_buff *nlskb, const char *tname,
				       const char *name, u8 family)
{
	struct nlattr *desc = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);

	if (!desc)
		goto cancel;
	if (nla_put_string(nlskb, NFNLA_CHAIN_TABLE, tname))
		goto cancel;
	if (nla_put_string(nlskb, NFNLA_CHAIN_NAME, name))
		goto cancel;
	if (nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, family))
		goto cancel;

	nla_nest_end(nlskb, desc);
	return 0;

cancel:
	/* Reached for a failed nest start as well, so desc may be NULL here. */
	nla_nest_cancel(nlskb, desc);
	return -EMSGSIZE;
}
static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
const struct nfnl_dump_hook_data *ctx,
unsigned int seq,
struct nft_chain *chain)
{
struct net *net = sock_net(nlskb->sk);
struct nlattr *nest, *nest2;
struct nlattr *nest;
int ret = 0;
if (WARN_ON_ONCE(!chain))
@@ -128,29 +145,47 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
if (!nest)
return -EMSGSIZE;
nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
if (!nest2)
goto cancel_nest;
ret = nfnl_hook_put_nft_info_desc(nlskb, chain->table->name,
chain->name, chain->table->family);
if (ret) {
nla_nest_cancel(nlskb, nest);
return ret;
}
ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
if (ret)
goto cancel_nest;
ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
if (ret)
goto cancel_nest;
ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
if (ret)
goto cancel_nest;
nla_nest_end(nlskb, nest2);
nla_nest_end(nlskb, nest);
return ret;
return 0;
}
cancel_nest:
nla_nest_cancel(nlskb, nest);
return -EMSGSIZE;
/*
 * Dump the description of an nf_tables flowtable hook into @nlskb.
 *
 * Opens an info nest of type NFNL_HOOK_TYPE_NFT_FLOWTABLE and fills it with
 * the owning table's name, the flowtable's name and the table's family.
 * @ctx and @seq are not used by this function.
 *
 * Returns 0 when the nest was written, and also when nothing is emitted
 * because @nf_ft is NULL (one-time warning) or nft_is_active() reports
 * false for the flowtable in the socket's netns. Returns -EMSGSIZE if
 * the info nest cannot be started; otherwise the error from
 * nfnl_hook_put_nft_info_desc(), after cancelling the nest.
 */
static int nfnl_hook_put_nft_ft_info(struct sk_buff *nlskb,
				     const struct nfnl_dump_hook_data *ctx,
				     unsigned int seq,
				     struct nf_flowtable *nf_ft)
{
	struct net *net = sock_net(nlskb->sk);
	struct nft_flowtable *ft;
	struct nlattr *info;
	int err;

	if (WARN_ON_ONCE(!nf_ft))
		return 0;

	/* The hook's private data is the nf_flowtable embedded in nft_flowtable. */
	ft = container_of(nf_ft, struct nft_flowtable, data);
	if (!nft_is_active(net, ft))
		return 0;

	info = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_NFT_FLOWTABLE);
	if (!info)
		return -EMSGSIZE;

	err = nfnl_hook_put_nft_info_desc(nlskb, ft->table->name,
					  ft->name, ft->table->family);
	if (err)
		nla_nest_cancel(nlskb, info);
	else
		nla_nest_end(nlskb, info);

	return err;
}
static int nfnl_hook_dump_one(struct sk_buff *nlskb,
@@ -220,6 +255,9 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
case NF_HOOK_OP_BPF:
ret = nfnl_hook_put_bpf_prog_info(nlskb, ctx, seq, ops->priv);
break;
case NF_HOOK_OP_NFT_FT:
ret = nfnl_hook_put_nft_ft_info(nlskb, ctx, seq, ops->priv);
break;
case NF_HOOK_OP_UNDEFINED:
break;
default:

View File

@@ -44,9 +44,9 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
return 0;
}
static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
const struct nft_expr *expr,
struct nft_regs *regs)
struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
const struct nft_expr *expr,
struct nft_regs *regs)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set_ext *ext;
@@ -91,8 +91,8 @@ void nft_dynset_eval(const struct nft_expr *expr,
return;
}
if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
expr, regs, &ext)) {
ext = set->ops->update(set, &regs->data[priv->sreg_key], expr, regs);
if (ext) {
if (priv->op == NFT_DYNSET_OP_UPDATE &&
nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) {

View File

@@ -25,32 +25,33 @@ struct nft_lookup {
};
#ifdef CONFIG_MITIGATION_RETPOLINE
bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const struct nft_set_ext *
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
if (set->ops == &nft_set_hash_fast_type.ops)
return nft_hash_lookup_fast(net, set, key, ext);
return nft_hash_lookup_fast(net, set, key);
if (set->ops == &nft_set_hash_type.ops)
return nft_hash_lookup(net, set, key, ext);
return nft_hash_lookup(net, set, key);
if (set->ops == &nft_set_rhash_type.ops)
return nft_rhash_lookup(net, set, key, ext);
return nft_rhash_lookup(net, set, key);
if (set->ops == &nft_set_bitmap_type.ops)
return nft_bitmap_lookup(net, set, key, ext);
return nft_bitmap_lookup(net, set, key);
if (set->ops == &nft_set_pipapo_type.ops)
return nft_pipapo_lookup(net, set, key, ext);
return nft_pipapo_lookup(net, set, key);
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
if (set->ops == &nft_set_pipapo_avx2_type.ops)
return nft_pipapo_avx2_lookup(net, set, key, ext);
return nft_pipapo_avx2_lookup(net, set, key);
#endif
if (set->ops == &nft_set_rbtree_type.ops)
return nft_rbtree_lookup(net, set, key, ext);
return nft_rbtree_lookup(net, set, key);
WARN_ON_ONCE(1);
return set->ops->lookup(net, set, key, ext);
return set->ops->lookup(net, set, key);
}
EXPORT_SYMBOL_GPL(nft_set_do_lookup);
#endif
@@ -61,12 +62,12 @@ void nft_lookup_eval(const struct nft_expr *expr,
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
const struct nft_set_ext *ext = NULL;
const struct net *net = nft_net(pkt);
const struct nft_set_ext *ext;
bool found;
found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext) ^
priv->invert;
ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]);
found = !!ext ^ priv->invert;
if (!found) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {

View File

@@ -111,10 +111,9 @@ void nft_objref_map_eval(const struct nft_expr *expr,
struct net *net = nft_net(pkt);
const struct nft_set_ext *ext;
struct nft_object *obj;
bool found;
found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext);
if (!found) {
ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]);
if (!ext) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {
regs->verdict.code = NFT_BREAK;

View File

@@ -75,16 +75,21 @@ nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
}
INDIRECT_CALLABLE_SCOPE
bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const struct nft_set_ext *
nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
const struct nft_bitmap *priv = nft_set_priv(set);
static const struct nft_set_ext found;
u8 genmask = nft_genmask_cur(net);
u32 idx, off;
nft_bitmap_location(set, key, &idx, &off);
return nft_bitmap_active(priv->bitmap, idx, off, genmask);
if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
return &found;
return NULL;
}
static struct nft_bitmap_elem *

View File

@@ -81,8 +81,9 @@ static const struct rhashtable_params nft_rhash_params = {
};
INDIRECT_CALLABLE_SCOPE
bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const struct nft_set_ext *
nft_rhash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_rhash *priv = nft_set_priv(set);
const struct nft_rhash_elem *he;
@@ -95,9 +96,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
*ext = &he->ext;
return &he->ext;
return !!he;
return NULL;
}
static struct nft_elem_priv *
@@ -120,14 +121,9 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
return ERR_PTR(-ENOENT);
}
static bool nft_rhash_update(struct nft_set *set, const u32 *key,
struct nft_elem_priv *
(*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *regs),
const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_set_ext **ext)
static const struct nft_set_ext *
nft_rhash_update(struct nft_set *set, const u32 *key,
const struct nft_expr *expr, struct nft_regs *regs)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he, *prev;
@@ -143,7 +139,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
if (he != NULL)
goto out;
elem_priv = new(set, expr, regs);
elem_priv = nft_dynset_new(set, expr, regs);
if (!elem_priv)
goto err1;
@@ -161,14 +157,13 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
}
out:
*ext = &he->ext;
return true;
return &he->ext;
err2:
nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
err1:
return false;
return NULL;
}
static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
@@ -507,8 +502,9 @@ struct nft_hash_elem {
};
INDIRECT_CALLABLE_SCOPE
bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const struct nft_set_ext *
nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -519,12 +515,10 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
*ext = &he->ext;
return true;
}
nft_set_elem_active(&he->ext, genmask))
return &he->ext;
}
return false;
return NULL;
}
static struct nft_elem_priv *
@@ -547,9 +541,9 @@ nft_hash_get(const struct net *net, const struct nft_set *set,
}
INDIRECT_CALLABLE_SCOPE
bool nft_hash_lookup_fast(const struct net *net,
const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const struct nft_set_ext *
nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -562,12 +556,10 @@ bool nft_hash_lookup_fast(const struct net *net,
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
if (k1 == k2 &&
nft_set_elem_active(&he->ext, genmask)) {
*ext = &he->ext;
return true;
}
nft_set_elem_active(&he->ext, genmask))
return &he->ext;
}
return false;
return NULL;
}
static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,

View File

@@ -397,34 +397,36 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
}
/**
* nft_pipapo_lookup() - Lookup function
* @net: Network namespace
* @set: nftables API set representation
* @key: nftables API element representation containing key data
* @ext: nftables API extension pointer, filled with matching reference
* pipapo_get() - Get matching element reference given key data
* @m: storage containing the set elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: timestamp to check for expired elements
*
* For more details, see DOC: Theory of Operation.
*
* Return: true on match, false otherwise.
* This is the main lookup function. It matches key data against either
* the working match set or the uncommitted copy, depending on what the
* caller passed to us.
* nft_pipapo_get (lookup from userspace/control plane) and nft_pipapo_lookup
* (datapath lookup) pass the active copy.
* The insertion path will pass the uncommitted working copy.
*
* Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
*/
bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
const u8 *data, u8 genmask,
u64 tstamp)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
bool map_index;
int i;
local_bh_disable();
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
if (unlikely(!raw_cpu_ptr(m->scratch)))
goto out;
scratch = *raw_cpu_ptr(m->scratch);
@@ -444,12 +446,12 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
* packet bytes value, then AND bucket value
*/
if (likely(f->bb == 8))
pipapo_and_field_buckets_8bit(f, res_map, rp);
pipapo_and_field_buckets_8bit(f, res_map, data);
else
pipapo_and_field_buckets_4bit(f, res_map, rp);
pipapo_and_field_buckets_4bit(f, res_map, data);
NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
/* Now populate the bitmap for the next field, unless this is
* the last field, in which case return the matched 'ext'
@@ -465,13 +467,15 @@ next_match:
scratch->map_index = map_index;
local_bh_enable();
return false;
return NULL;
}
if (last) {
*ext = &f->mt[b].e->ext;
if (unlikely(nft_set_elem_expired(*ext) ||
!nft_set_elem_active(*ext, genmask)))
struct nft_pipapo_elem *e;
e = f->mt[b].e;
if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
!nft_set_elem_active(&e->ext, genmask)))
goto next_match;
/* Last field: we're just returning the key without
@@ -481,8 +485,7 @@ next_match:
*/
scratch->map_index = map_index;
local_bh_enable();
return true;
return e;
}
/* Swap bitmap indices: res_map is the initial bitmap for the
@@ -492,112 +495,38 @@ next_match:
map_index = !map_index;
swap(res_map, fill_map);
rp += NFT_PIPAPO_GROUPS_PADDING(f);
data += NFT_PIPAPO_GROUPS_PADDING(f);
}
out:
local_bh_enable();
return false;
return NULL;
}
/**
* pipapo_get() - Get matching element reference given key data
* nft_pipapo_lookup() - Dataplane frontend for main lookup function
* @net: Network namespace
* @set: nftables API set representation
* @m: storage containing active/existing elements
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
* @tstamp: timestamp to check for expired elements
* @gfp: the type of memory to allocate (see kmalloc).
* @key: pointer to nft registers containing key data
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
* bitmap results.
* This function is called from the data path. It will search for
* an element matching the given key in the current active copy.
*
* Return: pointer to &struct nft_pipapo_elem on match, error pointer otherwise.
* Return: nftables API extension pointer or NULL if no match.
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
const struct nft_pipapo_match *m,
const u8 *data, u8 genmask,
u64 tstamp, gfp_t gfp)
const struct nft_set_ext *
nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
unsigned long *res_map, *fill_map = NULL;
const struct nft_pipapo_field *f;
int i;
struct nft_pipapo *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_elem *e;
if (m->bsize_max == 0)
return ret;
m = rcu_dereference(priv->match);
e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64());
res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp);
if (!res_map) {
ret = ERR_PTR(-ENOMEM);
goto out;
}
fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp);
if (!fill_map) {
ret = ERR_PTR(-ENOMEM);
goto out;
}
pipapo_resmap_init(m, res_map);
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1;
int b;
/* For each bit group: select lookup table bucket depending on
* packet bytes value, then AND bucket value
*/
if (f->bb == 8)
pipapo_and_field_buckets_8bit(f, res_map, data);
else if (f->bb == 4)
pipapo_and_field_buckets_4bit(f, res_map, data);
else
BUG();
data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
/* Now populate the bitmap for the next field, unless this is
* the last field, in which case return the matched 'ext'
* pointer if any.
*
* Now res_map contains the matching bitmap, and fill_map is the
* bitmap for the next field.
*/
next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0)
goto out;
if (last) {
if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
ret = f->mt[b].e;
goto out;
}
data += NFT_PIPAPO_GROUPS_PADDING(f);
/* Swap bitmap indices: fill_map will be the initial bitmap for
* the next field (i.e. the new res_map), and res_map is
* guaranteed to be all-zeroes at this point, ready to be filled
* according to the next mapping table.
*/
swap(res_map, fill_map);
}
out:
kfree(fill_map);
kfree(res_map);
return ret;
return e ? &e->ext : NULL;
}
/**
@@ -606,6 +535,11 @@ out:
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
* @flags: Unused
*
* This function is called from the control plane path under
* RCU read lock.
*
* Return: set element private pointer or ERR_PTR(-ENOENT).
*/
static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
@@ -615,11 +549,10 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = rcu_dereference(priv->match);
struct nft_pipapo_elem *e;
e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
nft_genmask_cur(net), get_jiffies_64(),
GFP_ATOMIC);
if (IS_ERR(e))
return ERR_CAST(e);
e = pipapo_get(m, (const u8 *)elem->key.val.data,
nft_genmask_cur(net), get_jiffies_64());
if (!e)
return ERR_PTR(-ENOENT);
return &e->priv;
}
@@ -1219,7 +1152,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c
mem = s;
mem -= s->align_off;
kfree(mem);
kvfree(mem);
}
/**
@@ -1240,10 +1173,9 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
void *scratch_aligned;
u32 align_off;
#endif
scratch = kzalloc_node(struct_size(scratch, map,
bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL_ACCOUNT, cpu_to_node(i));
scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL_ACCOUNT, cpu_to_node(i));
if (!scratch) {
/* On failure, there's no need to undo previous
* allocations: this means that some scratch maps have
@@ -1345,8 +1277,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL);
if (!IS_ERR(dup)) {
dup = pipapo_get(m, start, genmask, tstamp);
if (dup) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1365,15 +1297,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
return -ENOTEMPTY;
}
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp,
GFP_KERNEL);
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
/* Look for partially overlapping entries */
dup = pipapo_get(m, end, nft_genmask_next(net), tstamp);
if (dup) {
*elem_priv = &dup->priv;
return -ENOTEMPTY;
}
@@ -1914,9 +1840,9 @@ nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
if (!m)
return NULL;
e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL);
if (IS_ERR(e))
e = pipapo_get(m, (const u8 *)elem->key.val.data,
nft_genmask_next(net), nft_net_tstamp(net));
if (!e)
return NULL;
nft_set_elem_change_active(net, set, &e->ext);

View File

@@ -1137,7 +1137,6 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
* @net: Network namespace
* @set: nftables API set representation
* @key: nftables API element representation containing key data
* @ext: nftables API extension pointer, filled with matching reference
*
* For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
*
@@ -1146,8 +1145,9 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
*
* Return: pointer to the matching &struct nft_set_ext, or NULL if no match.
*/
bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const struct nft_set_ext *
nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_scratch *scratch;
@@ -1155,17 +1155,18 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
const struct nft_set_ext *ext;
unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
int i;
local_bh_disable();
if (unlikely(!irq_fpu_usable())) {
bool fallback_res = nft_pipapo_lookup(net, set, key, ext);
ext = nft_pipapo_lookup(net, set, key);
local_bh_enable();
return fallback_res;
return ext;
}
m = rcu_dereference(priv->match);
@@ -1182,7 +1183,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
if (unlikely(!scratch)) {
kernel_fpu_end();
local_bh_enable();
return false;
return NULL;
}
map_index = scratch->map_index;
@@ -1197,6 +1198,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
next_match:
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1, first = !i;
int ret = 0;
#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
@@ -1244,10 +1246,10 @@ next_match:
goto out;
if (last) {
*ext = &f->mt[ret].e->ext;
if (unlikely(nft_set_elem_expired(*ext) ||
!nft_set_elem_active(*ext, genmask))) {
ret = 0;
ext = &f->mt[ret].e->ext;
if (unlikely(nft_set_elem_expired(ext) ||
!nft_set_elem_active(ext, genmask))) {
ext = NULL;
goto next_match;
}
@@ -1264,5 +1266,5 @@ out:
kernel_fpu_end();
local_bh_enable();
return ret >= 0;
return ext;
}

View File

@@ -52,9 +52,9 @@ static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
return nft_set_elem_expired(&rbe->ext);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext,
unsigned int seq)
static const struct nft_set_ext *
__nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -65,7 +65,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
if (read_seqcount_retry(&priv->count, seq))
return false;
return NULL;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -87,50 +87,48 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
}
if (nft_rbtree_elem_expired(rbe))
return false;
return NULL;
if (nft_rbtree_interval_end(rbe)) {
if (nft_set_is_anonymous(set))
return false;
return NULL;
parent = rcu_dereference_raw(parent->rb_left);
interval = NULL;
continue;
}
*ext = &rbe->ext;
return true;
return &rbe->ext;
}
}
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
}
nft_rbtree_interval_start(interval))
return &interval->ext;
return false;
return NULL;
}
INDIRECT_CALLABLE_SCOPE
bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
const struct nft_set_ext *
nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
bool ret;
const struct nft_set_ext *ext;
ret = __nft_rbtree_lookup(net, set, key, ext, seq);
if (ret || !read_seqcount_retry(&priv->count, seq))
return ret;
ext = __nft_rbtree_lookup(net, set, key, seq);
if (ext || !read_seqcount_retry(&priv->count, seq))
return ext;
read_lock_bh(&priv->lock);
seq = read_seqcount_begin(&priv->count);
ret = __nft_rbtree_lookup(net, set, key, ext, seq);
ext = __nft_rbtree_lookup(net, set, key, seq);
read_unlock_bh(&priv->lock);
return ret;
return ext;
}
static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,

View File

@@ -1317,12 +1317,13 @@ void xt_compat_unlock(u_int8_t af)
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif
DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
struct static_key xt_tee_enabled __read_mostly;
EXPORT_SYMBOL_GPL(xt_tee_enabled);
#ifdef CONFIG_NETFILTER_XTABLES_LEGACY
DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
@@ -1514,6 +1515,7 @@ void *xt_unregister_table(struct xt_table *table)
return private;
}
EXPORT_SYMBOL_GPL(xt_unregister_table);
#endif
#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1897,6 +1899,7 @@ void xt_proto_fini(struct net *net, u_int8_t af)
}
EXPORT_SYMBOL_GPL(xt_proto_fini);
#ifdef CONFIG_NETFILTER_XTABLES_LEGACY
/**
* xt_percpu_counter_alloc - allocate x_tables rule counter
*
@@ -1951,6 +1954,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
free_percpu((void __percpu *)pcnt);
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
#endif
static int __net_init xt_net_init(struct net *net)
{
@@ -1983,8 +1987,10 @@ static int __init xt_init(void)
unsigned int i;
int rv;
for_each_possible_cpu(i) {
seqcount_init(&per_cpu(xt_recseq, i));
if (IS_ENABLED(CONFIG_NETFILTER_XTABLES_LEGACY)) {
for_each_possible_cpu(i) {
seqcount_init(&per_cpu(xt_recseq, i));
}
}
xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);

View File

@@ -38,8 +38,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
pr_info_ratelimited("accounting object `%s' does not exists\n",
info->name);
pr_info_ratelimited("accounting object `%.*s' does not exist\n",
NFACCT_NAME_MAX, info->name);
return -ENOENT;
}
info->nfacct = nfacct;

View File

@@ -97,6 +97,7 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NETFILTER_INGRESS=y
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_FLOW_TABLE=y
CONFIG_NF_FLOW_TABLE_INET=y
CONFIG_NETFILTER_NETLINK=y

View File

@@ -135,6 +135,7 @@ CONFIG_NET_EMATCH=y
CONFIG_NETFILTER_NETLINK_LOG=y
CONFIG_NETFILTER_NETLINK_QUEUE=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_MATCH_COMMENT=y

View File

@@ -30,16 +30,25 @@ CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_SEG6_LWTUNNEL=y
@@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_TARGET_HL=m
CONFIG_NETFILTER_XT_NAT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m

View File

@@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m
CONFIG_NF_TABLES=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
@@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m

View File

@@ -1,6 +1,8 @@
CONFIG_AUDIT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_IP=m
CONFIG_BRIDGE_EBT_REDIRECT=m
@@ -14,7 +16,10 @@ CONFIG_INET_ESP=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP_NF_RAW=m
@@ -92,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y
CONFIG_NET_PKTGEN=m
CONFIG_TUN=m
CONFIG_INET_DIAG=m
CONFIG_SCTP_DIAG=m
CONFIG_INET_SCTP_DIAG=m

View File

@@ -151,7 +151,7 @@ test_nat() {
test_tun() {
ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
ip netns exec "${ns1}" modprobe -q ipip
modprobe -q ipip
ip netns exec "${ns1}" ip link set tunl0 up
ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -160,10 +160,10 @@ test_tun() {
ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
ip netns exec "${ns2}" modprobe -q ipip
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service

View File

@@ -10,6 +10,8 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
checktool "iperf3 --version" "run test without iperf3 tool"
read kernel_tainted < /proc/sys/kernel/tainted
# how many seconds to torture the kernel?
# default to 80% of max run time but don't exceed 48s
TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
@@ -135,7 +137,8 @@ else
wait
fi
[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
echo "FAIL: Kernel is tainted!"
exit $ksft_fail
}

View File

@@ -16,9 +16,13 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_NAT=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_MANGLE=y