mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	netfilter: connlimit: use rbtree for per-host conntrack obj storage
With current match design every invocation of the connlimit_match function means we have to perform (number_of_conntracks % 256) lookups in the conntrack table [ to perform GC/delete stale entries ]. This is also the reason why ____nf_conntrack_find() in perf top has > 20% cpu time per core. This patch changes the storage to rbtree which cuts down the number of ct objects that need testing. When looking up a new tuple, we only test the connections of the host objects we visit while searching for the wanted host/network (or the leaf we need to insert at). The slot count is reduced to 32. Increasing slot count doesn't speed up things much because of rbtree nature. before patch (50kpps rx, 10kpps tx): + 20.95% ksoftirqd/0 [nf_conntrack] [k] ____nf_conntrack_find + 20.50% ksoftirqd/1 [nf_conntrack] [k] ____nf_conntrack_find + 20.27% ksoftirqd/2 [nf_conntrack] [k] ____nf_conntrack_find + 5.76% ksoftirqd/1 [nf_conntrack] [k] hash_conntrack_raw + 5.39% ksoftirqd/2 [nf_conntrack] [k] hash_conntrack_raw + 5.35% ksoftirqd/0 [nf_conntrack] [k] hash_conntrack_raw after (90kpps, 51kpps tx): + 17.24% swapper [nf_conntrack] [k] ____nf_conntrack_find + 6.60% ksoftirqd/2 [nf_conntrack] [k] ____nf_conntrack_find + 2.73% swapper [nf_conntrack] [k] hash_conntrack_raw + 2.36% swapper [xt_connlimit] [k] count_tree Obvious disadvantages to previous version are the increase in code complexity and the increased memory cost. Partially based on Eric Dumazets fq scheduler. Reviewed-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
		
							parent
							
								
									50e0e9b129
								
							
						
					
					
						commit
						7d08487777
					
				| @ -19,6 +19,7 @@ | |||||||
| #include <linux/jhash.h> | #include <linux/jhash.h> | ||||||
| #include <linux/slab.h> | #include <linux/slab.h> | ||||||
| #include <linux/list.h> | #include <linux/list.h> | ||||||
|  | #include <linux/rbtree.h> | ||||||
| #include <linux/module.h> | #include <linux/module.h> | ||||||
| #include <linux/random.h> | #include <linux/random.h> | ||||||
| #include <linux/skbuff.h> | #include <linux/skbuff.h> | ||||||
| @ -31,8 +32,9 @@ | |||||||
| #include <net/netfilter/nf_conntrack_tuple.h> | #include <net/netfilter/nf_conntrack_tuple.h> | ||||||
| #include <net/netfilter/nf_conntrack_zones.h> | #include <net/netfilter/nf_conntrack_zones.h> | ||||||
| 
 | 
 | ||||||
| #define CONNLIMIT_SLOTS	256 | #define CONNLIMIT_SLOTS		32 | ||||||
| #define CONNLIMIT_LOCK_SLOTS	32 | #define CONNLIMIT_LOCK_SLOTS	32 | ||||||
|  | #define CONNLIMIT_GC_MAX_NODES	8 | ||||||
| 
 | 
 | ||||||
| /* we will save the tuples of all connections we care about */ | /* we will save the tuples of all connections we care about */ | ||||||
| struct xt_connlimit_conn { | struct xt_connlimit_conn { | ||||||
| @ -41,12 +43,20 @@ struct xt_connlimit_conn { | |||||||
| 	union nf_inet_addr		addr; | 	union nf_inet_addr		addr; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct xt_connlimit_rb { | ||||||
|  | 	struct rb_node node; | ||||||
|  | 	struct hlist_head hhead; /* connections/hosts in same subnet */ | ||||||
|  | 	union nf_inet_addr addr; /* search key */ | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct xt_connlimit_data { | struct xt_connlimit_data { | ||||||
| 	struct hlist_head	iphash[CONNLIMIT_SLOTS]; | 	struct rb_root climit_root4[CONNLIMIT_SLOTS]; | ||||||
|  | 	struct rb_root climit_root6[CONNLIMIT_SLOTS]; | ||||||
| 	spinlock_t		locks[CONNLIMIT_LOCK_SLOTS]; | 	spinlock_t		locks[CONNLIMIT_LOCK_SLOTS]; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static u_int32_t connlimit_rnd __read_mostly; | static u_int32_t connlimit_rnd __read_mostly; | ||||||
|  | static struct kmem_cache *connlimit_rb_cachep __read_mostly; | ||||||
| static struct kmem_cache *connlimit_conn_cachep __read_mostly; | static struct kmem_cache *connlimit_conn_cachep __read_mostly; | ||||||
| 
 | 
 | ||||||
| static inline unsigned int connlimit_iphash(__be32 addr) | static inline unsigned int connlimit_iphash(__be32 addr) | ||||||
| @ -99,19 +109,33 @@ same_source_net(const union nf_inet_addr *addr, | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int count_hlist(struct net *net, | static bool add_hlist(struct hlist_head *head, | ||||||
|  | 		      const struct nf_conntrack_tuple *tuple, | ||||||
|  | 		      const union nf_inet_addr *addr) | ||||||
|  | { | ||||||
|  | 	struct xt_connlimit_conn *conn; | ||||||
|  | 
 | ||||||
|  | 	conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); | ||||||
|  | 	if (conn == NULL) | ||||||
|  | 		return false; | ||||||
|  | 	conn->tuple = *tuple; | ||||||
|  | 	conn->addr = *addr; | ||||||
|  | 	hlist_add_head(&conn->node, head); | ||||||
|  | 	return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static unsigned int check_hlist(struct net *net, | ||||||
| 				struct hlist_head *head, | 				struct hlist_head *head, | ||||||
| 				const struct nf_conntrack_tuple *tuple, | 				const struct nf_conntrack_tuple *tuple, | ||||||
| 		       const union nf_inet_addr *addr, | 				bool *addit) | ||||||
| 		       const union nf_inet_addr *mask, |  | ||||||
| 		       u_int8_t family, bool *addit) |  | ||||||
| { | { | ||||||
| 	const struct nf_conntrack_tuple_hash *found; | 	const struct nf_conntrack_tuple_hash *found; | ||||||
| 	struct xt_connlimit_conn *conn; | 	struct xt_connlimit_conn *conn; | ||||||
| 	struct hlist_node *n; | 	struct hlist_node *n; | ||||||
| 	struct nf_conn *found_ct; | 	struct nf_conn *found_ct; | ||||||
| 	int matches = 0; | 	unsigned int length = 0; | ||||||
| 
 | 
 | ||||||
|  | 	*addit = true; | ||||||
| 	rcu_read_lock(); | 	rcu_read_lock(); | ||||||
| 
 | 
 | ||||||
| 	/* check the saved connections */ | 	/* check the saved connections */ | ||||||
| @ -144,30 +168,114 @@ static int count_hlist(struct net *net, | |||||||
| 			continue; | 			continue; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		if (same_source_net(addr, mask, &conn->addr, family) == 0) |  | ||||||
| 			/* same source network -> be counted! */ |  | ||||||
| 			++matches; |  | ||||||
| 		nf_ct_put(found_ct); | 		nf_ct_put(found_ct); | ||||||
|  | 		length++; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	rcu_read_unlock(); | 	rcu_read_unlock(); | ||||||
| 
 | 
 | ||||||
| 	return matches; | 	return length; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static bool add_hlist(struct hlist_head *head, | static void tree_nodes_free(struct rb_root *root, | ||||||
| 		      const struct nf_conntrack_tuple *tuple, | 			    struct xt_connlimit_rb *gc_nodes[], | ||||||
| 		      const union nf_inet_addr *addr) | 			    unsigned int gc_count) | ||||||
| { | { | ||||||
|  | 	struct xt_connlimit_rb *rbconn; | ||||||
|  | 
 | ||||||
|  | 	while (gc_count) { | ||||||
|  | 		rbconn = gc_nodes[--gc_count]; | ||||||
|  | 		rb_erase(&rbconn->node, root); | ||||||
|  | 		kmem_cache_free(connlimit_rb_cachep, rbconn); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static unsigned int | ||||||
|  | count_tree(struct net *net, struct rb_root *root, | ||||||
|  | 	   const struct nf_conntrack_tuple *tuple, | ||||||
|  | 	   const union nf_inet_addr *addr, const union nf_inet_addr *mask, | ||||||
|  | 	   u8 family) | ||||||
|  | { | ||||||
|  | 	struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; | ||||||
|  | 	struct rb_node **rbnode, *parent; | ||||||
|  | 	struct xt_connlimit_rb *rbconn; | ||||||
| 	struct xt_connlimit_conn *conn; | 	struct xt_connlimit_conn *conn; | ||||||
|  | 	unsigned int gc_count; | ||||||
|  | 	bool no_gc = false; | ||||||
|  | 
 | ||||||
|  |  restart: | ||||||
|  | 	gc_count = 0; | ||||||
|  | 	parent = NULL; | ||||||
|  | 	rbnode = &(root->rb_node); | ||||||
|  | 	while (*rbnode) { | ||||||
|  | 		int diff; | ||||||
|  | 		bool addit; | ||||||
|  | 
 | ||||||
|  | 		rbconn = container_of(*rbnode, struct xt_connlimit_rb, node); | ||||||
|  | 
 | ||||||
|  | 		parent = *rbnode; | ||||||
|  | 		diff = same_source_net(addr, mask, &rbconn->addr, family); | ||||||
|  | 		if (diff < 0) { | ||||||
|  | 			rbnode = &((*rbnode)->rb_left); | ||||||
|  | 		} else if (diff > 0) { | ||||||
|  | 			rbnode = &((*rbnode)->rb_right); | ||||||
|  | 		} else { | ||||||
|  | 			/* same source network -> be counted! */ | ||||||
|  | 			unsigned int count; | ||||||
|  | 			count = check_hlist(net, &rbconn->hhead, tuple, &addit); | ||||||
|  | 
 | ||||||
|  | 			tree_nodes_free(root, gc_nodes, gc_count); | ||||||
|  | 			if (!addit) | ||||||
|  | 				return count; | ||||||
|  | 
 | ||||||
|  | 			if (!add_hlist(&rbconn->hhead, tuple, addr)) | ||||||
|  | 				return 0; /* hotdrop */ | ||||||
|  | 
 | ||||||
|  | 			return count + 1; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) | ||||||
|  | 			continue; | ||||||
|  | 
 | ||||||
|  | 		/* only used for GC on hhead, retval and 'addit' ignored */ | ||||||
|  | 		check_hlist(net, &rbconn->hhead, tuple, &addit); | ||||||
|  | 		if (hlist_empty(&rbconn->hhead)) | ||||||
|  | 			gc_nodes[gc_count++] = rbconn; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (gc_count) { | ||||||
|  | 		no_gc = true; | ||||||
|  | 		tree_nodes_free(root, gc_nodes, gc_count); | ||||||
|  | 		/* tree_node_free before new allocation permits
 | ||||||
|  | 		 * allocator to re-use newly free'd object. | ||||||
|  | 		 * | ||||||
|  | 		 * This is a rare event; in most cases we will find | ||||||
|  | 		 * existing node to re-use. (or gc_count is 0). | ||||||
|  | 		 */ | ||||||
|  | 		goto restart; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* no match, need to insert new node */ | ||||||
|  | 	rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC); | ||||||
|  | 	if (rbconn == NULL) | ||||||
|  | 		return 0; | ||||||
| 
 | 
 | ||||||
| 	conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); | 	conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); | ||||||
| 	if (conn == NULL) | 	if (conn == NULL) { | ||||||
| 		return false; | 		kmem_cache_free(connlimit_rb_cachep, rbconn); | ||||||
|  | 		return 0; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	conn->tuple = *tuple; | 	conn->tuple = *tuple; | ||||||
| 	conn->addr = *addr; | 	conn->addr = *addr; | ||||||
| 	hlist_add_head(&conn->node, head); | 	rbconn->addr = *addr; | ||||||
| 	return true; | 
 | ||||||
|  | 	INIT_HLIST_HEAD(&rbconn->hhead); | ||||||
|  | 	hlist_add_head(&conn->node, &rbconn->hhead); | ||||||
|  | 
 | ||||||
|  | 	rb_link_node(&rbconn->node, parent, rbnode); | ||||||
|  | 	rb_insert_color(&rbconn->node, root); | ||||||
|  | 	return 1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int count_them(struct net *net, | static int count_them(struct net *net, | ||||||
| @ -177,26 +285,22 @@ static int count_them(struct net *net, | |||||||
| 		      const union nf_inet_addr *mask, | 		      const union nf_inet_addr *mask, | ||||||
| 		      u_int8_t family) | 		      u_int8_t family) | ||||||
| { | { | ||||||
| 	struct hlist_head *hhead; | 	struct rb_root *root; | ||||||
| 	int count; | 	int count; | ||||||
| 	u32 hash; | 	u32 hash; | ||||||
| 	bool addit = true; |  | ||||||
| 
 | 
 | ||||||
| 	if (family == NFPROTO_IPV6) | 	if (family == NFPROTO_IPV6) { | ||||||
| 		hash = connlimit_iphash6(addr, mask); | 		hash = connlimit_iphash6(addr, mask); | ||||||
| 	else | 		root = &data->climit_root6[hash]; | ||||||
|  | 	} else { | ||||||
| 		hash = connlimit_iphash(addr->ip & mask->ip); | 		hash = connlimit_iphash(addr->ip & mask->ip); | ||||||
| 
 | 		root = &data->climit_root4[hash]; | ||||||
| 	hhead = &data->iphash[hash]; | 	} | ||||||
| 
 | 
 | ||||||
| 	spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); | 	spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); | ||||||
| 	count = count_hlist(net, hhead, tuple, addr, mask, family, &addit); | 
 | ||||||
| 	if (addit) { | 	count = count_tree(net, root, tuple, addr, mask, family); | ||||||
| 		if (add_hlist(hhead, tuple, addr)) | 
 | ||||||
| 			count++; |  | ||||||
| 		else |  | ||||||
| 			count = -ENOMEM; |  | ||||||
| 	} |  | ||||||
| 	spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); | 	spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); | ||||||
| 
 | 
 | ||||||
| 	return count; | 	return count; | ||||||
| @ -212,7 +316,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||||||
| 	const struct nf_conntrack_tuple *tuple_ptr = &tuple; | 	const struct nf_conntrack_tuple *tuple_ptr = &tuple; | ||||||
| 	enum ip_conntrack_info ctinfo; | 	enum ip_conntrack_info ctinfo; | ||||||
| 	const struct nf_conn *ct; | 	const struct nf_conn *ct; | ||||||
| 	int connections; | 	unsigned int connections; | ||||||
| 
 | 
 | ||||||
| 	ct = nf_ct_get(skb, &ctinfo); | 	ct = nf_ct_get(skb, &ctinfo); | ||||||
| 	if (ct != NULL) | 	if (ct != NULL) | ||||||
| @ -233,7 +337,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) | |||||||
| 
 | 
 | ||||||
| 	connections = count_them(net, info->data, tuple_ptr, &addr, | 	connections = count_them(net, info->data, tuple_ptr, &addr, | ||||||
| 	                         &info->mask, par->family); | 	                         &info->mask, par->family); | ||||||
| 	if (connections < 0) | 	if (connections == 0) | ||||||
| 		/* kmalloc failed, drop it entirely */ | 		/* kmalloc failed, drop it entirely */ | ||||||
| 		goto hotdrop; | 		goto hotdrop; | ||||||
| 
 | 
 | ||||||
| @ -276,28 +380,44 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) | |||||||
| 	for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i) | 	for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i) | ||||||
| 		spin_lock_init(&info->data->locks[i]); | 		spin_lock_init(&info->data->locks[i]); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) | 	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) | ||||||
| 		INIT_HLIST_HEAD(&info->data->iphash[i]); | 		info->data->climit_root4[i] = RB_ROOT; | ||||||
|  | 	for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) | ||||||
|  | 		info->data->climit_root6[i] = RB_ROOT; | ||||||
| 
 | 
 | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static void destroy_tree(struct rb_root *r) | ||||||
|  | { | ||||||
|  | 	struct xt_connlimit_conn *conn; | ||||||
|  | 	struct xt_connlimit_rb *rbconn; | ||||||
|  | 	struct hlist_node *n; | ||||||
|  | 	struct rb_node *node; | ||||||
|  | 
 | ||||||
|  | 	while ((node = rb_first(r)) != NULL) { | ||||||
|  | 		rbconn = container_of(node, struct xt_connlimit_rb, node); | ||||||
|  | 
 | ||||||
|  | 		rb_erase(node, r); | ||||||
|  | 
 | ||||||
|  | 		hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) | ||||||
|  | 			kmem_cache_free(connlimit_conn_cachep, conn); | ||||||
|  | 
 | ||||||
|  | 		kmem_cache_free(connlimit_rb_cachep, rbconn); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) | static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) | ||||||
| { | { | ||||||
| 	const struct xt_connlimit_info *info = par->matchinfo; | 	const struct xt_connlimit_info *info = par->matchinfo; | ||||||
| 	struct xt_connlimit_conn *conn; |  | ||||||
| 	struct hlist_node *n; |  | ||||||
| 	struct hlist_head *hash = info->data->iphash; |  | ||||||
| 	unsigned int i; | 	unsigned int i; | ||||||
| 
 | 
 | ||||||
| 	nf_ct_l3proto_module_put(par->family); | 	nf_ct_l3proto_module_put(par->family); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { | 	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) | ||||||
| 		hlist_for_each_entry_safe(conn, n, &hash[i], node) { | 		destroy_tree(&info->data->climit_root4[i]); | ||||||
| 			hlist_del(&conn->node); | 	for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) | ||||||
| 			kmem_cache_free(connlimit_conn_cachep, conn); | 		destroy_tree(&info->data->climit_root6[i]); | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	kfree(info->data); | 	kfree(info->data); | ||||||
| } | } | ||||||
| @ -326,9 +446,18 @@ static int __init connlimit_mt_init(void) | |||||||
| 	if (!connlimit_conn_cachep) | 	if (!connlimit_conn_cachep) | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 
 | 
 | ||||||
| 	ret = xt_register_match(&connlimit_mt_reg); | 	connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb", | ||||||
| 	if (ret != 0) | 					   sizeof(struct xt_connlimit_rb), | ||||||
|  | 					   0, 0, NULL); | ||||||
|  | 	if (!connlimit_rb_cachep) { | ||||||
| 		kmem_cache_destroy(connlimit_conn_cachep); | 		kmem_cache_destroy(connlimit_conn_cachep); | ||||||
|  | 		return -ENOMEM; | ||||||
|  | 	} | ||||||
|  | 	ret = xt_register_match(&connlimit_mt_reg); | ||||||
|  | 	if (ret != 0) { | ||||||
|  | 		kmem_cache_destroy(connlimit_conn_cachep); | ||||||
|  | 		kmem_cache_destroy(connlimit_rb_cachep); | ||||||
|  | 	} | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -336,6 +465,7 @@ static void __exit connlimit_mt_exit(void) | |||||||
| { | { | ||||||
| 	xt_unregister_match(&connlimit_mt_reg); | 	xt_unregister_match(&connlimit_mt_reg); | ||||||
| 	kmem_cache_destroy(connlimit_conn_cachep); | 	kmem_cache_destroy(connlimit_conn_cachep); | ||||||
|  | 	kmem_cache_destroy(connlimit_rb_cachep); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| module_init(connlimit_mt_init); | module_init(connlimit_mt_init); | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Florian Westphal
						Florian Westphal