mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	netfilter: xtables: don't save/restore jumpstack offset
In most cases there is no reentrancy into ip/ip6tables. For skbs sent by REJECT or SYNPROXY targets, there is one level of reentrancy, but its not relevant as those targets issue an absolute verdict, i.e. the jumpstack can be clobbered since its not used after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc). So the only special case where it is relevant is the TEE target, which returns XT_CONTINUE. This patch changes ip(6)_do_table to always use the jump stack starting from 0. When we detect we're operating on an skb sent via TEE (percpu nf_skb_duplicated is 1) we switch to an alternate stack to leave the original one alone. Since there is no TEE support for arptables, it doesn't need to test if tee is active. The jump stack overflow tests are no longer needed as well -- since ->stacksize is the largest call depth we cannot exceed it. A much better alternative to the external jumpstack would be to just declare a jumps[32] stack on the local stack frame, but that would mean we'd have to reject iptables rulesets that used to work before. Another alternative would be to start rejecting rulesets with a larger call depth, e.g. 1000 -- in this case it would be feasible to allocate the entire stack in the percpu area which would avoid one dereference. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
		
							parent
							
								
									e7c8899f3e
								
							
						
					
					
						commit
						7814b6ec6d
					
				| @ -222,7 +222,6 @@ struct xt_table_info { | |||||||
| 	 * @stacksize jumps (number of user chains) can possibly be made. | 	 * @stacksize jumps (number of user chains) can possibly be made. | ||||||
| 	 */ | 	 */ | ||||||
| 	unsigned int stacksize; | 	unsigned int stacksize; | ||||||
| 	unsigned int __percpu *stackptr; |  | ||||||
| 	void ***jumpstack; | 	void ***jumpstack; | ||||||
| 
 | 
 | ||||||
| 	unsigned char entries[0] __aligned(8); | 	unsigned char entries[0] __aligned(8); | ||||||
|  | |||||||
| @ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||||||
| 	table_base = private->entries; | 	table_base = private->entries; | ||||||
| 	jumpstack  = (struct arpt_entry **)private->jumpstack[cpu]; | 	jumpstack  = (struct arpt_entry **)private->jumpstack[cpu]; | ||||||
| 
 | 
 | ||||||
|  | 	/* No TEE support for arptables, so no need to switch to alternate
 | ||||||
|  | 	 * stack.  All targets that reenter must return absolute verdicts. | ||||||
|  | 	 */ | ||||||
| 	e = get_entry(table_base, private->hook_entry[hook]); | 	e = get_entry(table_base, private->hook_entry[hook]); | ||||||
| 
 | 
 | ||||||
| 	acpar.in      = state->in; | 	acpar.in      = state->in; | ||||||
| @ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||||||
| 			} | 			} | ||||||
| 			if (table_base + v | 			if (table_base + v | ||||||
| 			    != arpt_next_entry(e)) { | 			    != arpt_next_entry(e)) { | ||||||
| 
 |  | ||||||
| 				if (stackidx >= private->stacksize) { |  | ||||||
| 					verdict = NF_DROP; |  | ||||||
| 					break; |  | ||||||
| 				} |  | ||||||
| 				jumpstack[stackidx++] = e; | 				jumpstack[stackidx++] = e; | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| @ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||||||
| 			continue; | 			continue; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		/* Targets which reenter must return
 |  | ||||||
| 		 * abs. verdicts |  | ||||||
| 		 */ |  | ||||||
| 		acpar.target   = t->u.kernel.target; | 		acpar.target   = t->u.kernel.target; | ||||||
| 		acpar.targinfo = t->data; | 		acpar.targinfo = t->data; | ||||||
| 		verdict = t->u.kernel.target->target(skb, &acpar); | 		verdict = t->u.kernel.target->target(skb, &acpar); | ||||||
|  | |||||||
| @ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb, | |||||||
| 	const char *indev, *outdev; | 	const char *indev, *outdev; | ||||||
| 	const void *table_base; | 	const void *table_base; | ||||||
| 	struct ipt_entry *e, **jumpstack; | 	struct ipt_entry *e, **jumpstack; | ||||||
| 	unsigned int *stackptr, origptr, cpu; | 	unsigned int stackidx, cpu; | ||||||
| 	const struct xt_table_info *private; | 	const struct xt_table_info *private; | ||||||
| 	struct xt_action_param acpar; | 	struct xt_action_param acpar; | ||||||
| 	unsigned int addend; | 	unsigned int addend; | ||||||
| 
 | 
 | ||||||
| 	/* Initialization */ | 	/* Initialization */ | ||||||
|  | 	stackidx = 0; | ||||||
| 	ip = ip_hdr(skb); | 	ip = ip_hdr(skb); | ||||||
| 	indev = state->in ? state->in->name : nulldevname; | 	indev = state->in ? state->in->name : nulldevname; | ||||||
| 	outdev = state->out ? state->out->name : nulldevname; | 	outdev = state->out ? state->out->name : nulldevname; | ||||||
| @ -331,13 +332,20 @@ ipt_do_table(struct sk_buff *skb, | |||||||
| 	smp_read_barrier_depends(); | 	smp_read_barrier_depends(); | ||||||
| 	table_base = private->entries; | 	table_base = private->entries; | ||||||
| 	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu]; | 	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu]; | ||||||
| 	stackptr   = per_cpu_ptr(private->stackptr, cpu); | 
 | ||||||
| 	origptr    = *stackptr; | 	/* Switch to alternate jumpstack if we're being invoked via TEE.
 | ||||||
|  | 	 * TEE issues XT_CONTINUE verdict on original skb so we must not | ||||||
|  | 	 * clobber the jumpstack. | ||||||
|  | 	 * | ||||||
|  | 	 * For recursion via REJECT or SYNPROXY the stack will be clobbered | ||||||
|  | 	 * but it is no problem since absolute verdict is issued by these. | ||||||
|  | 	 */ | ||||||
|  | 	jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); | ||||||
| 
 | 
 | ||||||
| 	e = get_entry(table_base, private->hook_entry[hook]); | 	e = get_entry(table_base, private->hook_entry[hook]); | ||||||
| 
 | 
 | ||||||
| 	pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", | 	pr_debug("Entering %s(hook %u), UF %p\n", | ||||||
| 		 table->name, hook, origptr, | 		 table->name, hook, | ||||||
| 		 get_entry(table_base, private->underflow[hook])); | 		 get_entry(table_base, private->underflow[hook])); | ||||||
| 
 | 
 | ||||||
| 	do { | 	do { | ||||||
| @ -383,28 +391,24 @@ ipt_do_table(struct sk_buff *skb, | |||||||
| 					verdict = (unsigned int)(-v) - 1; | 					verdict = (unsigned int)(-v) - 1; | ||||||
| 					break; | 					break; | ||||||
| 				} | 				} | ||||||
| 				if (*stackptr <= origptr) { | 				if (stackidx == 0) { | ||||||
| 					e = get_entry(table_base, | 					e = get_entry(table_base, | ||||||
| 					    private->underflow[hook]); | 					    private->underflow[hook]); | ||||||
| 					pr_debug("Underflow (this is normal) " | 					pr_debug("Underflow (this is normal) " | ||||||
| 						 "to %p\n", e); | 						 "to %p\n", e); | ||||||
| 				} else { | 				} else { | ||||||
| 					e = jumpstack[--*stackptr]; | 					e = jumpstack[--stackidx]; | ||||||
| 					pr_debug("Pulled %p out from pos %u\n", | 					pr_debug("Pulled %p out from pos %u\n", | ||||||
| 						 e, *stackptr); | 						 e, stackidx); | ||||||
| 					e = ipt_next_entry(e); | 					e = ipt_next_entry(e); | ||||||
| 				} | 				} | ||||||
| 				continue; | 				continue; | ||||||
| 			} | 			} | ||||||
| 			if (table_base + v != ipt_next_entry(e) && | 			if (table_base + v != ipt_next_entry(e) && | ||||||
| 			    !(e->ip.flags & IPT_F_GOTO)) { | 			    !(e->ip.flags & IPT_F_GOTO)) { | ||||||
| 				if (*stackptr >= private->stacksize) { | 				jumpstack[stackidx++] = e; | ||||||
| 					verdict = NF_DROP; |  | ||||||
| 					break; |  | ||||||
| 				} |  | ||||||
| 				jumpstack[(*stackptr)++] = e; |  | ||||||
| 				pr_debug("Pushed %p into pos %u\n", | 				pr_debug("Pushed %p into pos %u\n", | ||||||
| 					 e, *stackptr - 1); | 					 e, stackidx - 1); | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			e = get_entry(table_base, v); | 			e = get_entry(table_base, v); | ||||||
| @ -423,9 +427,8 @@ ipt_do_table(struct sk_buff *skb, | |||||||
| 			/* Verdict */ | 			/* Verdict */ | ||||||
| 			break; | 			break; | ||||||
| 	} while (!acpar.hotdrop); | 	} while (!acpar.hotdrop); | ||||||
| 	pr_debug("Exiting %s; resetting sp from %u to %u\n", | 	pr_debug("Exiting %s; sp at %u\n", __func__, stackidx); | ||||||
| 		 __func__, *stackptr, origptr); | 
 | ||||||
| 	*stackptr = origptr; |  | ||||||
|  	xt_write_recseq_end(addend); |  	xt_write_recseq_end(addend); | ||||||
|  	local_bh_enable(); |  	local_bh_enable(); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -324,12 +324,13 @@ ip6t_do_table(struct sk_buff *skb, | |||||||
| 	const char *indev, *outdev; | 	const char *indev, *outdev; | ||||||
| 	const void *table_base; | 	const void *table_base; | ||||||
| 	struct ip6t_entry *e, **jumpstack; | 	struct ip6t_entry *e, **jumpstack; | ||||||
| 	unsigned int *stackptr, origptr, cpu; | 	unsigned int stackidx, cpu; | ||||||
| 	const struct xt_table_info *private; | 	const struct xt_table_info *private; | ||||||
| 	struct xt_action_param acpar; | 	struct xt_action_param acpar; | ||||||
| 	unsigned int addend; | 	unsigned int addend; | ||||||
| 
 | 
 | ||||||
| 	/* Initialization */ | 	/* Initialization */ | ||||||
|  | 	stackidx = 0; | ||||||
| 	indev = state->in ? state->in->name : nulldevname; | 	indev = state->in ? state->in->name : nulldevname; | ||||||
| 	outdev = state->out ? state->out->name : nulldevname; | 	outdev = state->out ? state->out->name : nulldevname; | ||||||
| 	/* We handle fragments by dealing with the first fragment as
 | 	/* We handle fragments by dealing with the first fragment as
 | ||||||
| @ -357,8 +358,15 @@ ip6t_do_table(struct sk_buff *skb, | |||||||
| 	cpu        = smp_processor_id(); | 	cpu        = smp_processor_id(); | ||||||
| 	table_base = private->entries; | 	table_base = private->entries; | ||||||
| 	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu]; | 	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu]; | ||||||
| 	stackptr   = per_cpu_ptr(private->stackptr, cpu); | 
 | ||||||
| 	origptr    = *stackptr; | 	/* Switch to alternate jumpstack if we're being invoked via TEE.
 | ||||||
|  | 	 * TEE issues XT_CONTINUE verdict on original skb so we must not | ||||||
|  | 	 * clobber the jumpstack. | ||||||
|  | 	 * | ||||||
|  | 	 * For recursion via REJECT or SYNPROXY the stack will be clobbered | ||||||
|  | 	 * but it is no problem since absolute verdict is issued by these. | ||||||
|  | 	 */ | ||||||
|  | 	jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); | ||||||
| 
 | 
 | ||||||
| 	e = get_entry(table_base, private->hook_entry[hook]); | 	e = get_entry(table_base, private->hook_entry[hook]); | ||||||
| 
 | 
 | ||||||
| @ -406,20 +414,16 @@ ip6t_do_table(struct sk_buff *skb, | |||||||
| 					verdict = (unsigned int)(-v) - 1; | 					verdict = (unsigned int)(-v) - 1; | ||||||
| 					break; | 					break; | ||||||
| 				} | 				} | ||||||
| 				if (*stackptr <= origptr) | 				if (stackidx == 0) | ||||||
| 					e = get_entry(table_base, | 					e = get_entry(table_base, | ||||||
| 					    private->underflow[hook]); | 					    private->underflow[hook]); | ||||||
| 				else | 				else | ||||||
| 					e = ip6t_next_entry(jumpstack[--*stackptr]); | 					e = ip6t_next_entry(jumpstack[--stackidx]); | ||||||
| 				continue; | 				continue; | ||||||
| 			} | 			} | ||||||
| 			if (table_base + v != ip6t_next_entry(e) && | 			if (table_base + v != ip6t_next_entry(e) && | ||||||
| 			    !(e->ipv6.flags & IP6T_F_GOTO)) { | 			    !(e->ipv6.flags & IP6T_F_GOTO)) { | ||||||
| 				if (*stackptr >= private->stacksize) { | 				jumpstack[stackidx++] = e; | ||||||
| 					verdict = NF_DROP; |  | ||||||
| 					break; |  | ||||||
| 				} |  | ||||||
| 				jumpstack[(*stackptr)++] = e; |  | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			e = get_entry(table_base, v); | 			e = get_entry(table_base, v); | ||||||
| @ -437,8 +441,6 @@ ip6t_do_table(struct sk_buff *skb, | |||||||
| 			break; | 			break; | ||||||
| 	} while (!acpar.hotdrop); | 	} while (!acpar.hotdrop); | ||||||
| 
 | 
 | ||||||
| 	*stackptr = origptr; |  | ||||||
| 
 |  | ||||||
|  	xt_write_recseq_end(addend); |  	xt_write_recseq_end(addend); | ||||||
|  	local_bh_enable(); |  	local_bh_enable(); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { | |||||||
| 	[NFPROTO_IPV6]   = "ip6", | 	[NFPROTO_IPV6]   = "ip6", | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /* Allow this many total (re)entries. */ |  | ||||||
| static const unsigned int xt_jumpstack_multiplier = 2; |  | ||||||
| 
 |  | ||||||
| /* Registration hooks for targets. */ | /* Registration hooks for targets. */ | ||||||
| int xt_register_target(struct xt_target *target) | int xt_register_target(struct xt_target *target) | ||||||
| { | { | ||||||
| @ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info) | |||||||
| 		kvfree(info->jumpstack); | 		kvfree(info->jumpstack); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	free_percpu(info->stackptr); |  | ||||||
| 
 |  | ||||||
| 	kvfree(info); | 	kvfree(info); | ||||||
| } | } | ||||||
| EXPORT_SYMBOL(xt_free_table_info); | EXPORT_SYMBOL(xt_free_table_info); | ||||||
| @ -737,10 +732,6 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) | |||||||
| 	unsigned int size; | 	unsigned int size; | ||||||
| 	int cpu; | 	int cpu; | ||||||
| 
 | 
 | ||||||
| 	i->stackptr = alloc_percpu(unsigned int); |  | ||||||
| 	if (i->stackptr == NULL) |  | ||||||
| 		return -ENOMEM; |  | ||||||
| 
 |  | ||||||
| 	size = sizeof(void **) * nr_cpu_ids; | 	size = sizeof(void **) * nr_cpu_ids; | ||||||
| 	if (size > PAGE_SIZE) | 	if (size > PAGE_SIZE) | ||||||
| 		i->jumpstack = vzalloc(size); | 		i->jumpstack = vzalloc(size); | ||||||
| @ -753,8 +744,17 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) | |||||||
| 	if (i->stacksize == 0) | 	if (i->stacksize == 0) | ||||||
| 		return 0; | 		return 0; | ||||||
| 
 | 
 | ||||||
| 	i->stacksize *= xt_jumpstack_multiplier; | 	/* Jumpstack needs to be able to record two full callchains, one
 | ||||||
| 	size = sizeof(void *) * i->stacksize; | 	 * from the first rule set traversal, plus one table reentrancy | ||||||
|  | 	 * via -j TEE without clobbering the callchain that brought us to | ||||||
|  | 	 * TEE target. | ||||||
|  | 	 * | ||||||
|  | 	 * This is done by allocating two jumpstacks per cpu, on reentry | ||||||
|  | 	 * the upper half of the stack is used. | ||||||
|  | 	 * | ||||||
|  | 	 * see the jumpstack setup in ipt_do_table() for more details. | ||||||
|  | 	 */ | ||||||
|  | 	size = sizeof(void *) * i->stacksize * 2u; | ||||||
| 	for_each_possible_cpu(cpu) { | 	for_each_possible_cpu(cpu) { | ||||||
| 		if (size > PAGE_SIZE) | 		if (size > PAGE_SIZE) | ||||||
| 			i->jumpstack[cpu] = vmalloc_node(size, | 			i->jumpstack[cpu] = vmalloc_node(size, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Florian Westphal
						Florian Westphal