mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	 ee8d153d46
			
		
	
	
		ee8d153d46
		
	
	
	
	
		
			
			We already annotated most accesses to sk->sk_napi_id
We missed sk_mark_napi_id() and sk_mark_napi_id_once()
which might be called without socket lock held in UDP stack.
KCSAN reported :
BUG: KCSAN: data-race in udpv6_queue_rcv_one_skb / udpv6_queue_rcv_one_skb
write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 0:
 sk_mark_napi_id include/net/busy_poll.h:125 [inline]
 __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline]
 udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672
 udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689
 udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832
 __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913
 udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015
 ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409
 ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459
 dst_input include/net/dst.h:442 [inline]
 ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284
 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
 process_backlog+0x1d3/0x420 net/core/dev.c:5955
 napi_poll net/core/dev.c:6392 [inline]
 net_rx_action+0x3ae/0xa90 net/core/dev.c:6460
write to 0xffff888121c6d108 of 4 bytes by interrupt on cpu 1:
 sk_mark_napi_id include/net/busy_poll.h:125 [inline]
 __udpv6_queue_rcv_skb net/ipv6/udp.c:571 [inline]
 udpv6_queue_rcv_one_skb+0x70c/0xb40 net/ipv6/udp.c:672
 udpv6_queue_rcv_skb+0xb5/0x400 net/ipv6/udp.c:689
 udp6_unicast_rcv_skb.isra.0+0xd7/0x180 net/ipv6/udp.c:832
 __udp6_lib_rcv+0x69c/0x1770 net/ipv6/udp.c:913
 udpv6_rcv+0x2b/0x40 net/ipv6/udp.c:1015
 ip6_protocol_deliver_rcu+0x22a/0xbe0 net/ipv6/ip6_input.c:409
 ip6_input_finish+0x30/0x50 net/ipv6/ip6_input.c:450
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ip6_input+0x177/0x190 net/ipv6/ip6_input.c:459
 dst_input include/net/dst.h:442 [inline]
 ip6_rcv_finish+0x110/0x140 net/ipv6/ip6_input.c:76
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ipv6_rcv+0x1a1/0x1b0 net/ipv6/ip6_input.c:284
 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5010
 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5124
 process_backlog+0x1d3/0x420 net/core/dev.c:5955
Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 10890 Comm: syz-executor.0 Not tainted 5.4.0-rc3+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Fixes: e68b6e50fa ("udp: enable busy polling for all sockets")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
		
	
			
		
			
				
	
	
		
			141 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			141 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: GPL-2.0-only */
 | |
| /*
 | |
|  * net busy poll support
 | |
|  * Copyright(c) 2013 Intel Corporation.
 | |
|  *
 | |
|  * Author: Eliezer Tamir
 | |
|  *
 | |
|  * Contact Information:
 | |
|  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 | |
|  */
 | |
| 
 | |
| #ifndef _LINUX_NET_BUSY_POLL_H
 | |
| #define _LINUX_NET_BUSY_POLL_H
 | |
| 
 | |
| #include <linux/netdevice.h>
 | |
| #include <linux/sched/clock.h>
 | |
| #include <linux/sched/signal.h>
 | |
| #include <net/ip.h>
 | |
| 
 | |
| /*		0 - Reserved to indicate value not set
 | |
|  *     1..NR_CPUS - Reserved for sender_cpu
 | |
|  *  NR_CPUS+1..~0 - Region available for NAPI IDs
 | |
|  */
 | |
| #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1))
 | |
| 
 | |
| #ifdef CONFIG_NET_RX_BUSY_POLL
 | |
| 
 | |
| struct napi_struct;
 | |
| extern unsigned int sysctl_net_busy_read __read_mostly;
 | |
| extern unsigned int sysctl_net_busy_poll __read_mostly;
 | |
| 
 | |
| static inline bool net_busy_loop_on(void)
 | |
| {
 | |
| 	return sysctl_net_busy_poll;
 | |
| }
 | |
| 
 | |
| static inline bool sk_can_busy_loop(const struct sock *sk)
 | |
| {
 | |
| 	return sk->sk_ll_usec && !signal_pending(current);
 | |
| }
 | |
| 
 | |
| bool sk_busy_loop_end(void *p, unsigned long start_time);
 | |
| 
 | |
| void napi_busy_loop(unsigned int napi_id,
 | |
| 		    bool (*loop_end)(void *, unsigned long),
 | |
| 		    void *loop_end_arg);
 | |
| 
 | |
| #else /* CONFIG_NET_RX_BUSY_POLL */
 | |
/* Stub used when CONFIG_NET_RX_BUSY_POLL is not set: busy polling is never
 * enabled. Returns bool so that both build variants share one signature.
 */
static inline bool net_busy_loop_on(void)
{
	return false;
}
 | |
| 
 | |
/* Stub used when CONFIG_NET_RX_BUSY_POLL is not set: no socket can busy
 * loop. The socket is taken as a const pointer, like the real variant.
 */
static inline bool sk_can_busy_loop(const struct sock *sk)
{
	return false;
}
 | |
| 
 | |
| #endif /* CONFIG_NET_RX_BUSY_POLL */
 | |
| 
 | |
/* Timestamp used as the time base of the busy-loop timeout helpers.
 *
 * With CONFIG_NET_RX_BUSY_POLL this is local_clock() shifted right by 10
 * (presumably nanoseconds scaled down to roughly microseconds - confirm
 * against local_clock()); without it the result is always 0.
 */
static inline unsigned long busy_loop_current_time(void)
{
	unsigned long now = 0;

#ifdef CONFIG_NET_RX_BUSY_POLL
	now = (unsigned long)(local_clock() >> 10);
#endif
	return now;
}
 | |
| 
 | |
/* poll/select variant: the budget is the global sysctl_net_busy_poll value.
 *
 * Returns true once the current busy-loop time is past start_time plus the
 * budget. Also returns true when the budget is zero or when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline bool busy_loop_timeout(unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	const unsigned long budget = READ_ONCE(sysctl_net_busy_poll);

	if (budget != 0) {
		const unsigned long deadline = start_time + budget;
		const unsigned long cur = busy_loop_current_time();

		return time_after(cur, deadline);
	}
#endif
	return true;
}
 | |
| 
 | |
/* Per-socket variant: the budget is sk->sk_ll_usec.
 *
 * Returns true once the current busy-loop time is past start_time plus the
 * socket's budget. Also returns true when that budget is zero or when
 * CONFIG_NET_RX_BUSY_POLL is not set.
 */
static inline bool sk_busy_loop_timeout(struct sock *sk,
					unsigned long start_time)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	const unsigned long budget = READ_ONCE(sk->sk_ll_usec);

	if (budget != 0) {
		const unsigned long deadline = start_time + budget;
		const unsigned long cur = busy_loop_current_time();

		return time_after(cur, deadline);
	}
#endif
	return true;
}
 | |
| 
 | |
/* Busy poll the NAPI context recorded in sk->sk_napi_id.
 *
 * Nothing happens when the recorded id is below MIN_NAPI_ID or when
 * CONFIG_NET_RX_BUSY_POLL is not set. For a non-blocking caller no
 * loop_end callback is supplied; otherwise sk_busy_loop_end is passed and
 * the socket itself is always the callback argument.
 */
static inline void sk_busy_loop(struct sock *sk, int nonblock)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	bool (*done)(void *, unsigned long) = NULL;
	unsigned int id = READ_ONCE(sk->sk_napi_id);

	if (id < MIN_NAPI_ID)
		return;

	if (!nonblock)
		done = sk_busy_loop_end;

	napi_busy_loop(id, done, sk);
#endif
}
 | |
| 
 | |
/* Called from the NIC receive handler: stamp the skb with the id of the
 * NAPI context it was received on. No-op without CONFIG_NET_RX_BUSY_POLL.
 */
static inline void skb_mark_napi_id(struct sk_buff *skb,
				    struct napi_struct *napi)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	const unsigned int id = napi->napi_id;

	skb->napi_id = id;
#endif
}
 | |
| 
 | |
/* used in the protocol handler to propagate the napi_id to the socket
 *
 * This may be called without the socket lock held (e.g. from the UDP
 * receive path), so sk->sk_napi_id is only accessed through
 * READ_ONCE()/WRITE_ONCE(). The store is skipped when the socket already
 * carries the skb's napi_id, which avoids rewriting the shared socket
 * field on every received packet.
 */
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	if (READ_ONCE(sk->sk_napi_id) != skb->napi_id)
		WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
	/* NOTE(review): helper defined elsewhere; presumably records the skb's
	 * receive queue on the socket - confirm against its definition.
	 */
	sk_rx_queue_set(sk, skb);
}
 | |
| 
 | |
/* Variant for unconnected sockets: only the first napi_id seen is kept.
 *
 * If sk->sk_napi_id is already non-zero it is left untouched; otherwise it
 * takes the skb's napi_id. No-op without CONFIG_NET_RX_BUSY_POLL.
 */
static inline void sk_mark_napi_id_once(struct sock *sk,
					const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	if (READ_ONCE(sk->sk_napi_id))
		return;

	WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
}
 | |
| 
 | |
| #endif /* _LINUX_NET_BUSY_POLL_H */
 |