mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	xprtrdma: Prevent inline overflow
When deciding whether to send a Call inline, rpcrdma_marshal_req doesn't take into account header bytes consumed by chunk lists. This results in Call messages on the wire that are sometimes larger than the inline threshold. Likewise, when a Write list or Reply chunk is in play, the server's reply has to emit an RDMA Send that includes a larger-than-minimal RPC-over-RDMA header. The actual size of a Call message cannot be estimated until after the chunk lists have been registered. Thus the size of each RPC-over-RDMA header can be estimated only after chunks are registered; but the decision to register chunks is based on the size of that header. Chicken, meet egg. The best a client can do is estimate header size based on the largest header that might occur, and then ensure that inline content is always smaller than that. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Tested-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
This commit is contained in:
		
							parent
							
								
									949317464b
								
							
						
					
					
						commit
						302d3deb20
					
				| @ -39,6 +39,9 @@ static int | |||||||
| fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||||||
| 	    struct rpcrdma_create_data_internal *cdata) | 	    struct rpcrdma_create_data_internal *cdata) | ||||||
| { | { | ||||||
|  | 	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||||||
|  | 						      RPCRDMA_MAX_DATA_SEGS / | ||||||
|  | 						      RPCRDMA_MAX_FMR_SGES)); | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -231,6 +231,9 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||||||
| 					       depth; | 					       depth; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||||||
|  | 						      RPCRDMA_MAX_DATA_SEGS / | ||||||
|  | 						      ia->ri_max_frmr_depth)); | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||||||
| 		       __func__, PTR_ERR(mr)); | 		       __func__, PTR_ERR(mr)); | ||||||
| 		return -ENOMEM; | 		return -ENOMEM; | ||||||
| 	} | 	} | ||||||
| 
 |  | ||||||
| 	ia->ri_dma_mr = mr; | 	ia->ri_dma_mr = mr; | ||||||
|  | 
 | ||||||
|  | 	rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int, | ||||||
|  | 						      RPCRDMA_MAX_DATA_SEGS, | ||||||
|  | 						      RPCRDMA_MAX_HDR_SEGS)); | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -61,7 +61,6 @@ enum rpcrdma_chunktype { | |||||||
| 	rpcrdma_replych | 	rpcrdma_replych | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |  | ||||||
| static const char transfertypes[][12] = { | static const char transfertypes[][12] = { | ||||||
| 	"pure inline",	/* no chunks */ | 	"pure inline",	/* no chunks */ | ||||||
| 	" read chunk",	/* some argument via rdma read */ | 	" read chunk",	/* some argument via rdma read */ | ||||||
| @ -69,18 +68,72 @@ static const char transfertypes[][12] = { | |||||||
| 	"write chunk",	/* some result via rdma write */ | 	"write chunk",	/* some result via rdma write */ | ||||||
| 	"reply chunk"	/* entire reply via rdma write */ | 	"reply chunk"	/* entire reply via rdma write */ | ||||||
| }; | }; | ||||||
| #endif | 
 | ||||||
|  | /* Returns size of largest RPC-over-RDMA header in a Call message
 | ||||||
|  |  * | ||||||
|  |  * The client marshals only one chunk list per Call message. | ||||||
|  |  * The largest list is the Read list. | ||||||
|  |  */ | ||||||
|  | static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) | ||||||
|  | { | ||||||
|  | 	unsigned int size; | ||||||
|  | 
 | ||||||
|  | 	/* Fixed header fields and list discriminators */ | ||||||
|  | 	size = RPCRDMA_HDRLEN_MIN; | ||||||
|  | 
 | ||||||
|  | 	/* Maximum Read list size */ | ||||||
|  | 	maxsegs += 2;	/* segment for head and tail buffers */ | ||||||
|  | 	size = maxsegs * sizeof(struct rpcrdma_read_chunk); | ||||||
|  | 
 | ||||||
|  | 	dprintk("RPC:       %s: max call header size = %u\n", | ||||||
|  | 		__func__, size); | ||||||
|  | 	return size; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Returns size of largest RPC-over-RDMA header in a Reply message
 | ||||||
|  |  * | ||||||
|  |  * There is only one Write list or one Reply chunk per Reply | ||||||
|  |  * message.  The larger list is the Write list. | ||||||
|  |  */ | ||||||
|  | static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) | ||||||
|  | { | ||||||
|  | 	unsigned int size; | ||||||
|  | 
 | ||||||
|  | 	/* Fixed header fields and list discriminators */ | ||||||
|  | 	size = RPCRDMA_HDRLEN_MIN; | ||||||
|  | 
 | ||||||
|  | 	/* Maximum Write list size */ | ||||||
|  | 	maxsegs += 2;	/* segment for head and tail buffers */ | ||||||
|  | 	size = sizeof(__be32);		/* segment count */ | ||||||
|  | 	size += maxsegs * sizeof(struct rpcrdma_segment); | ||||||
|  | 	size += sizeof(__be32);	/* list discriminator */ | ||||||
|  | 
 | ||||||
|  | 	dprintk("RPC:       %s: max reply header size = %u\n", | ||||||
|  | 		__func__, size); | ||||||
|  | 	return size; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia, | ||||||
|  | 				  struct rpcrdma_create_data_internal *cdata, | ||||||
|  | 				  unsigned int maxsegs) | ||||||
|  | { | ||||||
|  | 	ia->ri_max_inline_write = cdata->inline_wsize - | ||||||
|  | 				  rpcrdma_max_call_header_size(maxsegs); | ||||||
|  | 	ia->ri_max_inline_read = cdata->inline_rsize - | ||||||
|  | 				 rpcrdma_max_reply_header_size(maxsegs); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| /* The client can send a request inline as long as the RPCRDMA header
 | /* The client can send a request inline as long as the RPCRDMA header
 | ||||||
|  * plus the RPC call fit under the transport's inline limit. If the |  * plus the RPC call fit under the transport's inline limit. If the | ||||||
|  * combined call message size exceeds that limit, the client must use |  * combined call message size exceeds that limit, the client must use | ||||||
|  * the read chunk list for this operation. |  * the read chunk list for this operation. | ||||||
|  */ |  */ | ||||||
| static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, | ||||||
|  | 				struct rpc_rqst *rqst) | ||||||
| { | { | ||||||
| 	unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len; | 	struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||||||
| 
 | 
 | ||||||
| 	return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); | 	return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* The client can't know how large the actual reply will be. Thus it
 | /* The client can't know how large the actual reply will be. Thus it
 | ||||||
| @ -89,11 +142,12 @@ static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | |||||||
|  * limit, the client must provide a write list or a reply chunk for |  * limit, the client must provide a write list or a reply chunk for | ||||||
|  * this request. |  * this request. | ||||||
|  */ |  */ | ||||||
| static bool rpcrdma_results_inline(struct rpc_rqst *rqst) | static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, | ||||||
|  | 				   struct rpc_rqst *rqst) | ||||||
| { | { | ||||||
| 	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen; | 	struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||||||
| 
 | 
 | ||||||
| 	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst); | 	return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static int | static int | ||||||
| @ -492,7 +546,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||||||
| 	 */ | 	 */ | ||||||
| 	if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | 	if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | ||||||
| 		wtype = rpcrdma_writech; | 		wtype = rpcrdma_writech; | ||||||
| 	else if (rpcrdma_results_inline(rqst)) | 	else if (rpcrdma_results_inline(r_xprt, rqst)) | ||||||
| 		wtype = rpcrdma_noch; | 		wtype = rpcrdma_noch; | ||||||
| 	else | 	else | ||||||
| 		wtype = rpcrdma_replych; | 		wtype = rpcrdma_replych; | ||||||
| @ -511,7 +565,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||||||
| 	 * that both has a data payload, and whose non-data arguments | 	 * that both has a data payload, and whose non-data arguments | ||||||
| 	 * by themselves are larger than the inline threshold. | 	 * by themselves are larger than the inline threshold. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (rpcrdma_args_inline(rqst)) { | 	if (rpcrdma_args_inline(r_xprt, rqst)) { | ||||||
| 		rtype = rpcrdma_noch; | 		rtype = rpcrdma_noch; | ||||||
| 	} else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { | 	} else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { | ||||||
| 		rtype = rpcrdma_readch; | 		rtype = rpcrdma_readch; | ||||||
| @ -561,6 +615,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||||||
| 	if (hdrlen < 0) | 	if (hdrlen < 0) | ||||||
| 		return hdrlen; | 		return hdrlen; | ||||||
| 
 | 
 | ||||||
|  | 	if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | ||||||
|  | 		goto out_overflow; | ||||||
|  | 
 | ||||||
| 	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd" | 	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd" | ||||||
| 		" headerp 0x%p base 0x%p lkey 0x%x\n", | 		" headerp 0x%p base 0x%p lkey 0x%x\n", | ||||||
| 		__func__, transfertypes[wtype], hdrlen, rpclen, | 		__func__, transfertypes[wtype], hdrlen, rpclen, | ||||||
| @ -587,6 +644,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||||||
| 
 | 
 | ||||||
| 	req->rl_niovs = 2; | 	req->rl_niovs = 2; | ||||||
| 	return 0; | 	return 0; | ||||||
|  | 
 | ||||||
|  | out_overflow: | ||||||
|  | 	pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s\n", | ||||||
|  | 		hdrlen, rpclen, transfertypes[wtype]); | ||||||
|  | 	/* Terminate this RPC. Chunks registered above will be
 | ||||||
|  | 	 * released by xprt_release -> xprt_rdma_free . | ||||||
|  | 	 */ | ||||||
|  | 	return -EIO; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | |||||||
| @ -73,6 +73,8 @@ struct rpcrdma_ia { | |||||||
| 	struct completion	ri_done; | 	struct completion	ri_done; | ||||||
| 	int			ri_async_rc; | 	int			ri_async_rc; | ||||||
| 	unsigned int		ri_max_frmr_depth; | 	unsigned int		ri_max_frmr_depth; | ||||||
|  | 	unsigned int		ri_max_inline_write; | ||||||
|  | 	unsigned int		ri_max_inline_read; | ||||||
| 	struct ib_qp_attr	ri_qp_attr; | 	struct ib_qp_attr	ri_qp_attr; | ||||||
| 	struct ib_qp_init_attr	ri_qp_init_attr; | 	struct ib_qp_init_attr	ri_qp_init_attr; | ||||||
| }; | }; | ||||||
| @ -538,6 +540,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||||||
|  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | ||||||
|  */ |  */ | ||||||
| int rpcrdma_marshal_req(struct rpc_rqst *); | int rpcrdma_marshal_req(struct rpc_rqst *); | ||||||
|  | void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *, | ||||||
|  | 				  struct rpcrdma_create_data_internal *, | ||||||
|  | 				  unsigned int); | ||||||
| 
 | 
 | ||||||
| /* RPC/RDMA module init - xprtrdma/transport.c
 | /* RPC/RDMA module init - xprtrdma/transport.c
 | ||||||
|  */ |  */ | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Chuck Lever
						Chuck Lever