mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	xprtrdma: Prevent inline overflow
When deciding whether to send a Call inline, rpcrdma_marshal_req doesn't take into account header bytes consumed by chunk lists. This results in Call messages on the wire that are sometimes larger than the inline threshold. Likewise, when a Write list or Reply chunk is in play, the server's reply has to emit an RDMA Send that includes a larger-than-minimal RPC-over-RDMA header. The actual size of a Call message cannot be estimated until after the chunk lists have been registered. Thus the size of each RPC-over-RDMA header can be estimated only after chunks are registered; but the decision to register chunks is based on the size of that header. Chicken, meet egg. The best a client can do is estimate header size based on the largest header that might occur, and then ensure that inline content is always smaller than that. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Tested-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
This commit is contained in:
		
							parent
							
								
									949317464b
								
							
						
					
					
						commit
						302d3deb20
					
				| @ -39,6 +39,9 @@ static int | ||||
| fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||||
| 	    struct rpcrdma_create_data_internal *cdata) | ||||
| { | ||||
| 	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||||
| 						      RPCRDMA_MAX_DATA_SEGS / | ||||
| 						      RPCRDMA_MAX_FMR_SGES)); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -231,6 +231,9 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||||
| 					       depth; | ||||
| 	} | ||||
| 
 | ||||
| 	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, | ||||
| 						      RPCRDMA_MAX_DATA_SEGS / | ||||
| 						      ia->ri_max_frmr_depth)); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -36,8 +36,11 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | ||||
| 		       __func__, PTR_ERR(mr)); | ||||
| 		return -ENOMEM; | ||||
| 	} | ||||
| 
 | ||||
| 	ia->ri_dma_mr = mr; | ||||
| 
 | ||||
| 	rpcrdma_set_max_header_sizes(ia, cdata, min_t(unsigned int, | ||||
| 						      RPCRDMA_MAX_DATA_SEGS, | ||||
| 						      RPCRDMA_MAX_HDR_SEGS)); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -61,7 +61,6 @@ enum rpcrdma_chunktype { | ||||
| 	rpcrdma_replych | ||||
| }; | ||||
| 
 | ||||
| #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||||
| static const char transfertypes[][12] = { | ||||
| 	"pure inline",	/* no chunks */ | ||||
| 	" read chunk",	/* some argument via rdma read */ | ||||
| @ -69,18 +68,72 @@ static const char transfertypes[][12] = { | ||||
| 	"write chunk",	/* some result via rdma write */ | ||||
| 	"reply chunk"	/* entire reply via rdma write */ | ||||
| }; | ||||
| #endif | ||||
| 
 | ||||
| /* Returns size of largest RPC-over-RDMA header in a Call message
 | ||||
|  * | ||||
|  * The client marshals only one chunk list per Call message. | ||||
|  * The largest list is the Read list. | ||||
|  */ | ||||
| static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) | ||||
| { | ||||
| 	unsigned int size; | ||||
| 
 | ||||
| 	/* Fixed header fields and list discriminators */ | ||||
| 	size = RPCRDMA_HDRLEN_MIN; | ||||
| 
 | ||||
| 	/* Maximum Read list size */ | ||||
| 	maxsegs += 2;	/* segment for head and tail buffers */ | ||||
| 	size = maxsegs * sizeof(struct rpcrdma_read_chunk); | ||||
| 
 | ||||
| 	dprintk("RPC:       %s: max call header size = %u\n", | ||||
| 		__func__, size); | ||||
| 	return size; | ||||
| } | ||||
| 
 | ||||
| /* Returns size of largest RPC-over-RDMA header in a Reply message
 | ||||
|  * | ||||
|  * There is only one Write list or one Reply chunk per Reply | ||||
|  * message.  The larger list is the Write list. | ||||
|  */ | ||||
| static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) | ||||
| { | ||||
| 	unsigned int size; | ||||
| 
 | ||||
| 	/* Fixed header fields and list discriminators */ | ||||
| 	size = RPCRDMA_HDRLEN_MIN; | ||||
| 
 | ||||
| 	/* Maximum Write list size */ | ||||
| 	maxsegs += 2;	/* segment for head and tail buffers */ | ||||
| 	size = sizeof(__be32);		/* segment count */ | ||||
| 	size += maxsegs * sizeof(struct rpcrdma_segment); | ||||
| 	size += sizeof(__be32);	/* list discriminator */ | ||||
| 
 | ||||
| 	dprintk("RPC:       %s: max reply header size = %u\n", | ||||
| 		__func__, size); | ||||
| 	return size; | ||||
| } | ||||
| 
 | ||||
| void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia, | ||||
| 				  struct rpcrdma_create_data_internal *cdata, | ||||
| 				  unsigned int maxsegs) | ||||
| { | ||||
| 	ia->ri_max_inline_write = cdata->inline_wsize - | ||||
| 				  rpcrdma_max_call_header_size(maxsegs); | ||||
| 	ia->ri_max_inline_read = cdata->inline_rsize - | ||||
| 				 rpcrdma_max_reply_header_size(maxsegs); | ||||
| } | ||||
| 
 | ||||
| /* The client can send a request inline as long as the RPCRDMA header
 | ||||
|  * plus the RPC call fit under the transport's inline limit. If the | ||||
|  * combined call message size exceeds that limit, the client must use | ||||
|  * the read chunk list for this operation. | ||||
|  */ | ||||
| static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | ||||
| static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, | ||||
| 				struct rpc_rqst *rqst) | ||||
| { | ||||
| 	unsigned int callsize = RPCRDMA_HDRLEN_MIN + rqst->rq_snd_buf.len; | ||||
| 	struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||||
| 
 | ||||
| 	return callsize <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); | ||||
| 	return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; | ||||
| } | ||||
| 
 | ||||
| /* The client can't know how large the actual reply will be. Thus it
 | ||||
| @ -89,11 +142,12 @@ static bool rpcrdma_args_inline(struct rpc_rqst *rqst) | ||||
|  * limit, the client must provide a write list or a reply chunk for | ||||
|  * this request. | ||||
|  */ | ||||
| static bool rpcrdma_results_inline(struct rpc_rqst *rqst) | ||||
| static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, | ||||
| 				   struct rpc_rqst *rqst) | ||||
| { | ||||
| 	unsigned int repsize = RPCRDMA_HDRLEN_MIN + rqst->rq_rcv_buf.buflen; | ||||
| 	struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||||
| 
 | ||||
| 	return repsize <= RPCRDMA_INLINE_READ_THRESHOLD(rqst); | ||||
| 	return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| @ -492,7 +546,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | ||||
| 	 */ | ||||
| 	if (rqst->rq_rcv_buf.flags & XDRBUF_READ) | ||||
| 		wtype = rpcrdma_writech; | ||||
| 	else if (rpcrdma_results_inline(rqst)) | ||||
| 	else if (rpcrdma_results_inline(r_xprt, rqst)) | ||||
| 		wtype = rpcrdma_noch; | ||||
| 	else | ||||
| 		wtype = rpcrdma_replych; | ||||
| @ -511,7 +565,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | ||||
| 	 * that both has a data payload, and whose non-data arguments | ||||
| 	 * by themselves are larger than the inline threshold. | ||||
| 	 */ | ||||
| 	if (rpcrdma_args_inline(rqst)) { | ||||
| 	if (rpcrdma_args_inline(r_xprt, rqst)) { | ||||
| 		rtype = rpcrdma_noch; | ||||
| 	} else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) { | ||||
| 		rtype = rpcrdma_readch; | ||||
| @ -561,6 +615,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | ||||
| 	if (hdrlen < 0) | ||||
| 		return hdrlen; | ||||
| 
 | ||||
| 	if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) | ||||
| 		goto out_overflow; | ||||
| 
 | ||||
| 	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd" | ||||
| 		" headerp 0x%p base 0x%p lkey 0x%x\n", | ||||
| 		__func__, transfertypes[wtype], hdrlen, rpclen, | ||||
| @ -587,6 +644,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | ||||
| 
 | ||||
| 	req->rl_niovs = 2; | ||||
| 	return 0; | ||||
| 
 | ||||
| out_overflow: | ||||
| 	pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s\n", | ||||
| 		hdrlen, rpclen, transfertypes[wtype]); | ||||
| 	/* Terminate this RPC. Chunks registered above will be
 | ||||
| 	 * released by xprt_release -> xprt_rdma_free . | ||||
| 	 */ | ||||
| 	return -EIO; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | ||||
| @ -73,6 +73,8 @@ struct rpcrdma_ia { | ||||
| 	struct completion	ri_done; | ||||
| 	int			ri_async_rc; | ||||
| 	unsigned int		ri_max_frmr_depth; | ||||
| 	unsigned int		ri_max_inline_write; | ||||
| 	unsigned int		ri_max_inline_read; | ||||
| 	struct ib_qp_attr	ri_qp_attr; | ||||
| 	struct ib_qp_init_attr	ri_qp_init_attr; | ||||
| }; | ||||
| @ -538,6 +540,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | ||||
|  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | ||||
|  */ | ||||
| int rpcrdma_marshal_req(struct rpc_rqst *); | ||||
| void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *, | ||||
| 				  struct rpcrdma_create_data_internal *, | ||||
| 				  unsigned int); | ||||
| 
 | ||||
| /* RPC/RDMA module init - xprtrdma/transport.c
 | ||||
|  */ | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Chuck Lever
						Chuck Lever