mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	IB/mlx4: Add support for masked atomic operations
Add support for masked atomic operations (masked compare and swap, masked fetch and add).

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
		
							parent
							
								
									5e80ba8ff0
								
							
						
					
					
						commit
						6fa8f71984
					
				| @ -661,6 +661,14 @@ repoll: | ||||
| 			wc->opcode    = IB_WC_FETCH_ADD; | ||||
| 			wc->byte_len  = 8; | ||||
| 			break; | ||||
| 		case MLX4_OPCODE_MASKED_ATOMIC_CS: | ||||
| 			wc->opcode    = IB_WC_MASKED_COMP_SWAP; | ||||
| 			wc->byte_len  = 8; | ||||
| 			break; | ||||
| 		case MLX4_OPCODE_MASKED_ATOMIC_FA: | ||||
| 			wc->opcode    = IB_WC_MASKED_FETCH_ADD; | ||||
| 			wc->byte_len  = 8; | ||||
| 			break; | ||||
| 		case MLX4_OPCODE_BIND_MW: | ||||
| 			wc->opcode    = IB_WC_BIND_MW; | ||||
| 			break; | ||||
|  | ||||
| @ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, | ||||
| 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay; | ||||
| 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? | ||||
| 		IB_ATOMIC_HCA : IB_ATOMIC_NONE; | ||||
| 	props->masked_atomic_cap   = IB_ATOMIC_HCA; | ||||
| 	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1]; | ||||
| 	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; | ||||
| 	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; | ||||
|  | ||||
| @ -74,17 +74,19 @@ enum { | ||||
| }; | ||||
| 
 | ||||
| static const __be32 mlx4_ib_opcode[] = { | ||||
| 	[IB_WR_SEND]			= cpu_to_be32(MLX4_OPCODE_SEND), | ||||
| 	[IB_WR_LSO]			= cpu_to_be32(MLX4_OPCODE_LSO), | ||||
| 	[IB_WR_SEND_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_SEND_IMM), | ||||
| 	[IB_WR_RDMA_WRITE]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), | ||||
| 	[IB_WR_RDMA_WRITE_WITH_IMM]	= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), | ||||
| 	[IB_WR_RDMA_READ]		= cpu_to_be32(MLX4_OPCODE_RDMA_READ), | ||||
| 	[IB_WR_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), | ||||
| 	[IB_WR_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), | ||||
| 	[IB_WR_SEND_WITH_INV]		= cpu_to_be32(MLX4_OPCODE_SEND_INVAL), | ||||
| 	[IB_WR_LOCAL_INV]		= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), | ||||
| 	[IB_WR_FAST_REG_MR]		= cpu_to_be32(MLX4_OPCODE_FMR), | ||||
| 	[IB_WR_SEND]				= cpu_to_be32(MLX4_OPCODE_SEND), | ||||
| 	[IB_WR_LSO]				= cpu_to_be32(MLX4_OPCODE_LSO), | ||||
| 	[IB_WR_SEND_WITH_IMM]			= cpu_to_be32(MLX4_OPCODE_SEND_IMM), | ||||
| 	[IB_WR_RDMA_WRITE]			= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), | ||||
| 	[IB_WR_RDMA_WRITE_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), | ||||
| 	[IB_WR_RDMA_READ]			= cpu_to_be32(MLX4_OPCODE_RDMA_READ), | ||||
| 	[IB_WR_ATOMIC_CMP_AND_SWP]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), | ||||
| 	[IB_WR_ATOMIC_FETCH_AND_ADD]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), | ||||
| 	[IB_WR_SEND_WITH_INV]			= cpu_to_be32(MLX4_OPCODE_SEND_INVAL), | ||||
| 	[IB_WR_LOCAL_INV]			= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), | ||||
| 	[IB_WR_FAST_REG_MR]			= cpu_to_be32(MLX4_OPCODE_FMR), | ||||
| 	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), | ||||
| 	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), | ||||
| }; | ||||
| 
 | ||||
| static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) | ||||
| @ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * | ||||
| 	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { | ||||
| 		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); | ||||
| 		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add); | ||||
| 	} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { | ||||
| 		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); | ||||
| 		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask); | ||||
| 	} else { | ||||
| 		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); | ||||
| 		aseg->compare  = 0; | ||||
| @ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, | ||||
| 				  struct ib_send_wr *wr) | ||||
| { | ||||
| 	aseg->swap_add		= cpu_to_be64(wr->wr.atomic.swap); | ||||
| 	aseg->swap_add_mask	= cpu_to_be64(wr->wr.atomic.swap_mask); | ||||
| 	aseg->compare		= cpu_to_be64(wr->wr.atomic.compare_add); | ||||
| 	aseg->compare_mask	= cpu_to_be64(wr->wr.atomic.compare_add_mask); | ||||
| } | ||||
| 
 | ||||
| static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, | ||||
| 			     struct ib_send_wr *wr) | ||||
| { | ||||
| @ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | ||||
| 			switch (wr->opcode) { | ||||
| 			case IB_WR_ATOMIC_CMP_AND_SWP: | ||||
| 			case IB_WR_ATOMIC_FETCH_AND_ADD: | ||||
| 			case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: | ||||
| 				set_raddr_seg(wqe, wr->wr.atomic.remote_addr, | ||||
| 					      wr->wr.atomic.rkey); | ||||
| 				wqe  += sizeof (struct mlx4_wqe_raddr_seg); | ||||
| @ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | ||||
| 
 | ||||
| 				break; | ||||
| 
 | ||||
| 			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: | ||||
| 				set_raddr_seg(wqe, wr->wr.atomic.remote_addr, | ||||
| 					      wr->wr.atomic.rkey); | ||||
| 				wqe  += sizeof (struct mlx4_wqe_raddr_seg); | ||||
| 
 | ||||
| 				set_masked_atomic_seg(wqe, wr); | ||||
| 				wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg); | ||||
| 
 | ||||
| 				size += (sizeof (struct mlx4_wqe_raddr_seg) + | ||||
| 					 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; | ||||
| 
 | ||||
| 				break; | ||||
| 
 | ||||
| 			case IB_WR_RDMA_READ: | ||||
| 			case IB_WR_RDMA_WRITE: | ||||
| 			case IB_WR_RDMA_WRITE_WITH_IMM: | ||||
|  | ||||
| @ -123,8 +123,8 @@ enum { | ||||
| 	MLX4_OPCODE_RDMA_READ		= 0x10, | ||||
| 	MLX4_OPCODE_ATOMIC_CS		= 0x11, | ||||
| 	MLX4_OPCODE_ATOMIC_FA		= 0x12, | ||||
| 	MLX4_OPCODE_ATOMIC_MASK_CS	= 0x14, | ||||
| 	MLX4_OPCODE_ATOMIC_MASK_FA	= 0x15, | ||||
| 	MLX4_OPCODE_MASKED_ATOMIC_CS	= 0x14, | ||||
| 	MLX4_OPCODE_MASKED_ATOMIC_FA	= 0x15, | ||||
| 	MLX4_OPCODE_BIND_MW		= 0x18, | ||||
| 	MLX4_OPCODE_FMR			= 0x19, | ||||
| 	MLX4_OPCODE_LOCAL_INVAL		= 0x1b, | ||||
|  | ||||
| @ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg { | ||||
| 	__be64			compare; | ||||
| }; | ||||
| 
 | ||||
| struct mlx4_wqe_masked_atomic_seg { | ||||
| 	__be64			swap_add; | ||||
| 	__be64			compare; | ||||
| 	__be64			swap_add_mask; | ||||
| 	__be64			compare_mask; | ||||
| }; | ||||
| 
 | ||||
| struct mlx4_wqe_data_seg { | ||||
| 	__be32			byte_count; | ||||
| 	__be32			lkey; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Vladimir Sokolovsky
						Vladimir Sokolovsky