mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-04 20:19:47 +08:00 
			
		
		
		
	nvmet-rdma: register ib_client to not deadlock in device removal
We can deadlock in case we got to a device removal event on a queue which is already in the process of destroying the cm_id is this is blocking until all events on this cm_id will drain. On the other hand we cannot guarantee that rdma_destroy_id was invoked as we only have indication that the queue disconnect flow has been queued (the queue state is updated before the realease work has been queued). So, we leave all the queue removal to a separate ib_client to avoid this deadlock as ib_client device removal is in a different context than the cm_id itself. Reported-by: Shiraz Saleem <shiraz.saleem@intel.com> Tested-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
		
							parent
							
								
									69fa964632
								
							
						
					
					
						commit
						f1d4ef7d88
					
				| @ -1307,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, | |||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * nvme_rdma_device_removal() - Handle RDMA device removal |  * nvme_rdma_device_removal() - Handle RDMA device removal | ||||||
|  |  * @cm_id:	rdma_cm id, used for nvmet port | ||||||
|  * @queue:      nvmet rdma queue (cm id qp_context) |  * @queue:      nvmet rdma queue (cm id qp_context) | ||||||
|  * @addr:	nvmet address (cm_id context) |  | ||||||
|  * |  * | ||||||
|  * DEVICE_REMOVAL event notifies us that the RDMA device is about |  * DEVICE_REMOVAL event notifies us that the RDMA device is about | ||||||
|  * to unplug so we should take care of destroying our RDMA resources. |  * to unplug. Note that this event can be generated on a normal | ||||||
|  * This event will be generated for each allocated cm_id. |  * queue cm_id and/or a device bound listener cm_id (where in this | ||||||
|  |  * case queue will be null). | ||||||
|  * |  * | ||||||
|  * Note that this event can be generated on a normal queue cm_id |  * We registered an ib_client to handle device removal for queues, | ||||||
|  * and/or a device bound listener cm_id (where in this case |  * so we only need to handle the listening port cm_ids. In this case | ||||||
|  * queue will be null). |  | ||||||
|  * |  | ||||||
|  * we claim ownership on destroying the cm_id. For queues we move |  | ||||||
|  * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port |  | ||||||
|  * we nullify the priv to prevent double cm_id destruction and destroying |  * we nullify the priv to prevent double cm_id destruction and destroying | ||||||
|  * the cm_id implicitely by returning a non-zero rc to the callout. |  * the cm_id implicitely by returning a non-zero rc to the callout. | ||||||
|  */ |  */ | ||||||
| static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, | static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, | ||||||
| 		struct nvmet_rdma_queue *queue) | 		struct nvmet_rdma_queue *queue) | ||||||
| { | { | ||||||
| 	unsigned long flags; | 	struct nvmet_port *port; | ||||||
| 
 |  | ||||||
| 	if (!queue) { |  | ||||||
| 		struct nvmet_port *port = cm_id->context; |  | ||||||
| 
 | 
 | ||||||
|  | 	if (queue) { | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * This is a listener cm_id. Make sure that | 		 * This is a queue cm_id. we have registered | ||||||
| 		 * future remove_port won't invoke a double | 		 * an ib_client to handle queues removal | ||||||
| 		 * cm_id destroy. use atomic xchg to make sure | 		 * so don't interfear and just return. | ||||||
| 		 * we don't compete with remove_port. |  | ||||||
| 		 */ | 		 */ | ||||||
| 		if (xchg(&port->priv, NULL) != cm_id) | 		return 0; | ||||||
| 			return 0; |  | ||||||
| 	} else { |  | ||||||
| 		/*
 |  | ||||||
| 		 * This is a queue cm_id. Make sure that |  | ||||||
| 		 * release queue will not destroy the cm_id |  | ||||||
| 		 * and schedule all ctrl queues removal (only |  | ||||||
| 		 * if the queue is not disconnecting already). |  | ||||||
| 		 */ |  | ||||||
| 		spin_lock_irqsave(&queue->state_lock, flags); |  | ||||||
| 		if (queue->state != NVMET_RDMA_Q_DISCONNECTING) |  | ||||||
| 			queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL; |  | ||||||
| 		spin_unlock_irqrestore(&queue->state_lock, flags); |  | ||||||
| 		nvmet_rdma_queue_disconnect(queue); |  | ||||||
| 		flush_scheduled_work(); |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	port = cm_id->context; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * This is a listener cm_id. Make sure that | ||||||
|  | 	 * future remove_port won't invoke a double | ||||||
|  | 	 * cm_id destroy. use atomic xchg to make sure | ||||||
|  | 	 * we don't compete with remove_port. | ||||||
|  | 	 */ | ||||||
|  | 	if (xchg(&port->priv, NULL) != cm_id) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * We need to return 1 so that the core will destroy | 	 * We need to return 1 so that the core will destroy | ||||||
| 	 * it's own ID.  What a great API design.. | 	 * it's own ID.  What a great API design.. | ||||||
| @ -1519,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = { | |||||||
| 	.delete_ctrl		= nvmet_rdma_delete_ctrl, | 	.delete_ctrl		= nvmet_rdma_delete_ctrl, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | static void nvmet_rdma_add_one(struct ib_device *ib_device) | ||||||
|  | { | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) | ||||||
|  | { | ||||||
|  | 	struct nvmet_rdma_queue *queue; | ||||||
|  | 
 | ||||||
|  | 	/* Device is being removed, delete all queues using this device */ | ||||||
|  | 	mutex_lock(&nvmet_rdma_queue_mutex); | ||||||
|  | 	list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) { | ||||||
|  | 		if (queue->dev->device != ib_device) | ||||||
|  | 			continue; | ||||||
|  | 
 | ||||||
|  | 		pr_info("Removing queue %d\n", queue->idx); | ||||||
|  | 		__nvmet_rdma_queue_disconnect(queue); | ||||||
|  | 	} | ||||||
|  | 	mutex_unlock(&nvmet_rdma_queue_mutex); | ||||||
|  | 
 | ||||||
|  | 	flush_scheduled_work(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static struct ib_client nvmet_rdma_ib_client = { | ||||||
|  | 	.name   = "nvmet_rdma", | ||||||
|  | 	.add = nvmet_rdma_add_one, | ||||||
|  | 	.remove = nvmet_rdma_remove_one | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| static int __init nvmet_rdma_init(void) | static int __init nvmet_rdma_init(void) | ||||||
| { | { | ||||||
| 	return nvmet_register_transport(&nvmet_rdma_ops); | 	int ret; | ||||||
|  | 
 | ||||||
|  | 	ret = ib_register_client(&nvmet_rdma_ib_client); | ||||||
|  | 	if (ret) | ||||||
|  | 		return ret; | ||||||
|  | 
 | ||||||
|  | 	ret = nvmet_register_transport(&nvmet_rdma_ops); | ||||||
|  | 	if (ret) | ||||||
|  | 		goto err_ib_client; | ||||||
|  | 
 | ||||||
|  | 	return 0; | ||||||
|  | 
 | ||||||
|  | err_ib_client: | ||||||
|  | 	ib_unregister_client(&nvmet_rdma_ib_client); | ||||||
|  | 	return ret; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void __exit nvmet_rdma_exit(void) | static void __exit nvmet_rdma_exit(void) | ||||||
| @ -1544,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void) | |||||||
| 	mutex_unlock(&nvmet_rdma_queue_mutex); | 	mutex_unlock(&nvmet_rdma_queue_mutex); | ||||||
| 
 | 
 | ||||||
| 	flush_scheduled_work(); | 	flush_scheduled_work(); | ||||||
|  | 	ib_unregister_client(&nvmet_rdma_ib_client); | ||||||
| 	ida_destroy(&nvmet_rdma_queue_ida); | 	ida_destroy(&nvmet_rdma_queue_ida); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Sagi Grimberg
						Sagi Grimberg