From 539d1b47e935e8384977dd7e5cec370c08b7a644 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 1 Mar 2026 18:29:43 +0530 Subject: [PATCH] block: break pcpu_alloc_mutex dependency on freeze_lock While nr_hw_update allocates tagset tags it acquires ->pcpu_alloc_mutex after ->freeze_lock is acquired or queue is frozen. This potentially creates a circular dependency involving ->fs_reclaim if reclaim is triggered simultaneously in a code path which first acquires ->pcpu_ alloc_mutex. As the queue is already frozen while nr_hw_queue update allocates tagsets, the reclaim can't forward progress and thus it could cause a potential deadlock as reported in lockdep splat[1]. Fix this by pre-allocating tagset tags before we freeze queue during nr_hw_queue update. Later the allocated tagset tags could be safely installed and used after queue is frozen. Reported-by: Yi Zhang Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1] Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Tested-by: Yi Zhang Reviewed-by: Yu Kuai [axboe: fix brace style issue] Signed-off-by: Jens Axboe --- block/blk-mq.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index d5602ff62c7c..0b182013016a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4793,38 +4793,45 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set) } } -static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, - int new_nr_hw_queues) +static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags( + struct blk_mq_tag_set *set, + int new_nr_hw_queues) { struct blk_mq_tags **new_tags; int i; if (set->nr_hw_queues >= new_nr_hw_queues) - goto done; + return NULL; new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!new_tags) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (set->tags) memcpy(new_tags, set->tags, set->nr_hw_queues * sizeof(*set->tags)); - kfree(set->tags); - set->tags = new_tags; for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) { - if (!__blk_mq_alloc_map_and_rqs(set, i)) { - while (--i >= set->nr_hw_queues) - __blk_mq_free_map_and_rqs(set, i); - return -ENOMEM; + if (blk_mq_is_shared_tags(set->flags)) { + new_tags[i] = set->shared_tags; + } else { + new_tags[i] = blk_mq_alloc_map_and_rqs(set, i, + set->queue_depth); + if (!new_tags[i]) + goto out_unwind; } cond_resched(); } -done: - set->nr_hw_queues = new_nr_hw_queues; - return 0; + return new_tags; +out_unwind: + while (--i >= set->nr_hw_queues) { + if (!blk_mq_is_shared_tags(set->flags)) + blk_mq_free_map_and_rqs(set, new_tags[i], i); + } + kfree(new_tags); + return ERR_PTR(-ENOMEM); } /* @@ -5113,6 +5120,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, unsigned int memflags; int i; struct xarray elv_tbl; + struct blk_mq_tags **new_tags; bool queues_frozen = false; lockdep_assert_held(&set->tag_list_lock); @@ -5147,11 +5155,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, if (blk_mq_elv_switch_none(q, &elv_tbl)) goto switch_back; + new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues); + if (IS_ERR(new_tags)) + goto switch_back; + list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue_nomemsave(q); queues_frozen = true; - if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0) - goto switch_back; + if (new_tags) { + kfree(set->tags); + set->tags = new_tags; + } + set->nr_hw_queues = nr_hw_queues; fallback: blk_mq_update_queue_map(set);