diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 525624c285a6..e2e91aa1a042 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -415,6 +415,9 @@ static inline int split_huge_page(struct page *page) return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio, bool partially_mapped); +#ifdef CONFIG_MEMCG +void reparent_deferred_split_queue(struct mem_cgroup *memcg); +#endif void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze); @@ -647,6 +650,7 @@ static inline int try_folio_split_to_order(struct folio *folio, } static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} +static inline void reparent_deferred_split_queue(struct mem_cgroup *memcg) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b0c6a4635c67..cc6db20d7dca 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1775,6 +1775,12 @@ static inline void count_objcg_events(struct obj_cgroup *objcg, bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid); void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg); + +static inline bool memcg_is_dying(struct mem_cgroup *memcg) +{ + return memcg ? css_is_dying(&memcg->css) : false; +} + #else static inline bool mem_cgroup_kmem_disabled(void) { @@ -1845,6 +1851,11 @@ static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid) static inline void mem_cgroup_show_protected_memory(struct mem_cgroup *memcg) { } + +static inline bool memcg_is_dying(struct mem_cgroup *memcg) +{ + return false; +} #endif /* CONFIG_MEMCG */ #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 13684e5376e8..d17d3810a882 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1118,8 +1118,19 @@ static struct deferred_split *split_queue_lock(int nid, struct mem_cgroup *memcg { struct deferred_split *queue; +retry: queue = memcg_split_queue(nid, memcg); spin_lock(&queue->split_queue_lock); + /* + * There is a period between setting memcg to dying and reparenting + * deferred split queue, and during this period the THPs in the deferred + * split queue will be hidden from the shrinker side. + */ + if (unlikely(memcg_is_dying(memcg))) { + spin_unlock(&queue->split_queue_lock); + memcg = parent_mem_cgroup(memcg); + goto retry; + } return queue; } @@ -1129,8 +1140,14 @@ split_queue_lock_irqsave(int nid, struct mem_cgroup *memcg, unsigned long *flags { struct deferred_split *queue; +retry: queue = memcg_split_queue(nid, memcg); spin_lock_irqsave(&queue->split_queue_lock, *flags); + if (unlikely(memcg_is_dying(memcg))) { + spin_unlock_irqrestore(&queue->split_queue_lock, *flags); + memcg = parent_mem_cgroup(memcg); + goto retry; + } return queue; } @@ -4389,6 +4406,33 @@ next: return split; } +#ifdef CONFIG_MEMCG +void reparent_deferred_split_queue(struct mem_cgroup *memcg) +{ + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + struct deferred_split *ds_queue = &memcg->deferred_split_queue; + struct deferred_split *parent_ds_queue = &parent->deferred_split_queue; + int nid; + + spin_lock_irq(&ds_queue->split_queue_lock); + spin_lock_nested(&parent_ds_queue->split_queue_lock, SINGLE_DEPTH_NESTING); + + if (!ds_queue->split_queue_len) + goto unlock; + + list_splice_tail_init(&ds_queue->split_queue, &parent_ds_queue->split_queue); + parent_ds_queue->split_queue_len += ds_queue->split_queue_len; + ds_queue->split_queue_len = 0; + + for_each_node(nid) + set_shrinker_bit(parent, nid, shrinker_id(deferred_split_shrinker)); + +unlock: + spin_unlock(&parent_ds_queue->split_queue_lock); + spin_unlock_irq(&ds_queue->split_queue_lock); +} +#endif + #ifdef CONFIG_DEBUG_FS static void split_huge_pages_all(void) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bfc986da3289..623446821b00 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3920,6 +3920,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) zswap_memcg_offline_cleanup(memcg); memcg_offline_kmem(memcg); + reparent_deferred_split_queue(memcg); reparent_shrinker_deferred(memcg); wb_memcg_offline(memcg); lru_gen_offline_memcg(memcg);