
mm: fix a UAF when vma->mm is freed after vma->vm_refcnt got dropped

By inducing delays in the right places, Jann Horn created a reproducer for
a hard-to-hit UAF issue that became possible after VMAs were allowed to be
recycled by adding SLAB_TYPESAFE_BY_RCU to their cache.

Race description is borrowed from Jann's discovery report:
lock_vma_under_rcu() looks up a VMA locklessly with mas_walk() under
rcu_read_lock().  At that point, the VMA may be concurrently freed, and it
can be recycled by another process.  vma_start_read() then increments the
vma->vm_refcnt (if it is in an acceptable range), and if this succeeds,
vma_start_read() can return a recycled VMA.
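
For orientation, the lookup path can be pictured with the simplified sketch
below (not the upstream function verbatim: event counters, the EAGAIN retry
and the post-lock revalidation are omitted):

  struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
                                            unsigned long address)
  {
          MA_STATE(mas, &mm->mm_mt, address, address);
          struct vm_area_struct *vma;

          rcu_read_lock();
          /* Lockless maple tree walk: nothing pins the vma object yet */
          vma = mas_walk(&mas);
          if (!vma)
                  goto inval;

          /*
           * Bumps vma->vm_refcnt if it is in an acceptable range. Because
           * the cache is SLAB_TYPESAFE_BY_RCU, the object may have been
           * freed and recycled by another process since mas_walk(), so the
           * vma returned here can belong to a different mm.
           */
          vma = vma_start_read(mm, vma);
          if (IS_ERR_OR_NULL(vma))
                  goto inval;

          rcu_read_unlock();
          return vma;

  inval:
          rcu_read_unlock();
          return NULL;
  }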

In this scenario where the VMA has been recycled, lock_vma_under_rcu()
will then detect the mismatching ->vm_mm pointer and drop the VMA through
vma_end_read(), which calls vma_refcount_put().  vma_refcount_put() drops
the refcount and then calls rcuwait_wake_up() using a copy of vma->vm_mm. 
This is wrong: It implicitly assumes that the caller is keeping the VMA's
mm alive, but in this scenario the caller has no relation to the VMA's mm,
so the rcuwait_wake_up() can cause UAF.
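
The drop path can be pictured with the following sketch (the vma_writer_wait
field and the is_vma_writer_only() helper are named here after the
refcount-based vma locking scheme; treat the details as illustrative rather
than a quote of the header):

  static inline void vma_refcount_put(struct vm_area_struct *vma)
  {
          /* Copy of vm_mm: the vma itself may be freed once vm_refcnt drops */
          struct mm_struct *mm = vma->vm_mm;
          int oldcnt;

          rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
          if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
                  /* A writer may be waiting for readers to drain */
                  if (is_vma_writer_only(oldcnt - 1))
                          /*
                           * Nothing here guarantees that the mm a recycled
                           * vma points to is still alive - this is the UAF
                           * window.
                           */
                          rcuwait_wake_up(&mm->vma_writer_wait);
          }
  }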

The diagram depicting the race:
T1         T2         T3
==         ==         ==
lock_vma_under_rcu
  mas_walk
          <VMA gets removed from mm>
                      mmap
                        <the same VMA is reallocated>
  vma_start_read
    __refcount_inc_not_zero_limited_acquire
                      munmap
                        __vma_enter_locked
                          refcount_add_not_zero
  vma_end_read
    vma_refcount_put
      __refcount_dec_and_test
                          rcuwait_wait_event
                            <finish operation>
      rcuwait_wake_up [UAF]

Note that rcuwait_wait_event() in T3 does not block because the refcount was
already dropped by T1.  At this point T3 can exit and free the mm, causing a
UAF in T1.
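
The waiting side in T3 follows the pattern sketched below (the
__vma_enter_locked() shape, the VMA_LOCK_OFFSET writer bias and the
vma_writer_wait field are assumptions drawn from the same locking scheme,
shown only to illustrate why the wait returns immediately):

  /* T3, during munmap: write-lock the vma and wait for readers to drain */
  static bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching)
  {
          /* Assumed target: writer bias, plus one ref if the vma stays attached */
          unsigned int tgt_refcnt = VMA_LOCK_OFFSET;

          if (!detaching)
                  tgt_refcnt++;

          /* Fails only if the vma is already detached */
          if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt))
                  return false;

          /*
           * In the race above T1 already dropped its reference, so the
           * condition is true right away and this does not sleep. T3 then
           * finishes the munmap and may exit and free the mm before T1
           * reaches rcuwait_wake_up().
           */
          rcuwait_wait_event(&vma->vm_mm->vma_writer_wait,
                             refcount_read(&vma->vm_refcnt) == tgt_refcnt,
                             TASK_UNINTERRUPTIBLE);
          return true;
  }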

To avoid this, we move the vma->vm_mm verification into vma_start_read() and
grab vma->vm_mm (via mmgrab()) to stabilize it before the vma_refcount_put()
operation.

[surenb@google.com: v3]
  Link: https://lkml.kernel.org/r/20250729145709.2731370-1-surenb@google.com
Link: https://lkml.kernel.org/r/20250728175355.2282375-1-surenb@google.com
Fixes: 3104138517 ("mm: make vma cache SLAB_TYPESAFE_BY_RCU")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Reported-by: Jann Horn <jannh@google.com>
Closes: https://lore.kernel.org/all/CAG48ez0-deFbVH=E3jbkWx=X3uVbd8nWeo6kbJPQ0KoUD+m2tA@mail.gmail.com/
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Author: Suren Baghdasaryan, 2025-07-28 10:53:55 -07:00 (committed by Andrew Morton)
commit 9bbffee67f, parent a222439e1e
2 changed files with 33 additions and 7 deletions

@@ -12,6 +12,7 @@ extern int rcuwait_wake_up(struct rcuwait *w);
 #include <linux/tracepoint-defs.h>
 #include <linux/types.h>
 #include <linux/cleanup.h>
+#include <linux/sched/mm.h>
 
 #define MMAP_LOCK_INITIALIZER(name) \
 	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
@@ -154,6 +155,10 @@ static inline void vma_refcount_put(struct vm_area_struct *vma)
  * reused and attached to a different mm before we lock it.
  * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
  * detached.
+ *
+ * WARNING! The vma passed to this function cannot be used if the function
+ * fails to lock it because in certain cases RCU lock is dropped and then
+ * reacquired. Once RCU lock is dropped the vma can be concurrently freed.
  */
 static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
 						    struct vm_area_struct *vma)
@@ -183,6 +188,31 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
 	}
 	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
 
+	/*
+	 * If vma got attached to another mm from under us, that mm is not
+	 * stable and can be freed in the narrow window after vma->vm_refcnt
+	 * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
+	 * releasing vma->vm_refcnt.
+	 */
+	if (unlikely(vma->vm_mm != mm)) {
+		/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
+		struct mm_struct *other_mm = vma->vm_mm;
+
+		/*
+		 * __mmdrop() is a heavy operation and we don't need RCU
+		 * protection here. Release RCU lock during these operations.
+		 * We reinstate the RCU read lock as the caller expects it to
+		 * be held when this function returns even on error.
+		 */
+		rcu_read_unlock();
+		mmgrab(other_mm);
+		vma_refcount_put(vma);
+		mmdrop(other_mm);
+		rcu_read_lock();
+
+		return NULL;
+	}
+
 	/*
 	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
 	 * False unlocked result is impossible because we modify and check

@@ -164,8 +164,7 @@ retry:
 	 */
 	/* Check if the vma we locked is the right one. */
-	if (unlikely(vma->vm_mm != mm ||
-		     address < vma->vm_start || address >= vma->vm_end))
+	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
 		goto inval_end_read;
 
 	rcu_read_unlock();
@@ -236,11 +235,8 @@ retry:
 		goto fallback;
 	}
 
-	/*
-	 * Verify the vma we locked belongs to the same address space and it's
-	 * not behind of the last search position.
-	 */
-	if (unlikely(vma->vm_mm != mm || from_addr >= vma->vm_end))
+	/* Verify the vma is not behind the last search position. */
+	if (unlikely(from_addr >= vma->vm_end))
 		goto fallback_unlock;
 
 	/*