mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (synced 2025-09-04 20:19:47 +08:00)
Merge tag 'pull-mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs mount updates from Al Viro:
 "I've got more stuff in the local tree, but this is getting too much
  for one merge window as it is.

   - mount hash conflicts: rudiments are gone now - we do not allow
     multiple mounts with the same parent/mountpoint to be hashed at
     the same time

   - 'struct mount' changes:
       - mnt_umounting is gone
       - mnt_slave_list/mnt_slave is an hlist now
       - overmounts are kept track of by an explicit pointer in struct
         mount
       - a bunch of flags moved out of mnt_flags to a new field, with
         only namespace_sem for protection
       - mnt_expiry is protected by mount_lock now (instead of
         namespace_sem)
       - MNT_LOCKED is used only for mounts that need to remain
         attached to their parents to prevent mountpoint exposure - no
         more overloading it for absolute roots
       - all mnt_list uses are transient now - it's used only to
         represent temporary sets during umount_tree()

   - mount refcounting change: children no longer pin parents for any
     mounts, whether they'd passed through umount_tree() or not

   - 'struct mountpoint' changes:
       - refcount is no more; what matters is ->m_list emptiness
       - instead of temporarily bumping the refcount, we insert a new
         object (pinned_mountpoint) into ->m_list
       - new calling conventions for lock_mount() and friends

   - do_move_mount()/attach_recursive_mnt() seriously cleaned up

   - globals in fs/pnode.c are gone

   - propagate_mnt(), change_mnt_propagation() and propagate_umount()
     cleaned up (in the last case - pretty much completely rewritten)

   - freeing of emptied mnt_namespace is done in namespace_unlock();
     for one thing, there are subtle ordering requirements there, for
     another it simplifies cleanups

   - assorted cleanups

   - restore the machinery for long-term mounts from accumulated
     bitrot; this is going to get a followup come next cycle, when
     #work.fs_context with its change of vfs_fs_parse_string() calling
     conventions goes into -next"

* tag 'pull-mount' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (48 commits)
  statmount_mnt_basic(): simplify the logics for group id
  invent_group_ids(): zero ->mnt_group_id always implies !IS_MNT_SHARED()
  get rid of CL_SHARE_TO_SLAVE
  take freeing of emptied mnt_namespace to namespace_unlock()
  copy_tree(): don't link the mounts via mnt_list
  change_mnt_propagation(): move ->mnt_master assignment into MS_SLAVE case
  mnt_slave_list/mnt_slave: turn into hlist_head/hlist_node
  turn do_make_slave() into transfer_propagation()
  do_make_slave(): choose new master sanely
  change_mnt_propagation(): do_make_slave() is a no-op unless IS_MNT_SHARED()
  change_mnt_propagation() cleanups, step 1
  propagate_mnt(): fix comment and convert to kernel-doc, while we are at it
  propagate_mnt(): get rid of last_dest
  fs/pnode.c: get rid of globals
  propagate_one(): fold into the sole caller
  propagate_one(): separate the "what should be the master for this copy" part
  propagate_one(): separate the "do we need secondary here?" logics
  propagate_mnt(): handle all peer groups in the same loop
  propagate_one(): get rid of dest_master
  mount: separate the flags accessed only under namespace_sem
  ...
This commit is contained in:
commit 794cbac9c0

484 Documentation/filesystems/propagate_umount.txt (new file)
@@ -0,0 +1,484 @@
Notes on propagate_umount()

Umount propagation starts with a set of mounts we are already going to
take out.  Ideally, we would like to add all downstream cognates to
that set - anything with the same mountpoint as one of the removed
mounts and with a parent that would receive events from the parent of
that mount.  However, there are some constraints the resulting set must
satisfy.
It is convenient to define several properties of sets of mounts:

1) A set S of mounts is non-shifting if for any mount X belonging
to S all subtrees mounted strictly inside of X (i.e. not overmounting
the root of X) contain only elements of S.

2) A set S is non-revealing if all locked mounts that belong to S have
parents that also belong to S.

3) A set S is closed if it contains all children of its elements.

The set of mounts taken out by umount(2) must be non-shifting and
non-revealing; the first constraint is what allows us to reparent
any remaining mounts and the second is what prevents the exposure
of any concealed mountpoints.
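As a worked example (a hypothetical tree, not taken from the patch):
let A be mounted on /mnt, B on a subdirectory /mnt/sub inside A, and C
overmounting the root of A.  {A, C} is not non-shifting - the subtree
containing B hangs strictly inside A without being in the set - while
{A, B, C} is both non-shifting and closed.  If B is locked, a set
containing B is non-revealing only if it also contains B's parent A;
taking out B alone would expose the mountpoint that B was meant to
conceal.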
propagate_umount() takes the original set as an argument and tries to
extend that set.  The original set is a full subtree and its root is
unlocked; what matters is that it's closed and non-revealing.
The resulting set may not be closed; there might still be mounts outside
of that set, but only on top of stacks of root-overmounting elements
of the set.  They can be reparented to the place where the bottom of
the stack is attached to a mount that will survive.  NOTE: doing that
will violate the constraint on having no more than one mount with
the same parent/mountpoint pair; however, the caller (umount_tree())
will immediately remedy that - it may keep an unmounted element attached
to its parent, but only if the parent itself is unmounted.  Since all
conflicts created by reparenting have a common parent *not* in the
set and one side of the conflict (the bottom of the stack of overmounts)
is in the set, they will be resolved.  However, we rely upon umount_tree()
doing that pretty much immediately after the call of propagate_umount().
The algorithm is based on two statements:
	1) for any set S, there is a maximal non-shifting subset of S
	   and it can be calculated in O(#S) time.
	2) for any non-shifting set S, there is a maximal non-revealing
	   subset of S.  That subset is also non-shifting and it can be
	   calculated in O(#S) time.
Finding candidates.

We are given a closed set U and we want to find all mounts that have
the same mountpoint as some mount m in U *and* whose parent receives
propagation from the parent of the same mount m.  A naive implementation
would be
	S = {}
	for each m in U
		add m to S
		p = parent(m)
		for each q in Propagation(p) - {p}
			child = look_up(q, mountpoint(m))
			if child
				add child to S
but that can lead to excessive work - there might be propagation among the
subtrees of U, in which case we'd end up examining the same candidates
many times.  Since propagation is transitive, the same will happen to
everything downstream of that candidate and it's not hard to construct
cases where the approach above leads to time quadratic in the actual
number of candidates.
Note that if we run into a candidate we'd already seen, it must have
been added on an earlier iteration of the outer loop - all additions made
during one iteration of the outer loop have different parents.  So
if we find a child already added to the set, we know that everything
in Propagation(parent(child)) with the same mountpoint has already been
added.
	S = {}
	for each m in U
		if m in S
			continue
		add m to S
		p = parent(m)
		q = propagation_next(p, p)
		while q
			child = look_up(q, mountpoint(m))
			if child
				if child in S
					q = skip_them(q, p)
					continue
				add child to S
			q = propagation_next(q, p)
where
	skip_them(q, p)
		keep walking Propagation(p) from q until we find something
		not in Propagation(q)

would get rid of that problem, but we need a sane implementation of
skip_them().  That's not hard to do - split propagation_next() into
"down into mnt_slave_list" and "forward-and-up" parts, with
skip_them() being "repeat the forward-and-up part until we get NULL
or something that isn't a peer of the one we are skipping".
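The fs/pnode.c changes in this commit implement exactly that split:
the "forward-and-up" part becomes __propagation_next() and the skipping
loop becomes skip_propagation_subtree().  Condensed from the diff below:

	static struct mount *skip_propagation_subtree(struct mount *m,
						      struct mount *origin)
	{
		/* advance past everything that gets propagation from m */
		struct mount *p = __propagation_next(m, origin);

		while (p && peers(m, p))
			p = __propagation_next(p, origin);

		return p;
	}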
Note that there can be no absolute roots among the extra candidates -
they all come from mount lookups.  An absolute root among the original
set is _currently_ impossible, but it might be worth protecting
against.
Maximal non-shifting subsets.

Let's call a mount m in a set S forbidden in that set if there is a
subtree mounted strictly inside m and containing mounts that do not
belong to S.

The set is non-shifting when none of its elements are forbidden in it.

If mount m is forbidden in a set S, it is forbidden in any subset S' it
belongs to.  In other words, it can't belong to any of the non-shifting
subsets of S.  If we had a way to find a forbidden mount or show that
there's none, we could use it to find the maximal non-shifting subset
simply by finding and removing them until none remain.
Suppose mount m is forbidden in S; then any mounts forbidden in S - {m}
must have been forbidden in S itself.  Indeed, since m has descendants
that do not belong to S, any subtree that fits into S will fit into
S - {m} as well.

So in principle we could go through the elements of S, checking if they
are forbidden in S and removing the ones that are.  Removals will
not invalidate the checks done for earlier mounts - if they were not
forbidden at the time we checked, they won't become forbidden later.
It's too costly to be practical, but there is a similar approach that
is linear in the size of S.
Let's say that mount x in a set S is forbidden by mount y, if
	* both x and y belong to S.
	* there is a chain of mounts starting at x and leaving S
	  immediately after passing through y, with the first
	  mountpoint strictly inside x.
Note 1: x may be equal to y - that's the case when something not
belonging to S is mounted strictly inside x.
Note 2: if y does not belong to S, it can't forbid anything in S.
Note 3: if y has no children outside of S, it can't forbid anything in S.
It's easy to show that mount x is forbidden in S if and only if x is
forbidden in S by some mount y.  And it's easy to find all mounts in S
forbidden by a given mount.

Consider the following operation:
	Trim(S, m) = S - {x : x is forbidden by m in S}

Note that if m does not belong to S or has no children outside of S we
are guaranteed that Trim(S, m) is equal to S.

The following is true: if x is forbidden by y in Trim(S, m), it was
already forbidden by y in S.
Proof: Suppose x is forbidden by y in Trim(S, m).  Then there is a
chain of mounts (x_0 = x, ..., x_k = y, x_{k+1} = r), such that x_{k+1}
is the first element that doesn't belong to Trim(S, m) and the
mountpoint of x_1 is strictly inside x.  If mount r belongs to S, it must
have been removed by Trim(S, m), i.e. it was forbidden in S by m.
Then there was a mount chain from r to some child of m that stayed in
S all the way until m, but that's impossible since x belongs to Trim(S, m)
and prepending (x_0, ..., x_k) to that chain demonstrates that x is also
forbidden in S by m, and thus can't belong to Trim(S, m).
Therefore r can not belong to S and our chain demonstrates that
x is forbidden by y in S.  QED.

Corollary: no mount is forbidden by m in Trim(S, m).  Indeed, any
such mount would have been forbidden by m in S and thus would have been
in the part of S removed in Trim(S, m).

Corollary: no mount is forbidden by m in Trim(Trim(S, m), n).  Indeed,
any such would have to have been forbidden by m in Trim(S, m), which
is impossible.

Corollary: after
	S = Trim(S, x_1)
	S = Trim(S, x_2)
	...
	S = Trim(S, x_k)
no mount remaining in S will be forbidden by any of x_1,...,x_k.
The following will reduce S to its maximal non-shifting subset:
	visited = {}
	while S contains elements not belonging to visited
		let m be an arbitrary such element of S
		S = Trim(S, m)
		add m to visited

S never grows, so the number of elements of S not belonging to visited
decreases at least by one on each iteration.  When the loop terminates,
all mounts remaining in S belong to visited.  It's easy to see that at
the beginning of each iteration no mount remaining in S will be forbidden
by any element of visited.  In other words, no mount remaining in S will
be forbidden, i.e. the final value of S will be non-shifting.  It will be
the maximal non-shifting subset, since we were removing only forbidden
elements.
There are two difficulties in implementing the above in linear
time, both due to the fact that Trim() might need to remove more than one
element.  A naive implementation of Trim() is vulnerable to running into a
long chain of mounts, each mounted on top of the parent's root.  Nothing in
that chain is forbidden, so nothing gets removed from it.  We need to
recognize such chains and avoid walking them again on subsequent calls of
Trim(), otherwise we will end up with the worst-case time being quadratic
in the number of elements in S.  Another difficulty is in implementing the
outer loop - we need to iterate through all elements of a shrinking set.
That would be trivial if we never removed more than one element at a time
(linked list, with list_for_each_entry_safe for the iterator), but we may
need to remove more than one entry, possibly including the ones we have
already visited.
Let's start with a naive algorithm for Trim():

	Trim_one(m)
		found = false
		for each n in children(m)
			if n not in S
				found = true
				if (mountpoint(n) != root(m))
					remove m from S
					break
		if found
			Trim_ancestors(m)

	Trim_ancestors(m)
		for (; parent(m) in S; m = parent(m)) {
			if (mountpoint(m) != root(parent(m)))
				remove parent(m) from S
		}
If m belongs to S, Trim_one(m) will replace S with Trim(S, m).
Proof:
	Consider the chains excluding elements from Trim(S, m).  The last
two elements in such a chain are m and some child of m that does not belong
to S.  If m has no such children, Trim(S, m) is equal to S.
	m itself is removed if and only if the chain has exactly two
elements, i.e. when the last element does not overmount the root of m.
In other words, that happens when m has a child not in S that does not
overmount the root of m.
	All other elements to remove will be ancestors of m, such that
the entire descent chain from them to m is contained in S.  Let
(x_0, x_1, ..., x_k = m) be the longest such chain.  x_i needs to be
removed if and only if x_{i+1} does not overmount its root.  It's easy
to see that Trim_ancestors(m) will iterate through that chain from
x_k to x_1 and that it will remove exactly the elements that need to be
removed.
Note that if the loop in Trim_ancestors() walks into an already
visited element, we are guaranteed that the remaining iterations will see
only elements that had already been visited and will remove none of them.
That's the weakness that makes it vulnerable to long chains of full
overmounts.
It's easy to deal with, if we can afford setting marks on
elements of S; we would mark all elements already visited by
Trim_ancestors() and have it bail out as soon as it sees an already
marked element.
The problems with iterating through the set can be dealt with in
several ways, depending upon the representation we choose for our set.
One useful observation is that we are given a closed subset of S - the
original set passed to propagate_umount().  Its elements can neither
forbid anything nor be forbidden by anything - all their descendants
belong to S, so they can not occur anywhere in any excluding chain.
In other words, the elements of that subset will remain in S until
the end and Trim_one(m) is a no-op for all m from that subset.

That suggests keeping S as a disjoint union of a closed set U
('will be unmounted, no matter what') and the set of all elements of
S that do not belong to U.  That set ('candidates') is all we need
to iterate through.  Let's represent it as a subset of a cyclic list,
consisting of all list elements that are marked as candidates (initially -
all of them).  Then we could have Trim_ancestors() only remove the mark,
leaving the elements on the list.  Then Trim_one() would never remove
anything other than its argument from the containing list, allowing us
to use list_for_each_entry_safe() as the iterator.
Assuming that representation we get the following:

	list_for_each_entry_safe(m, ..., Candidates, ...)
		Trim_one(m)
where
	Trim_one(m)
		if (m is not marked as a candidate)
			strip the "seen by Trim_ancestors" mark from m
			remove m from the Candidates list
			return

		remove_this = false
		found = false
		for each n in children(m)
			if n not in S
				found = true
				if (mountpoint(n) != root(m))
					remove_this = true
					break
		if found
			Trim_ancestors(m)
		if remove_this
			strip the "seen by Trim_ancestors" mark from m
			strip the "candidate" mark from m
			remove m from the Candidates list

	Trim_ancestors(m)
		for (p = parent(m); p is marked as a candidate; m = p, p = parent(p)) {
			if m is marked as seen by Trim_ancestors
				return
			mark m as seen by Trim_ancestors
			if (mountpoint(m) != root(p))
				strip the "candidate" mark from p
		}
The terminating condition in the loop in Trim_ancestors() is correct,
since that loop will never run into p belonging to U - p is always
an ancestor of the argument of Trim_one() and since U is closed, the
argument of Trim_one() would also have to belong to U.  But Trim_one()
is never called for elements of U.  In other words, p belongs to S if
and only if it belongs to the candidates.
Time complexity:
	* we get no more than O(#S) calls of Trim_one()
	* the loop over children in Trim_one() never looks at the same child
	  twice through all the calls.
	* iterations of that loop for children in S are no more than O(#S)
	  in the worst case
	* at most two children that are not elements of S are considered per
	  call of Trim_one().
	* the loop in Trim_ancestors() sets its mark once per iteration and
	  no element of S has it set more than once.
In the end we may have some elements excluded from S by
Trim_ancestors() still stuck on the list.  We could do a separate
loop removing them from the list (also no worse than O(#S) time),
but it's easier to leave that until the next phase - there we will
iterate through the candidates anyway.
The caller has already removed all elements of U from their parents'
lists of children, which means that checking if a child belongs to S is
equivalent to checking if it's marked as a candidate; we'll never see
the elements of U in the loop over children in Trim_one().
What's more, if we see that children(m) is empty and m is not
locked, we can immediately move m into the committed subset (remove it
from the parent's list of children, etc.).  That's one fewer mount we'll
have to look into when we check the list of children of its parent *and*
when we get to building the non-revealing subset.
Maximal non-revealing subsets

If S is not a non-revealing subset, there is a locked element x in S
such that the parent of x is not in S.

Obviously, no non-revealing subset of S may contain x.  Removing such
elements one by one will obviously end with the maximal non-revealing
subset (possibly an empty one).  Note that removal of an element will
require removal of all its locked children, etc.

If the set had been non-shifting, it will remain non-shifting after
such removals.
Proof: suppose S was non-shifting, x is a locked element of S, the parent
of x is not in S and S - {x} is not non-shifting.  Then there is an
element m in S - {x} and a subtree mounted strictly inside m that
contains an element not in S - {x}.  Since S is non-shifting, everything
in that subtree must belong to S.  But that means that this subtree must
contain x somewhere *and* that the parent of x either belongs to that
subtree or is equal to m.  Either way it must belong to S.  Contradiction.
	// Same representation as for finding maximal non-shifting subsets:
	// S is a disjoint union of a non-revealing set U (the ones we are
	// committed to unmount) and a set of candidates, represented as the
	// subset of list elements that have the "is a candidate" mark on
	// them.  Elements of U are removed from their parents' lists of
	// children.  In the end candidates becomes empty and the maximal
	// non-revealing non-shifting subset of S is now in U.
	while (Candidates list is non-empty)
		handle_locked(first(Candidates))
	handle_locked(m)
		if m is not marked as a candidate
			strip the "seen by Trim_ancestors" mark from m
			remove m from the list
			return
		cutoff = m
		for (p = m; p in candidates; p = parent(p)) {
			strip the "seen by Trim_ancestors" mark from p
			strip the "candidate" mark from p
			remove p from the Candidates list
			if (!locked(p))
				cutoff = parent(p)
		}
		if p in U
			cutoff = p
		while m != cutoff
			remove m from children(parent(m))
			add m to U
			m = parent(m)
Let (x_0, ..., x_n = m) be the maximal chain of descent of m within S.
	* If it contains some elements of U, let x_k be the last one of those.
	  Then the union of U with {x_{k+1}, ..., x_n} is obviously
	  non-revealing.
	* Otherwise, if all its elements are locked, then none of
	  {x_0, ..., x_n} may be elements of a non-revealing subset of S.
	* Otherwise, let x_k be the first unlocked element of the chain.
	  Then none of {x_0, ..., x_{k-1}} may be an element of a
	  non-revealing subset of S and the union of U and {x_k, ..., x_n}
	  is non-revealing.
handle_locked(m) finds which of these cases applies and adjusts Candidates
and U accordingly.  U remains non-revealing, the union of Candidates and
U still contains any non-revealing subset of S, and after the call of
handle_locked(m) m is guaranteed to be off the Candidates list.  So having
it called for each element of S would suffice to empty Candidates,
leaving U the maximal non-revealing subset of S.

However, handle_locked(m) is a no-op when m belongs to U, so it's enough
to have it called for elements of the Candidates list until none remain.
Time complexity: the number of calls of handle_locked() is limited by
#Candidates; each iteration of the first loop in handle_locked() removes
an element from the list, so the total number of its executions is also
limited by #Candidates; the number of iterations in the second loop is no
greater than the number of iterations of the first loop.
Reparenting

After we'd calculated the final set, we still need to deal with
reparenting - if an element of the final set has a child not in it,
we need to reparent such a child.

Such children can only be root-overmounting (otherwise the set wouldn't
be non-shifting) and their parents can not belong to the original set,
since the original is guaranteed to be closed.
Putting all of that together

The plan is to
	* find all candidates
	* trim down to the maximal non-shifting subset
	* trim down to the maximal non-revealing subset
	* reparent anything that needs to be reparented
	* return the resulting set to the caller
For the 2nd and 3rd steps we want to separate the set into a growing
non-revealing subset, initially containing the original set ("U" in
terms of the pseudocode above), and everything we are still not sure about
("candidates").  That means that for the output of the 1st step we'd like
the extra candidates separated from the stuff already in the original set.
For the 4th step we would like the additions to U separate from the
original set.
So let's go for
	* the original set ("set").  Linkage via mnt_list.
	* undecided candidates ("candidates").  A subset of a list,
	  consisting of all its elements marked with a new flag
	  (T_UMOUNT_CANDIDATE).  Initially all elements of the list will
	  be marked that way; in the end the list will become empty and
	  no mounts will remain marked with that flag.
	* reuse of T_MARKED for "has already been seen by Trim_ancestors()".
	* anything in U that hadn't been in the original set - elements of
	  candidates will gradually be either discarded or moved there.
	  In other words, it's the candidates we have already decided to
	  unmount.  Its role is reasonably close to the old "to_umount",
	  so let's use that name.  Linkage via mnt_list.
For gather_candidates() we'll need to maintain both the candidates (S -
set) and the intersection of S with set.  Use T_UMOUNT_CANDIDATE for
all elements we encounter, putting the ones not already in the original
set into the list of candidates.  When we are done, strip that flag from
all elements of the original set.  That gives a cheap way to check
if an element belongs to S (in gather_candidates()) and to candidates
itself (at later stages).  Call that predicate is_candidate(); it would
be m->mnt_t_flags & T_UMOUNT_CANDIDATE.
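A minimal sketch of that predicate (assuming the T_UMOUNT_CANDIDATE
flag added to mnt_t_flags by this series; the helper name follows the
text above):

	static inline bool is_candidate(const struct mount *m)
	{
		return m->mnt_t_flags & T_UMOUNT_CANDIDATE;
	}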
All elements of the original set are marked with MNT_UMOUNT and we'll
need the same for elements added when joining the contents of to_umount
to set in the end.  Let's set MNT_UMOUNT at the time we add an element
to to_umount; that's close to what the old 'umount_one' is doing, so
let's keep that name.  It also gives us another predicate we need -
"belongs to the union of set and to_umount"; will_be_unmounted() for now.
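That predicate is trivial; the fs/pnode.c part of this commit has it
verbatim:

	static inline bool will_be_unmounted(struct mount *m)
	{
		return m->mnt.mnt_flags & MNT_UMOUNT;
	}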
Removals from the candidates list should strip both T_MARKED and
T_UMOUNT_CANDIDATE; call it remove_from_candidates_list().
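A minimal sketch of that helper, under the assumption that candidates
are linked via mnt_list like the other sets described above:

	static void remove_from_candidates_list(struct mount *m)
	{
		m->mnt_t_flags &= ~(T_MARKED | T_UMOUNT_CANDIDATE);
		list_del_init(&m->mnt_list);
	}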
drivers/gpu/drm/i915/gem/i915_gemfs.c

@@ -5,16 +5,23 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/fs_context.h>
 
 #include "i915_drv.h"
 #include "i915_gemfs.h"
 #include "i915_utils.h"
 
+static int add_param(struct fs_context *fc, const char *key, const char *val)
+{
+	return vfs_parse_fs_string(fc, key, val, strlen(val));
+}
+
 void i915_gemfs_init(struct drm_i915_private *i915)
 {
-	char huge_opt[] = "huge=within_size"; /* r/w */
 	struct file_system_type *type;
+	struct fs_context *fc;
 	struct vfsmount *gemfs;
+	int ret;
 
 	/*
 	 * By creating our own shmemfs mountpoint, we can pass in
@@ -38,8 +45,16 @@ void i915_gemfs_init(struct drm_i915_private *i915)
 	if (!type)
 		goto err;
 
-	gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
-	if (IS_ERR(gemfs))
+	fc = fs_context_for_mount(type, SB_KERNMOUNT);
+	if (IS_ERR(fc))
+		goto err;
+	ret = add_param(fc, "source", "tmpfs");
+	if (!ret)
+		ret = add_param(fc, "huge", "within_size");
+	if (!ret)
+		gemfs = fc_mount_longterm(fc);
+	put_fs_context(fc);
+	if (ret)
 		goto err;
 
 	i915->mm.gemfs = gemfs;
drivers/gpu/drm/v3d/v3d_gemfs.c

@@ -3,14 +3,21 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/fs_context.h>
 
 #include "v3d_drv.h"
 
+static int add_param(struct fs_context *fc, const char *key, const char *val)
+{
+	return vfs_parse_fs_string(fc, key, val, strlen(val));
+}
+
 void v3d_gemfs_init(struct v3d_dev *v3d)
 {
-	char huge_opt[] = "huge=within_size";
 	struct file_system_type *type;
+	struct fs_context *fc;
 	struct vfsmount *gemfs;
+	int ret;
 
 	/*
 	 * By creating our own shmemfs mountpoint, we can pass in
@@ -28,8 +35,16 @@ void v3d_gemfs_init(struct v3d_dev *v3d)
 	if (!type)
 		goto err;
 
-	gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
-	if (IS_ERR(gemfs))
+	fc = fs_context_for_mount(type, SB_KERNMOUNT);
+	if (IS_ERR(fc))
+		goto err;
+	ret = add_param(fc, "source", "tmpfs");
+	if (!ret)
+		ret = add_param(fc, "huge", "within_size");
+	if (!ret)
+		gemfs = fc_mount_longterm(fc);
+	put_fs_context(fc);
+	if (ret)
 		goto err;
 
 	v3d->gemfs = gemfs;
fs/hugetlbfs/inode.c

@@ -1588,7 +1588,7 @@ static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
 	} else {
 		struct hugetlbfs_fs_context *ctx = fc->fs_private;
 		ctx->hstate = h;
-		mnt = fc_mount(fc);
+		mnt = fc_mount_longterm(fc);
 		put_fs_context(fc);
 	}
 	if (IS_ERR(mnt))
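The hunks above all switch long-lived kernel-internal mounts to the
restored long-term mount machinery.  As a hedged summary of the sequence
the i915/v3d call sites now follow (error handling trimmed; every helper
named here appears in the diffs above):

	fc = fs_context_for_mount(type, SB_KERNMOUNT);
	if (IS_ERR(fc))
		goto err;
	ret = vfs_parse_fs_string(fc, "source", "tmpfs", strlen("tmpfs"));
	if (!ret)
		ret = vfs_parse_fs_string(fc, "huge", "within_size",
					  strlen("within_size"));
	if (!ret)
		gemfs = fc_mount_longterm(fc);	/* instead of fc_mount() */
	put_fs_context(fc);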
40 fs/mount.h

@@ -44,7 +44,6 @@ struct mountpoint {
 	struct hlist_node m_hash;
 	struct dentry *m_dentry;
 	struct hlist_head m_list;
-	int m_count;
 };
 
 struct mount {
@@ -70,8 +69,8 @@ struct mount {
 	struct list_head mnt_list;
 	struct list_head mnt_expire;	/* link in fs-specific expiry list */
 	struct list_head mnt_share;	/* circular list of shared mounts */
-	struct list_head mnt_slave_list;/* list of slave mounts */
-	struct list_head mnt_slave;	/* slave list entry */
+	struct hlist_head mnt_slave_list;/* list of slave mounts */
+	struct hlist_node mnt_slave;	/* slave list entry */
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	struct mountpoint *mnt_mp;	/* where is it mounted */
@@ -79,21 +78,38 @@ struct mount {
 		struct hlist_node mnt_mp_list;	/* list mounts with the same mountpoint */
 		struct hlist_node mnt_umount;
 	};
-	struct list_head mnt_umounting; /* list entry for umount propagation */
 #ifdef CONFIG_FSNOTIFY
 	struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
 	__u32 mnt_fsnotify_mask;
 	struct list_head to_notify;	/* need to queue notification */
 	struct mnt_namespace *prev_ns;	/* previous namespace (NULL if none) */
 #endif
+	int mnt_t_flags;		/* namespace_sem-protected flags */
 	int mnt_id;			/* mount identifier, reused */
 	u64 mnt_id_unique;		/* mount ID unique until reboot */
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	struct hlist_head mnt_pins;
 	struct hlist_head mnt_stuck_children;
+	struct mount *overmount;	/* mounted on ->mnt_root */
 } __randomize_layout;
 
+enum {
+	T_SHARED = 1,			/* mount is shared */
+	T_UNBINDABLE = 2,		/* mount is unbindable */
+	T_MARKED = 4,			/* internal mark for propagate_... */
+	T_UMOUNT_CANDIDATE = 8,		/* for propagate_umount */
+
+	/*
+	 * T_SHARED_MASK is the set of flags that should be cleared when a
+	 * mount becomes shared.  Currently, this is only the flag that says
+	 * a mount cannot be bind mounted, since this is how we create a
+	 * mount that shares events with another mount.  If you add a new
+	 * T_* flag, consider how it interacts with shared mounts.
+	 */
+	T_SHARED_MASK = T_UNBINDABLE,
+};
+
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
 
 static inline struct mount *real_mount(struct vfsmount *mnt)
@@ -101,7 +117,7 @@ static inline struct mount *real_mount(struct vfsmount *mnt)
 	return container_of(mnt, struct mount, mnt);
 }
 
-static inline int mnt_has_parent(struct mount *mnt)
+static inline int mnt_has_parent(const struct mount *mnt)
 {
 	return mnt != mnt->mnt_parent;
 }
@@ -146,8 +162,8 @@ struct proc_mounts {
 
 extern const struct seq_operations mounts_op;
 
-extern bool __is_local_mountpoint(struct dentry *dentry);
-static inline bool is_local_mountpoint(struct dentry *dentry)
+extern bool __is_local_mountpoint(const struct dentry *dentry);
+static inline bool is_local_mountpoint(const struct dentry *dentry)
 {
 	if (!d_mountpoint(dentry))
 		return false;
@@ -160,6 +176,13 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
 	return ns->seq == 0;
 }
 
+static inline bool anon_ns_root(const struct mount *m)
+{
+	struct mnt_namespace *ns = READ_ONCE(m->mnt_ns);
+
+	return !IS_ERR_OR_NULL(ns) && is_anon_ns(ns) && m == ns->root;
+}
+
 static inline bool mnt_ns_attached(const struct mount *mnt)
 {
 	return !RB_EMPTY_NODE(&mnt->mnt_node);
@@ -170,7 +193,7 @@ static inline bool mnt_ns_empty(const struct mnt_namespace *ns)
 	return RB_EMPTY_ROOT(&ns->mounts);
 }
 
-static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
+static inline void move_from_ns(struct mount *mnt)
 {
 	struct mnt_namespace *ns = mnt->mnt_ns;
 	WARN_ON(!mnt_ns_attached(mnt));
@@ -180,7 +203,6 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
 	ns->mnt_first_node = rb_next(&mnt->mnt_node);
 	rb_erase(&mnt->mnt_node, &ns->mounts);
 	RB_CLEAR_NODE(&mnt->mnt_node);
-	list_add_tail(&mnt->mnt_list, dt_list);
 }
 
 bool has_locked_children(struct mount *mnt, struct dentry *dentry);
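With T_SHARED, T_UNBINDABLE and T_MARKED living in the new
namespace_sem-protected mnt_t_flags field, the propagation predicates
presumably test that field rather than ->mnt.mnt_flags; a sketch of what
the fs/pnode.h accessors would look like after this change (that header
is not part of this excerpt, so treat these as assumptions):

	#define IS_MNT_SHARED(m)	((m)->mnt_t_flags & T_SHARED)
	#define IS_MNT_UNBINDABLE(m)	((m)->mnt_t_flags & T_UNBINDABLE)
	#define IS_MNT_MARKED(m)	((m)->mnt_t_flags & T_MARKED)
	#define SET_MNT_MARK(m)		((m)->mnt_t_flags |= T_MARKED)
	#define CLEAR_MNT_MARK(m)	((m)->mnt_t_flags &= ~T_MARKED)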
711 fs/namespace.c
File diff suppressed because it is too large

723 fs/pnode.c
@ -21,17 +21,12 @@ static inline struct mount *next_peer(struct mount *p)
|
|||||||
|
|
||||||
static inline struct mount *first_slave(struct mount *p)
|
static inline struct mount *first_slave(struct mount *p)
|
||||||
{
|
{
|
||||||
return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
|
return hlist_entry(p->mnt_slave_list.first, struct mount, mnt_slave);
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct mount *last_slave(struct mount *p)
|
|
||||||
{
|
|
||||||
return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline struct mount *next_slave(struct mount *p)
|
static inline struct mount *next_slave(struct mount *p)
|
||||||
{
|
{
|
||||||
return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
|
return hlist_entry(p->mnt_slave.next, struct mount, mnt_slave);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct mount *get_peer_under_root(struct mount *mnt,
|
static struct mount *get_peer_under_root(struct mount *mnt,
|
||||||
@ -70,69 +65,90 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int do_make_slave(struct mount *mnt)
|
static inline bool will_be_unmounted(struct mount *m)
|
||||||
{
|
{
|
||||||
struct mount *master, *slave_mnt;
|
return m->mnt.mnt_flags & MNT_UMOUNT;
|
||||||
|
}
|
||||||
|
|
||||||
if (list_empty(&mnt->mnt_share)) {
|
static struct mount *propagation_source(struct mount *mnt)
|
||||||
if (IS_MNT_SHARED(mnt)) {
|
{
|
||||||
mnt_release_group_id(mnt);
|
do {
|
||||||
CLEAR_MNT_SHARED(mnt);
|
|
||||||
}
|
|
||||||
master = mnt->mnt_master;
|
|
||||||
if (!master) {
|
|
||||||
struct list_head *p = &mnt->mnt_slave_list;
|
|
||||||
while (!list_empty(p)) {
|
|
||||||
slave_mnt = list_first_entry(p,
|
|
||||||
struct mount, mnt_slave);
|
|
||||||
list_del_init(&slave_mnt->mnt_slave);
|
|
||||||
slave_mnt->mnt_master = NULL;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
struct mount *m;
|
struct mount *m;
|
||||||
/*
|
for (m = next_peer(mnt); m != mnt; m = next_peer(m)) {
|
||||||
* slave 'mnt' to a peer mount that has the
|
if (!will_be_unmounted(m))
|
||||||
* same root dentry. If none is available then
|
return m;
|
||||||
* slave it to anything that is available.
|
|
||||||
*/
|
|
||||||
for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
|
|
||||||
if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
|
|
||||||
master = m;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
list_del_init(&mnt->mnt_share);
|
mnt = mnt->mnt_master;
|
||||||
mnt->mnt_group_id = 0;
|
} while (mnt && will_be_unmounted(mnt));
|
||||||
CLEAR_MNT_SHARED(mnt);
|
return mnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void transfer_propagation(struct mount *mnt, struct mount *to)
|
||||||
|
{
|
||||||
|
struct hlist_node *p = NULL, *n;
|
||||||
|
struct mount *m;
|
||||||
|
|
||||||
|
hlist_for_each_entry_safe(m, n, &mnt->mnt_slave_list, mnt_slave) {
|
||||||
|
m->mnt_master = to;
|
||||||
|
if (!to)
|
||||||
|
hlist_del_init(&m->mnt_slave);
|
||||||
|
else
|
||||||
|
p = &m->mnt_slave;
|
||||||
}
|
}
|
||||||
list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
|
if (p)
|
||||||
slave_mnt->mnt_master = master;
|
hlist_splice_init(&mnt->mnt_slave_list, p, &to->mnt_slave_list);
|
||||||
list_move(&mnt->mnt_slave, &master->mnt_slave_list);
|
|
||||||
list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
|
|
||||||
INIT_LIST_HEAD(&mnt->mnt_slave_list);
|
|
||||||
mnt->mnt_master = master;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* vfsmount lock must be held for write
|
* EXCL[namespace_sem]
|
||||||
*/
|
*/
|
||||||
void change_mnt_propagation(struct mount *mnt, int type)
|
void change_mnt_propagation(struct mount *mnt, int type)
|
||||||
{
|
{
|
||||||
|
struct mount *m = mnt->mnt_master;
|
||||||
|
|
||||||
if (type == MS_SHARED) {
|
if (type == MS_SHARED) {
|
||||||
set_mnt_shared(mnt);
|
set_mnt_shared(mnt);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
do_make_slave(mnt);
|
if (IS_MNT_SHARED(mnt)) {
|
||||||
if (type != MS_SLAVE) {
|
m = propagation_source(mnt);
|
||||||
list_del_init(&mnt->mnt_slave);
|
if (list_empty(&mnt->mnt_share)) {
|
||||||
|
mnt_release_group_id(mnt);
|
||||||
|
} else {
|
||||||
|
list_del_init(&mnt->mnt_share);
|
||||||
|
mnt->mnt_group_id = 0;
|
||||||
|
}
|
||||||
|
CLEAR_MNT_SHARED(mnt);
|
||||||
|
transfer_propagation(mnt, m);
|
||||||
|
}
|
||||||
|
hlist_del_init(&mnt->mnt_slave);
|
||||||
|
if (type == MS_SLAVE) {
|
||||||
|
mnt->mnt_master = m;
|
||||||
|
if (m)
|
||||||
|
hlist_add_head(&mnt->mnt_slave, &m->mnt_slave_list);
|
||||||
|
} else {
|
||||||
mnt->mnt_master = NULL;
|
mnt->mnt_master = NULL;
|
||||||
if (type == MS_UNBINDABLE)
|
if (type == MS_UNBINDABLE)
|
||||||
mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
|
mnt->mnt_t_flags |= T_UNBINDABLE;
|
||||||
else
|
else
|
||||||
mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
|
mnt->mnt_t_flags &= ~T_UNBINDABLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct mount *__propagation_next(struct mount *m,
|
||||||
|
struct mount *origin)
|
||||||
|
{
|
||||||
|
while (1) {
|
||||||
|
struct mount *master = m->mnt_master;
|
||||||
|
|
||||||
|
if (master == origin->mnt_master) {
|
||||||
|
struct mount *next = next_peer(m);
|
||||||
|
return (next == origin) ? NULL : next;
|
||||||
|
} else if (m->mnt_slave.next)
|
||||||
|
return next_slave(m);
|
||||||
|
|
||||||
|
/* back at master */
|
||||||
|
m = master;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,34 +166,24 @@ static struct mount *propagation_next(struct mount *m,
|
|||||||
struct mount *origin)
|
struct mount *origin)
|
||||||
{
|
{
|
||||||
/* are there any slaves of this mount? */
|
/* are there any slaves of this mount? */
|
||||||
if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
|
if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
|
||||||
return first_slave(m);
|
return first_slave(m);
|
||||||
|
|
||||||
while (1) {
|
return __propagation_next(m, origin);
|
||||||
struct mount *master = m->mnt_master;
|
|
||||||
|
|
||||||
if (master == origin->mnt_master) {
|
|
||||||
struct mount *next = next_peer(m);
|
|
||||||
return (next == origin) ? NULL : next;
|
|
||||||
} else if (m->mnt_slave.next != &master->mnt_slave_list)
|
|
||||||
return next_slave(m);
|
|
||||||
|
|
||||||
/* back at master */
|
|
||||||
m = master;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct mount *skip_propagation_subtree(struct mount *m,
|
static struct mount *skip_propagation_subtree(struct mount *m,
|
||||||
struct mount *origin)
|
struct mount *origin)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Advance m such that propagation_next will not return
|
* Advance m past everything that gets propagation from it.
|
||||||
* the slaves of m.
|
|
||||||
*/
|
*/
|
||||||
if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
|
struct mount *p = __propagation_next(m, origin);
|
||||||
m = last_slave(m);
|
|
||||||
|
|
||||||
return m;
|
while (p && peers(m, p))
|
||||||
|
p = __propagation_next(p, origin);
|
||||||
|
|
||||||
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct mount *next_group(struct mount *m, struct mount *origin)
|
static struct mount *next_group(struct mount *m, struct mount *origin)
|
||||||
@ -185,7 +191,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
|
|||||||
while (1) {
|
while (1) {
|
||||||
while (1) {
|
while (1) {
|
||||||
struct mount *next;
|
struct mount *next;
|
||||||
if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
|
if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
|
||||||
return first_slave(m);
|
return first_slave(m);
|
||||||
next = next_peer(m);
|
next = next_peer(m);
|
||||||
if (m->mnt_group_id == origin->mnt_group_id) {
|
if (m->mnt_group_id == origin->mnt_group_id) {
|
||||||
@ -198,7 +204,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
|
|||||||
/* m is the last peer */
|
/* m is the last peer */
|
||||||
while (1) {
|
while (1) {
|
||||||
struct mount *master = m->mnt_master;
|
struct mount *master = m->mnt_master;
|
||||||
if (m->mnt_slave.next != &master->mnt_slave_list)
|
if (m->mnt_slave.next)
|
||||||
return next_slave(m);
|
return next_slave(m);
|
||||||
m = next_peer(master);
|
m = next_peer(master);
|
||||||
if (master->mnt_group_id == origin->mnt_group_id)
|
if (master->mnt_group_id == origin->mnt_group_id)
|
||||||
@ -212,142 +218,113 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* all accesses are serialized by namespace_sem */
|
static bool need_secondary(struct mount *m, struct mountpoint *dest_mp)
|
||||||
static struct mount *last_dest, *first_source, *last_source, *dest_master;
|
|
||||||
static struct hlist_head *list;
|
|
||||||
|
|
||||||
static inline bool peers(const struct mount *m1, const struct mount *m2)
|
|
||||||
{
|
{
|
||||||
return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
|
|
||||||
{
|
|
||||||
struct mount *child;
|
|
||||||
int type;
|
|
||||||
/* skip ones added by this propagate_mnt() */
|
/* skip ones added by this propagate_mnt() */
|
||||||
if (IS_MNT_NEW(m))
|
if (IS_MNT_NEW(m))
|
||||||
return 0;
|
return false;
|
||||||
/* skip if mountpoint isn't visible in m */
|
/* skip if mountpoint isn't visible in m */
|
||||||
if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
|
if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
|
||||||
return 0;
|
return false;
|
||||||
/* skip if m is in the anon_ns */
|
/* skip if m is in the anon_ns */
|
||||||
if (is_anon_ns(m->mnt_ns))
|
if (is_anon_ns(m->mnt_ns))
|
||||||
return 0;
|
return false;
|
||||||
|
return true;
|
||||||
if (peers(m, last_dest)) {
|
|
||||||
type = CL_MAKE_SHARED;
|
|
||||||
} else {
|
|
||||||
struct mount *n, *p;
|
|
||||||
bool done;
|
|
||||||
for (n = m; ; n = p) {
|
|
||||||
p = n->mnt_master;
|
|
||||||
if (p == dest_master || IS_MNT_MARKED(p))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
do {
|
|
||||||
struct mount *parent = last_source->mnt_parent;
|
|
||||||
if (peers(last_source, first_source))
|
|
||||||
break;
|
|
||||||
done = parent->mnt_master == p;
|
|
||||||
if (done && peers(n, parent))
|
|
||||||
break;
|
|
||||||
last_source = last_source->mnt_master;
|
|
||||||
} while (!done);
|
|
||||||
|
|
||||||
type = CL_SLAVE;
|
|
||||||
/* beginning of peer group among the slaves? */
|
|
||||||
if (IS_MNT_SHARED(m))
|
|
||||||
type |= CL_MAKE_SHARED;
|
|
||||||
}
|
|
||||||
|
|
||||||
child = copy_tree(last_source, last_source->mnt.mnt_root, type);
|
|
||||||
if (IS_ERR(child))
|
|
||||||
return PTR_ERR(child);
|
|
||||||
read_seqlock_excl(&mount_lock);
|
|
||||||
mnt_set_mountpoint(m, dest_mp, child);
|
|
||||||
if (m->mnt_master != dest_master)
|
|
||||||
SET_MNT_MARK(m->mnt_master);
|
|
||||||
read_sequnlock_excl(&mount_lock);
|
|
||||||
last_dest = m;
|
|
||||||
last_source = child;
|
|
||||||
hlist_add_head(&child->mnt_hash, list);
|
|
||||||
return count_mounts(m->mnt_ns, child);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static struct mount *find_master(struct mount *m,
|
||||||
* mount 'source_mnt' under the destination 'dest_mnt' at
|
struct mount *last_copy,
|
||||||
* dentry 'dest_dentry'. And propagate that mount to
|
struct mount *original)
|
||||||
* all the peer and slave mounts of 'dest_mnt'.
|
{
|
||||||
* Link all the new mounts into a propagation tree headed at
|
struct mount *p;
|
||||||
* source_mnt. Also link all the new mounts using ->mnt_list
|
|
||||||
* headed at source_mnt's ->mnt_list
|
// ascend until there's a copy for something with the same master
|
||||||
|
for (;;) {
|
||||||
|
p = m->mnt_master;
|
||||||
|
if (!p || IS_MNT_MARKED(p))
|
||||||
|
break;
|
||||||
|
m = p;
|
||||||
|
}
|
||||||
|
while (!peers(last_copy, original)) {
|
||||||
|
struct mount *parent = last_copy->mnt_parent;
|
||||||
|
if (parent->mnt_master == p) {
|
||||||
|
if (!peers(parent, m))
|
||||||
|
last_copy = last_copy->mnt_master;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
last_copy = last_copy->mnt_master;
|
||||||
|
}
|
||||||
|
return last_copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
+
+/**
+ * propagate_mnt() - create secondary copies for tree attachment
+ * @dest_mnt: destination mount.
+ * @dest_mp: destination mountpoint.
+ * @source_mnt: source mount.
+ * @tree_list: list of secondaries to be attached.
  *
- * @dest_mnt: destination mount.
- * @dest_dentry: destination dentry.
- * @source_mnt: source mount.
- * @tree_list : list of heads of trees to be attached.
+ * Create secondary copies for attaching a tree with root @source_mnt
+ * at mount @dest_mnt with mountpoint @dest_mp. Link all new mounts
+ * into a propagation graph. Set mountpoints for all secondaries,
+ * link their roots into @tree_list via ->mnt_hash.
  */
 int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
 		  struct mount *source_mnt, struct hlist_head *tree_list)
 {
-	struct mount *m, *n;
-	int ret = 0;
+	struct mount *m, *n, *copy, *this;
+	int err = 0, type;
 
-	/*
-	 * we don't want to bother passing tons of arguments to
-	 * propagate_one(); everything is serialized by namespace_sem,
-	 * so globals will do just fine.
-	 */
-	last_dest = dest_mnt;
-	first_source = source_mnt;
-	last_source = source_mnt;
-	list = tree_list;
-	dest_master = dest_mnt->mnt_master;
+	if (dest_mnt->mnt_master)
+		SET_MNT_MARK(dest_mnt->mnt_master);
 
-	/* all peers of dest_mnt, except dest_mnt itself */
-	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
-		ret = propagate_one(n, dest_mp);
-		if (ret)
-			goto out;
-	}
-
-	/* all slave groups */
-	for (m = next_group(dest_mnt, dest_mnt); m;
-			m = next_group(m, dest_mnt)) {
-		/* everything in that slave group */
-		n = m;
-		do {
-			ret = propagate_one(n, dest_mp);
-			if (ret)
-				goto out;
-			n = next_peer(n);
-		} while (n != m);
-	}
-out:
-	read_seqlock_excl(&mount_lock);
+	/* iterate over peer groups, depth first */
+	for (m = dest_mnt; m && !err; m = next_group(m, dest_mnt)) {
+		if (m == dest_mnt) { // have one for dest_mnt itself
+			copy = source_mnt;
+			type = CL_MAKE_SHARED;
+			n = next_peer(m);
+			if (n == m)
+				continue;
+		} else {
+			type = CL_SLAVE;
+			/* beginning of peer group among the slaves? */
+			if (IS_MNT_SHARED(m))
+				type |= CL_MAKE_SHARED;
+			n = m;
+		}
+		do {
+			if (!need_secondary(n, dest_mp))
+				continue;
+			if (type & CL_SLAVE) // first in this peer group
+				copy = find_master(n, copy, source_mnt);
+			this = copy_tree(copy, copy->mnt.mnt_root, type);
+			if (IS_ERR(this)) {
+				err = PTR_ERR(this);
+				break;
+			}
+			read_seqlock_excl(&mount_lock);
+			mnt_set_mountpoint(n, dest_mp, this);
+			read_sequnlock_excl(&mount_lock);
+			if (n->mnt_master)
+				SET_MNT_MARK(n->mnt_master);
+			copy = this;
+			hlist_add_head(&this->mnt_hash, tree_list);
+			err = count_mounts(n->mnt_ns, this);
+			if (err)
+				break;
+			type = CL_MAKE_SHARED;
+		} while ((n = next_peer(n)) != m);
+	}
+
 	hlist_for_each_entry(n, tree_list, mnt_hash) {
 		m = n->mnt_parent;
-		if (m->mnt_master != dest_mnt->mnt_master)
+		if (m->mnt_master)
 			CLEAR_MNT_MARK(m->mnt_master);
 	}
-	read_sequnlock_excl(&mount_lock);
-	return ret;
-}
-
-static struct mount *find_topper(struct mount *mnt)
-{
-	/* If there is exactly one mount covering mnt completely return it. */
-	struct mount *child;
-
-	if (!list_is_singular(&mnt->mnt_mounts))
-		return NULL;
-
-	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
-	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
-		return NULL;
-
-	return child;
-}
+	if (dest_mnt->mnt_master)
+		CLEAR_MNT_MARK(dest_mnt->mnt_master);
+	return err;
+}
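The behaviour propagate_mnt() implements can be observed from userspace with nothing but mount(2). A hedged sketch (paths /tmp/A and /tmp/B are arbitrary; run as root, ideally under unshare -m): a tmpfs mounted inside a shared mount shows up in the peer created by bind-mounting it.

#include <stdio.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/stat.h>

static void die(const char *msg) { perror(msg); exit(1); }

int main(void)
{
	struct stat sub, parent;

	mkdir("/tmp/A", 0755);
	mkdir("/tmp/B", 0755);
	if (mount("none", "/tmp/A", "tmpfs", 0, NULL))
		die("mount tmpfs");
	/* mark /tmp/A shared: mount events now propagate to its peers */
	if (mount(NULL, "/tmp/A", NULL, MS_SHARED, NULL))
		die("make shared");
	/* bind it to /tmp/B: the two mounts become peers in one group */
	if (mount("/tmp/A", "/tmp/B", NULL, MS_BIND, NULL))
		die("bind");
	mkdir("/tmp/A/sub", 0755);
	/* this is the event propagate_mnt() handles: a secondary copy
	 * of the new mount appears at /tmp/B/sub */
	if (mount("none", "/tmp/A/sub", "tmpfs", 0, NULL))
		die("mount under A");
	stat("/tmp/B/sub", &sub);
	stat("/tmp/B", &parent);
	printf("propagated to peer: %s\n",
	       sub.st_dev != parent.st_dev ? "yes" : "no");
	return 0;
}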
@@ -407,12 +384,8 @@ bool propagation_would_overmount(const struct mount *from,
  */
 int propagate_mount_busy(struct mount *mnt, int refcnt)
 {
-	struct mount *m, *child, *topper;
 	struct mount *parent = mnt->mnt_parent;
 
-	if (mnt == parent)
-		return do_refcount_check(mnt, refcnt);
-
 	/*
 	 * quickly check if the current mount can be unmounted.
 	 * If not, we don't have to go checking for all other
@@ -421,23 +394,27 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
 		return 1;
 
+	if (mnt == parent)
+		return 0;
+
-	for (m = propagation_next(parent, parent); m;
+	for (struct mount *m = propagation_next(parent, parent); m;
 			m = propagation_next(m, parent)) {
-		int count = 1;
-		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
+		struct list_head *head;
+		struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
 
 		if (!child)
 			continue;
 
-		/* Is there exactly one mount on the child that covers
-		 * it completely whose reference should be ignored?
-		 */
-		topper = find_topper(child);
-		if (topper)
-			count += 1;
-		else if (!list_empty(&child->mnt_mounts))
-			continue;
+		head = &child->mnt_mounts;
+		if (!list_empty(head)) {
+			/*
+			 * a mount that covers child completely wouldn't prevent
+			 * it being pulled out; any other would.
+			 */
+			if (!list_is_singular(head) || !child->overmount)
+				continue;
+		}
 
-		if (do_refcount_check(child, count))
+		if (do_refcount_check(child, 1))
 			return 1;
 	}
 	return 0;
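propagate_mount_busy() is the kernel-side check behind a familiar failure mode: while anything holds a reference inside a mount, umount(2) fails with EBUSY. A standalone sketch (path /tmp/C is arbitrary; run as root):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	int fd;

	mkdir("/tmp/C", 0755);
	if (mount("none", "/tmp/C", "tmpfs", 0, NULL)) {
		perror("mount");
		return 1;
	}
	fd = open("/tmp/C", O_RDONLY);	/* an open descriptor pins the mount */
	if (umount("/tmp/C"))
		printf("umount: %s (expected)\n", strerror(errno));
	close(fd);
	if (umount("/tmp/C") == 0)
		printf("umount succeeded once the reference was dropped\n");
	return 0;
}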
@@ -463,181 +440,209 @@ void propagate_mount_unlock(struct mount *mnt)
 	}
 }
 
-static void umount_one(struct mount *mnt, struct list_head *to_umount)
+static inline bool is_candidate(struct mount *m)
 {
-	CLEAR_MNT_MARK(mnt);
-	mnt->mnt.mnt_flags |= MNT_UMOUNT;
-	list_del_init(&mnt->mnt_child);
-	list_del_init(&mnt->mnt_umounting);
-	move_from_ns(mnt, to_umount);
+	return m->mnt_t_flags & T_UMOUNT_CANDIDATE;
+}
+
+static void umount_one(struct mount *m, struct list_head *to_umount)
+{
+	m->mnt.mnt_flags |= MNT_UMOUNT;
+	list_del_init(&m->mnt_child);
+	move_from_ns(m);
+	list_add_tail(&m->mnt_list, to_umount);
+}
+
+static void remove_from_candidate_list(struct mount *m)
+{
+	m->mnt_t_flags &= ~(T_MARKED | T_UMOUNT_CANDIDATE);
+	list_del_init(&m->mnt_list);
+}
+
+static void gather_candidates(struct list_head *set,
+			      struct list_head *candidates)
+{
+	struct mount *m, *p, *q;
+
+	list_for_each_entry(m, set, mnt_list) {
+		if (is_candidate(m))
+			continue;
+		m->mnt_t_flags |= T_UMOUNT_CANDIDATE;
+		p = m->mnt_parent;
+		q = propagation_next(p, p);
+		while (q) {
+			struct mount *child = __lookup_mnt(&q->mnt,
+							   m->mnt_mountpoint);
+			if (child) {
+				/*
+				 * We might've already run into this one.  That
+				 * must've happened on earlier iteration of the
+				 * outer loop; in that case we can skip those
+				 * parents that get propagation from q - there
+				 * will be nothing new on those as well.
+				 */
+				if (is_candidate(child)) {
+					q = skip_propagation_subtree(q, p);
+					continue;
+				}
+				child->mnt_t_flags |= T_UMOUNT_CANDIDATE;
+				if (!will_be_unmounted(child))
+					list_add(&child->mnt_list, candidates);
+			}
+			q = propagation_next(q, p);
+		}
+	}
+	list_for_each_entry(m, set, mnt_list)
+		m->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
 }
 
 /*
- * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
- * parent propagates to.
+ * We know that some child of @m can't be unmounted.  In all places where the
+ * chain of descent of @m has child not overmounting the root of parent,
+ * the parent can't be unmounted either.
  */
-static bool __propagate_umount(struct mount *mnt,
-			       struct list_head *to_umount,
-			       struct list_head *to_restore)
+static void trim_ancestors(struct mount *m)
 {
-	bool progress = false;
-	struct mount *child;
+	struct mount *p;
 
-	/*
-	 * The state of the parent won't change if this mount is
-	 * already unmounted or marked as without children.
-	 */
-	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
-		goto out;
-
-	/* Verify topper is the only grandchild that has not been
-	 * speculatively unmounted.
-	 */
-	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
-		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
-			continue;
-		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
-			continue;
-		/* Found a mounted child */
-		goto children;
-	}
-
-	/* Mark mounts that can be unmounted if not locked */
-	SET_MNT_MARK(mnt);
-	progress = true;
-
-	/* If a mount is without children and not locked umount it. */
-	if (!IS_MNT_LOCKED(mnt)) {
-		umount_one(mnt, to_umount);
-	} else {
-children:
-		list_move_tail(&mnt->mnt_umounting, to_restore);
-	}
-out:
-	return progress;
-}
-
-static void umount_list(struct list_head *to_umount,
-			struct list_head *to_restore)
-{
-	struct mount *mnt, *child, *tmp;
-
-	list_for_each_entry(mnt, to_umount, mnt_list) {
-		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
-			/* topper? */
-			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
-				list_move_tail(&child->mnt_umounting, to_restore);
-			else
-				umount_one(child, to_umount);
-		}
-	}
-}
-
-static void restore_mounts(struct list_head *to_restore)
-{
-	/* Restore mounts to a clean working state */
-	while (!list_empty(to_restore)) {
-		struct mount *mnt, *parent;
-		struct mountpoint *mp;
-
-		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
-		CLEAR_MNT_MARK(mnt);
-		list_del_init(&mnt->mnt_umounting);
-
-		/* Should this mount be reparented? */
-		mp = mnt->mnt_mp;
-		parent = mnt->mnt_parent;
-		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
-			mp = parent->mnt_mp;
-			parent = parent->mnt_parent;
-		}
-		if (parent != mnt->mnt_parent) {
-			mnt_change_mountpoint(parent, mp, mnt);
-			mnt_notify_add(mnt);
-		}
-	}
-}
-
-static void cleanup_umount_visitations(struct list_head *visited)
-{
-	while (!list_empty(visited)) {
-		struct mount *mnt =
-			list_first_entry(visited, struct mount, mnt_umounting);
-		list_del_init(&mnt->mnt_umounting);
+	for (p = m->mnt_parent; is_candidate(p); m = p, p = p->mnt_parent) {
+		if (IS_MNT_MARKED(m))	// all candidates beneath are overmounts
+			return;
+		SET_MNT_MARK(m);
+		if (m != p->overmount)
+			p->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
 	}
 }
 
 /*
- * collect all mounts that receive propagation from the mount in @list,
- * and return these additional mounts in the same list.
- * @list: the list of mounts to be unmounted.
+ * Find and exclude all umount candidates forbidden by @m
+ * (see Documentation/filesystems/propagate_umount.txt)
+ * If we can immediately tell that @m is OK to unmount (unlocked
+ * and all children are already committed to unmounting) commit
+ * to unmounting it.
+ * Only @m itself might be taken from the candidates list;
+ * anything found by trim_ancestors() is marked non-candidate
+ * and left on the list.
+ */
+static void trim_one(struct mount *m, struct list_head *to_umount)
+{
+	bool remove_this = false, found = false, umount_this = false;
+	struct mount *n;
+
+	if (!is_candidate(m)) { // trim_ancestors() left it on list
+		remove_from_candidate_list(m);
+		return;
+	}
+
+	list_for_each_entry(n, &m->mnt_mounts, mnt_child) {
+		if (!is_candidate(n)) {
+			found = true;
+			if (n != m->overmount) {
+				remove_this = true;
+				break;
+			}
+		}
+	}
+	if (found) {
+		trim_ancestors(m);
+	} else if (!IS_MNT_LOCKED(m) && list_empty(&m->mnt_mounts)) {
+		remove_this = true;
+		umount_this = true;
+	}
+	if (remove_this) {
+		remove_from_candidate_list(m);
+		if (umount_this)
+			umount_one(m, to_umount);
+	}
+}
+
+static void handle_locked(struct mount *m, struct list_head *to_umount)
+{
+	struct mount *cutoff = m, *p;
+
+	if (!is_candidate(m)) { // trim_ancestors() left it on list
+		remove_from_candidate_list(m);
+		return;
+	}
+	for (p = m; is_candidate(p); p = p->mnt_parent) {
+		remove_from_candidate_list(p);
+		if (!IS_MNT_LOCKED(p))
+			cutoff = p->mnt_parent;
+	}
+	if (will_be_unmounted(p))
+		cutoff = p;
+	while (m != cutoff) {
+		umount_one(m, to_umount);
+		m = m->mnt_parent;
+	}
+}
+
+/*
+ * @m is not to going away, and it overmounts the top of a stack of mounts
+ * that are going away.  We know that all of those are fully overmounted
+ * by the one above (@m being the topmost of the chain), so @m can be slid
+ * in place where the bottom of the stack is attached.
  *
- * vfsmount lock must be held for write
+ * NOTE: here we temporarily violate a constraint - two mounts end up with
+ * the same parent and mountpoint; that will be remedied as soon as we
+ * return from propagate_umount() - its caller (umount_tree()) will detach
+ * the stack from the parent it (and now @m) is attached to.  umount_tree()
+ * might choose to keep unmounted pieces stuck to each other, but it always
+ * detaches them from the mounts that remain in the tree.
  */
-int propagate_umount(struct list_head *list)
+static void reparent(struct mount *m)
 {
-	struct mount *mnt;
-	LIST_HEAD(to_restore);
-	LIST_HEAD(to_umount);
-	LIST_HEAD(visited);
+	struct mount *p = m;
+	struct mountpoint *mp;
 
-	/* Find candidates for unmounting */
-	list_for_each_entry_reverse(mnt, list, mnt_list) {
-		struct mount *parent = mnt->mnt_parent;
-		struct mount *m;
+	do {
+		mp = p->mnt_mp;
+		p = p->mnt_parent;
+	} while (will_be_unmounted(p));
 
-		/*
-		 * If this mount has already been visited it is known that it's
-		 * entire peer group and all of their slaves in the propagation
-		 * tree for the mountpoint has already been visited and there is
-		 * no need to visit them again.
-		 */
-		if (!list_empty(&mnt->mnt_umounting))
-			continue;
+	mnt_change_mountpoint(p, mp, m);
+	mnt_notify_add(m);
+}
 
-		list_add_tail(&mnt->mnt_umounting, &visited);
-		for (m = propagation_next(parent, parent); m;
-		     m = propagation_next(m, parent)) {
-			struct mount *child = __lookup_mnt(&m->mnt,
-							   mnt->mnt_mountpoint);
-			if (!child)
-				continue;
+/**
+ * propagate_umount - apply propagation rules to the set of mounts for umount()
+ * @set: the list of mounts to be unmounted.
+ *
+ * Collect all mounts that receive propagation from the mount in @set and have
+ * no obstacles to being unmounted.  Add these additional mounts to the set.
+ *
+ * See Documentation/filesystems/propagate_umount.txt if you do anything in
+ * this area.
+ *
+ * Locks held:
+ * mount_lock (write_seqlock), namespace_sem (exclusive).
+ */
+void propagate_umount(struct list_head *set)
+{
+	struct mount *m, *p;
+	LIST_HEAD(to_umount);	// committed to unmounting
+	LIST_HEAD(candidates);	// undecided umount candidates
 
-			if (!list_empty(&child->mnt_umounting)) {
-				/*
-				 * If the child has already been visited it is
-				 * know that it's entire peer group and all of
-				 * their slaves in the propgation tree for the
-				 * mountpoint has already been visited and there
-				 * is no need to visit this subtree again.
-				 */
-				m = skip_propagation_subtree(m, parent);
-				continue;
-			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
-				/*
-				 * We have come across a partially unmounted
-				 * mount in a list that has not been visited
-				 * yet. Remember it has been visited and
-				 * continue about our merry way.
-				 */
-				list_add_tail(&child->mnt_umounting, &visited);
-				continue;
-			}
+	// collect all candidates
+	gather_candidates(set, &candidates);
 
-			/* Check the child and parents while progress is made */
-			while (__propagate_umount(child,
-						  &to_umount, &to_restore)) {
-				/* Is the parent a umount candidate? */
-				child = child->mnt_parent;
-				if (list_empty(&child->mnt_umounting))
-					break;
-			}
-		}
-	}
+	// reduce the set until it's non-shifting
+	list_for_each_entry_safe(m, p, &candidates, mnt_list)
+		trim_one(m, &to_umount);
 
-	umount_list(&to_umount, &to_restore);
-	restore_mounts(&to_restore);
-	cleanup_umount_visitations(&visited);
-	list_splice_tail(&to_umount, list);
+	// ... and non-revealing
+	while (!list_empty(&candidates)) {
+		m = list_first_entry(&candidates, struct mount, mnt_list);
+		handle_locked(m, &to_umount);
+	}
 
-	return 0;
+	// now to_umount consists of all acceptable candidates
+	// deal with reparenting of remaining overmounts on those
+	list_for_each_entry(m, &to_umount, mnt_list) {
+		if (m->overmount)
+			reparent(m->overmount);
+	}
+
+	// and fold them into the set
+	list_splice_tail_init(&to_umount, set);
 }
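The counterpart from userspace: unmounting at one peer takes the propagated copies with it, which is exactly the set propagate_umount() computes. A sketch that assumes the /tmp/A and /tmp/B peer setup from the earlier example:

#include <stdio.h>
#include <sys/mount.h>
#include <sys/stat.h>

int main(void)
{
	struct stat sub, parent;

	/* unmount at one peer; propagation unmounts the copy at the other */
	if (umount("/tmp/A/sub")) {
		perror("umount");
		return 1;
	}
	stat("/tmp/B/sub", &sub);
	stat("/tmp/B", &parent);
	printf("peer copy also gone: %s\n",
	       sub.st_dev == parent.st_dev ? "yes" : "no");
	return 0;
}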
fs/pnode.h (27 lines changed)
@@ -10,14 +10,14 @@
 #include <linux/list.h>
 #include "mount.h"
 
-#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
+#define IS_MNT_SHARED(m) ((m)->mnt_t_flags & T_SHARED)
 #define IS_MNT_SLAVE(m) ((m)->mnt_master)
 #define IS_MNT_NEW(m) (!(m)->mnt_ns)
-#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
-#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
-#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
-#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
-#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
+#define CLEAR_MNT_SHARED(m) ((m)->mnt_t_flags &= ~T_SHARED)
+#define IS_MNT_UNBINDABLE(m) ((m)->mnt_t_flags & T_UNBINDABLE)
+#define IS_MNT_MARKED(m) ((m)->mnt_t_flags & T_MARKED)
+#define SET_MNT_MARK(m) ((m)->mnt_t_flags |= T_MARKED)
+#define CLEAR_MNT_MARK(m) ((m)->mnt_t_flags &= ~T_MARKED)
 #define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
 
 #define CL_EXPIRE 0x01
@@ -25,19 +25,26 @@
 #define CL_COPY_UNBINDABLE 0x04
 #define CL_MAKE_SHARED 0x08
 #define CL_PRIVATE 0x10
-#define CL_SHARED_TO_SLAVE 0x20
 #define CL_COPY_MNT_NS_FILE 0x40
 
+/*
+ * EXCL[namespace_sem]
+ */
 static inline void set_mnt_shared(struct mount *mnt)
 {
-	mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
-	mnt->mnt.mnt_flags |= MNT_SHARED;
+	mnt->mnt_t_flags &= ~T_SHARED_MASK;
+	mnt->mnt_t_flags |= T_SHARED;
+}
+
+static inline bool peers(const struct mount *m1, const struct mount *m2)
+{
+	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
 }
 
 void change_mnt_propagation(struct mount *, int);
 int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
 		  struct hlist_head *);
-int propagate_umount(struct list_head *);
+void propagate_umount(struct list_head *);
 int propagate_mount_busy(struct mount *, int);
 void propagate_mount_unlock(struct mount *);
 void mnt_release_group_id(struct mount *);
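The group id that peers() compares is visible from userspace: /proc/self/mountinfo tags shared mounts with shared:<id> and slaves with master:<id> in the optional-fields column. A quick filter:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/self/mountinfo", "r");

	if (!f)
		return 1;
	/* print only mounts that participate in propagation */
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "shared:") || strstr(line, "master:"))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}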
include/linux/mount.h
@@ -35,9 +35,6 @@ enum mount_flags {
 	MNT_SHRINKABLE = 0x100,
 	MNT_WRITE_HOLD = 0x200,
 
-	MNT_SHARED = 0x1000,	/* if the vfsmount is a shared mount */
-	MNT_UNBINDABLE = 0x2000,	/* if the vfsmount is a unbindable mount */
-
 	MNT_INTERNAL = 0x4000,
 
 	MNT_LOCK_ATIME = 0x040000,
@@ -48,25 +45,15 @@ enum mount_flags {
 	MNT_LOCKED = 0x800000,
 	MNT_DOOMED = 0x1000000,
 	MNT_SYNC_UMOUNT = 0x2000000,
-	MNT_MARKED = 0x4000000,
 	MNT_UMOUNT = 0x8000000,
 
-	/*
-	 * MNT_SHARED_MASK is the set of flags that should be cleared when a
-	 * mount becomes shared.  Currently, this is only the flag that says a
-	 * mount cannot be bind mounted, since this is how we create a mount
-	 * that shares events with another mount.  If you add a new MNT_*
-	 * flag, consider how it interacts with shared mounts.
-	 */
-	MNT_SHARED_MASK = MNT_UNBINDABLE,
 	MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC
				 | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME
				 | MNT_READONLY | MNT_NOSYMFOLLOW,
 	MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,
 
-	MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
-			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED |
-			     MNT_LOCKED,
+	MNT_INTERNAL_FLAGS = MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED |
+			     MNT_SYNC_UMOUNT | MNT_LOCKED
 };
 
 struct vfsmount {
@@ -98,6 +85,7 @@ int mnt_get_write_access(struct vfsmount *mnt);
 void mnt_put_write_access(struct vfsmount *mnt);
 
 extern struct vfsmount *fc_mount(struct fs_context *fc);
+extern struct vfsmount *fc_mount_longterm(struct fs_context *fc);
 extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 					int flags, const char *name,
ipc/mqueue.c
@@ -483,7 +483,7 @@ static struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
 	put_user_ns(fc->user_ns);
 	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
 
-	mnt = fc_mount(fc);
+	mnt = fc_mount_longterm(fc);
 	put_fs_context(fc);
 	return mnt;
 }