mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
Merge tag 'vfs-6.15-rc1.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs file handling updates from Christian Brauner:
"This contains performance improvements for struct file's new refcount
mechanism and various other performance work:
- In the stock kernel, transitioning a file to the "no refs held" state
penalizes the caller with an extra atomic operation to block any
further increments. For cases where the file is highly likely to be
going away, this is easily avoidable.
Add file_ref_put_close() to better handle the common case where
closing a file descriptor also operates on the last reference, and
build fput_close_sync() and fput_close() on top of it. This brings
about a 1% performance improvement by eliding one atomic operation
in the common case.
- Predict no error in close(), since the vast majority of the time the
system call returns 0.
- Reduce the work done in fdget_pos() by predicting that the file was
found and by explicitly comparing the reference count to one and
ignoring the dead zone"
* tag 'vfs-6.15-rc1.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs: reduce work in fdget_pos()
fs: use fput_close() in path_openat()
fs: use fput_close() in filp_close()
fs: use fput_close_sync() in close()
file: add fput and file_ref_put routines optimized for use when closing a fd
fs: predict no error in close()
This commit is contained in:
52
fs/file.c
52
fs/file.c
@@ -26,6 +26,28 @@
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
/**
 * __file_ref_put_badval - Repair the counter after a put() saw a bad value
 * @ref:	Pointer to the reference count
 * @cnt:	The counter value observed by the caller
 *
 * Two cases are handled:
 *  - @cnt >= FILE_REF_RELEASED: the counter was already in the dead zone,
 *    so this put() is imbalanced. A one-time warning is emitted and the
 *    counter is set back to FILE_REF_DEAD.
 *  - @cnt > FILE_REF_MAXREF: the put() hit a saturated counter, which is
 *    reset to FILE_REF_SATURATED.
 *
 * Return: Always false, i.e. the caller must not deconstruct the object.
 */
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt)
{
	if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
		/* Imbalanced put(): park the counter at DEAD again. */
		atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
	} else if (cnt > FILE_REF_MAXREF) {
		/* put() on a saturated counter: restore the saturation value. */
		atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
	}

	return false;
}
|
||||
|
||||
/**
|
||||
* __file_ref_put - Slowpath of file_ref_put()
|
||||
* @ref: Pointer to the reference count
|
||||
@@ -67,24 +89,7 @@ bool __file_ref_put(file_ref_t *ref, unsigned long cnt)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the reference count was already in the dead zone, then this
|
||||
* put() operation is imbalanced. Warn, put the reference count back to
|
||||
* DEAD and tell the caller to not deconstruct the object.
|
||||
*/
|
||||
if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
|
||||
atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a put() operation on a saturated refcount. Restore the
|
||||
* mean saturation value and tell the caller to not deconstruct the
|
||||
* object.
|
||||
*/
|
||||
if (cnt > FILE_REF_MAXREF)
|
||||
atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
|
||||
return false;
|
||||
return __file_ref_put_badval(ref, cnt);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__file_ref_put);
|
||||
|
||||
@@ -1179,8 +1184,13 @@ struct fd fdget_raw(unsigned int fd)
|
||||
*/
|
||||
static inline bool file_needs_f_pos_lock(struct file *file)
|
||||
{
|
||||
return (file->f_mode & FMODE_ATOMIC_POS) &&
|
||||
(file_count(file) > 1 || file->f_op->iterate_shared);
|
||||
if (!(file->f_mode & FMODE_ATOMIC_POS))
|
||||
return false;
|
||||
if (__file_ref_read_raw(&file->f_ref) != FILE_REF_ONEREF)
|
||||
return true;
|
||||
if (file->f_op->iterate_shared)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool file_seek_cur_needs_f_lock(struct file *file)
|
||||
@@ -1198,7 +1208,7 @@ struct fd fdget_pos(unsigned int fd)
|
||||
struct fd f = fdget(fd);
|
||||
struct file *file = fd_file(f);
|
||||
|
||||
if (file && file_needs_f_pos_lock(file)) {
|
||||
if (likely(file) && file_needs_f_pos_lock(file)) {
|
||||
f.word |= FDPUT_POS_UNLOCK;
|
||||
mutex_lock(&file->f_pos_lock);
|
||||
}
|
||||
|
||||
@@ -512,30 +512,36 @@ void flush_delayed_fput(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(flush_delayed_fput);
|
||||
|
||||
static void __fput_deferred(struct file *file)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
|
||||
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
|
||||
file_free(file);
|
||||
return;
|
||||
}
|
||||
|
||||
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
|
||||
init_task_work(&file->f_task_work, ____fput);
|
||||
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
|
||||
return;
|
||||
/*
|
||||
* After this task has run exit_task_work(),
|
||||
* task_work_add() will fail. Fall through to delayed
|
||||
* fput to avoid leaking *file.
|
||||
*/
|
||||
}
|
||||
|
||||
if (llist_add(&file->f_llist, &delayed_fput_list))
|
||||
schedule_delayed_work(&delayed_fput_work, 1);
|
||||
}
|
||||
|
||||
void fput(struct file *file)
|
||||
{
|
||||
if (file_ref_put(&file->f_ref)) {
|
||||
struct task_struct *task = current;
|
||||
|
||||
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
|
||||
file_free(file);
|
||||
return;
|
||||
}
|
||||
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
|
||||
init_task_work(&file->f_task_work, ____fput);
|
||||
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
|
||||
return;
|
||||
/*
|
||||
* After this task has run exit_task_work(),
|
||||
* task_work_add() will fail. Fall through to delayed
|
||||
* fput to avoid leaking *file.
|
||||
*/
|
||||
}
|
||||
|
||||
if (llist_add(&file->f_llist, &delayed_fput_list))
|
||||
schedule_delayed_work(&delayed_fput_work, 1);
|
||||
}
|
||||
if (unlikely(file_ref_put(&file->f_ref)))
|
||||
__fput_deferred(file);
|
||||
}
|
||||
EXPORT_SYMBOL(fput);
|
||||
|
||||
/*
|
||||
* synchronous analog of fput(); for kernel threads that might be needed
|
||||
@@ -550,10 +556,32 @@ void __fput_sync(struct file *file)
|
||||
if (file_ref_put(&file->f_ref))
|
||||
__fput(file);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(fput);
|
||||
EXPORT_SYMBOL(__fput_sync);
|
||||
|
||||
/*
|
||||
* Equivalent to __fput_sync(), but optimized for being called with the last
|
||||
* reference.
|
||||
*
|
||||
* See file_ref_put_close() for details.
|
||||
*/
|
||||
void fput_close_sync(struct file *file)
|
||||
{
|
||||
if (likely(file_ref_put_close(&file->f_ref)))
|
||||
__fput(file);
|
||||
}
|
||||
|
||||
/*
|
||||
* Equivalent to fput(), but optimized for being called with the last
|
||||
* reference.
|
||||
*
|
||||
* See file_ref_put_close() for details.
|
||||
*/
|
||||
void fput_close(struct file *file)
|
||||
{
|
||||
if (file_ref_put_close(&file->f_ref))
|
||||
__fput_deferred(file);
|
||||
}
|
||||
|
||||
void __init files_init(void)
|
||||
{
|
||||
struct kmem_cache_args args = {
|
||||
|
||||
@@ -118,6 +118,9 @@ static inline void put_file_access(struct file *file)
|
||||
}
|
||||
}
|
||||
|
||||
void fput_close_sync(struct file *);
|
||||
void fput_close(struct file *);
|
||||
|
||||
/*
|
||||
* super.c
|
||||
*/
|
||||
|
||||
@@ -4010,7 +4010,7 @@ static struct file *path_openat(struct nameidata *nd,
|
||||
WARN_ON(1);
|
||||
error = -EINVAL;
|
||||
}
|
||||
fput(file);
|
||||
fput_close(file);
|
||||
if (error == -EOPENSTALE) {
|
||||
if (flags & LOOKUP_RCU)
|
||||
error = -ECHILD;
|
||||
|
||||
15
fs/open.c
15
fs/open.c
@@ -1552,7 +1552,7 @@ int filp_close(struct file *filp, fl_owner_t id)
|
||||
int retval;
|
||||
|
||||
retval = filp_flush(filp, id);
|
||||
fput(filp);
|
||||
fput_close(filp);
|
||||
|
||||
return retval;
|
||||
}
|
||||
@@ -1578,13 +1578,16 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
* We're returning to user space. Don't bother
|
||||
* with any delayed fput() cases.
|
||||
*/
|
||||
__fput_sync(file);
|
||||
fput_close_sync(file);
|
||||
|
||||
if (likely(retval == 0))
|
||||
return 0;
|
||||
|
||||
/* can't restart close syscall because file table entry was cleared */
|
||||
if (unlikely(retval == -ERESTARTSYS ||
|
||||
retval == -ERESTARTNOINTR ||
|
||||
retval == -ERESTARTNOHAND ||
|
||||
retval == -ERESTART_RESTARTBLOCK))
|
||||
if (retval == -ERESTARTSYS ||
|
||||
retval == -ERESTARTNOINTR ||
|
||||
retval == -ERESTARTNOHAND ||
|
||||
retval == -ERESTART_RESTARTBLOCK)
|
||||
retval = -EINTR;
|
||||
|
||||
return retval;
|
||||
|
||||
@@ -61,6 +61,7 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
|
||||
atomic_long_set(&ref->refcnt, cnt - 1);
|
||||
}
|
||||
|
||||
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt);
|
||||
bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
|
||||
|
||||
/**
|
||||
@@ -160,6 +161,39 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
|
||||
return __file_ref_put(ref, cnt);
|
||||
}
|
||||
|
||||
/**
|
||||
* file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF
|
||||
* @ref: Pointer to the reference count
|
||||
*
|
||||
* Semantically it is equivalent to calling file_ref_put(), but it trades lower
|
||||
* performance in face of other CPUs also modifying the refcount for higher
|
||||
* performance when this happens to be the last reference.
|
||||
*
|
||||
* For the last reference file_ref_put() issues 2 atomics. One to drop the
|
||||
* reference and another to transition it to FILE_REF_DEAD. This routine does
|
||||
* the work in one step, but in order to do it has to pre-read the variable which
|
||||
* decreases scalability.
|
||||
*
|
||||
* Use with close() et al, stick to file_ref_put() by default.
|
||||
*/
|
||||
static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref)
|
||||
{
|
||||
long old, new;
|
||||
|
||||
old = atomic_long_read(&ref->refcnt);
|
||||
do {
|
||||
if (unlikely(old < 0))
|
||||
return __file_ref_put_badval(ref, old);
|
||||
|
||||
if (old == FILE_REF_ONEREF)
|
||||
new = FILE_REF_DEAD;
|
||||
else
|
||||
new = old - 1;
|
||||
} while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new));
|
||||
|
||||
return new == FILE_REF_DEAD;
|
||||
}
|
||||
|
||||
/**
|
||||
* file_ref_read - Read the number of file references
|
||||
* @ref: Pointer to the reference count
|
||||
@@ -174,4 +208,18 @@ static inline unsigned long file_ref_read(file_ref_t *ref)
|
||||
return c >= FILE_REF_RELEASED ? 0 : c + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* __file_ref_read_raw - Return the value stored in ref->refcnt
|
||||
* @ref: Pointer to the reference count
|
||||
*
|
||||
* Return: The raw value found in the counter
|
||||
*
|
||||
* A hack for file_needs_f_pos_lock(), you probably want to use
|
||||
* file_ref_read() instead.
|
||||
*/
|
||||
static inline unsigned long __file_ref_read_raw(file_ref_t *ref)
|
||||
{
|
||||
return atomic_long_read(&ref->refcnt);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user