[PATCH] files: files struct with RCU
Patch to eliminate struct files_struct.file_lock spinlock on the reader side and use the RCU refcounting rcuref_xxx API for the f_count refcounter. The updates to the fdtable are done by allocating a new fdtable structure and setting files->fdt to point to the new structure. The fdtable structure is protected by RCU, thereby allowing lock-free lookup. For fd arrays/sets that are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A global list of fdtables to be freed is not scalable, so we use a per-cpu list. If keventd is already handling the current cpu's work, we use a timer to defer queueing of that work. Since the last publication, this patch has been re-written to avoid using explicit memory barriers and to use the rcu_assign_pointer() and rcu_dereference() primitives instead. This required that the fd information be kept in a separate structure (fdtable) and updated atomically. Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
committed by
Linus Torvalds
parent
6e72ad2c58
commit
ab2af1f500
@@ -411,15 +411,16 @@ void fastcall put_files_struct(struct files_struct *files)
|
||||
close_files(files);
|
||||
/*
|
||||
* Free the fd and fdset arrays if we expanded them.
|
||||
* If the fdtable was embedded, pass files for freeing
|
||||
* at the end of the RCU grace period. Otherwise,
|
||||
* you can free files immediately.
|
||||
*/
|
||||
fdt = files_fdtable(files);
|
||||
if (fdt->fd != &files->fd_array[0])
|
||||
free_fd_array(fdt->fd, fdt->max_fds);
|
||||
if (fdt->max_fdset > __FD_SETSIZE) {
|
||||
free_fdset(fdt->open_fds, fdt->max_fdset);
|
||||
free_fdset(fdt->close_on_exec, fdt->max_fdset);
|
||||
}
|
||||
kmem_cache_free(files_cachep, files);
|
||||
if (fdt == &files->fdtab)
|
||||
fdt->free_files = files;
|
||||
else
|
||||
kmem_cache_free(files_cachep, files);
|
||||
free_fdtable(fdt);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -35,6 +35,7 @@
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/futex.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/audit.h>
|
||||
@@ -565,13 +566,12 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int count_open_files(struct files_struct *files, int size)
|
||||
static int count_open_files(struct fdtable *fdt)
|
||||
{
|
||||
int size = fdt->max_fdset;
|
||||
int i;
|
||||
struct fdtable *fdt;
|
||||
|
||||
/* Find the last open fd */
|
||||
fdt = files_fdtable(files);
|
||||
for (i = size/(8*sizeof(long)); i > 0; ) {
|
||||
if (fdt->open_fds->fds_bits[--i])
|
||||
break;
|
||||
@@ -592,13 +592,17 @@ static struct files_struct *alloc_files(void)
|
||||
atomic_set(&newf->count, 1);
|
||||
|
||||
spin_lock_init(&newf->file_lock);
|
||||
fdt = files_fdtable(newf);
|
||||
fdt = &newf->fdtab;
|
||||
fdt->next_fd = 0;
|
||||
fdt->max_fds = NR_OPEN_DEFAULT;
|
||||
fdt->max_fdset = __FD_SETSIZE;
|
||||
fdt->close_on_exec = &newf->close_on_exec_init;
|
||||
fdt->open_fds = &newf->open_fds_init;
|
||||
fdt->fd = &newf->fd_array[0];
|
||||
INIT_RCU_HEAD(&fdt->rcu);
|
||||
fdt->free_files = NULL;
|
||||
fdt->next = NULL;
|
||||
rcu_assign_pointer(newf->fdt, fdt);
|
||||
out:
|
||||
return newf;
|
||||
}
|
||||
@@ -637,7 +641,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
|
||||
old_fdt = files_fdtable(oldf);
|
||||
new_fdt = files_fdtable(newf);
|
||||
size = old_fdt->max_fdset;
|
||||
open_files = count_open_files(oldf, old_fdt->max_fdset);
|
||||
open_files = count_open_files(old_fdt);
|
||||
expand = 0;
|
||||
|
||||
/*
|
||||
@@ -661,7 +665,14 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
|
||||
spin_unlock(&newf->file_lock);
|
||||
if (error < 0)
|
||||
goto out_release;
|
||||
new_fdt = files_fdtable(newf);
|
||||
/*
|
||||
* Reacquire the oldf lock and a pointer to its fd table
|
||||
* who knows it may have a new bigger fd table. We need
|
||||
* the latest pointer.
|
||||
*/
|
||||
spin_lock(&oldf->file_lock);
|
||||
old_fdt = files_fdtable(oldf);
|
||||
}
|
||||
|
||||
old_fds = old_fdt->fd;
|
||||
@@ -683,7 +694,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
|
||||
*/
|
||||
FD_CLR(open_files - i, new_fdt->open_fds);
|
||||
}
|
||||
*new_fds++ = f;
|
||||
rcu_assign_pointer(*new_fds++, f);
|
||||
}
|
||||
spin_unlock(&oldf->file_lock);
|
||||
|
||||
|
Reference in New Issue
Block a user