[PATCH] files: files struct with RCU

Patch to eliminate struct files_struct.file_lock spinlock on the reader side
and use rcu refcounting rcuref_xxx api for the f_count refcounter.  The
updates to the fdtable are done by allocating a new fdtable structure and
setting files->fdt to point to the new structure.  The fdtable structure is
protected by RCU, thereby allowing lock-free lookup.  For fd arrays/sets that
are vmalloced, we use keventd to free them since RCU callbacks can't sleep.  A
global list of fdtables to be freed is not scalable, so we use a per-cpu list.
If keventd is already handling the current cpu's work, we use a timer to defer
queueing of that work.

Since the last publication, this patch has been re-written to avoid using
explicit memory barriers and to use the rcu_assign_pointer(), rcu_dereference()
primitives instead.  This required that the fd information be kept in a
separate structure (fdtable) and updated atomically.

Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Dipankar Sarma
2005-09-09 13:04:13 -07:00
committed by Linus Torvalds
parent 6e72ad2c58
commit ab2af1f500
10 changed files with 354 additions and 175 deletions

View File

@@ -411,15 +411,16 @@ void fastcall put_files_struct(struct files_struct *files)
close_files(files);
/*
* Free the fd and fdset arrays if we expanded them.
* If the fdtable was embedded, pass files for freeing
* at the end of the RCU grace period. Otherwise,
* you can free files immediately.
*/
fdt = files_fdtable(files);
if (fdt->fd != &files->fd_array[0])
free_fd_array(fdt->fd, fdt->max_fds);
if (fdt->max_fdset > __FD_SETSIZE) {
free_fdset(fdt->open_fds, fdt->max_fdset);
free_fdset(fdt->close_on_exec, fdt->max_fdset);
}
kmem_cache_free(files_cachep, files);
if (fdt == &files->fdtab)
fdt->free_files = files;
else
kmem_cache_free(files_cachep, files);
free_fdtable(fdt);
}
}

View File

@@ -35,6 +35,7 @@
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/futex.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
@@ -565,13 +566,12 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
return 0;
}
static int count_open_files(struct files_struct *files, int size)
static int count_open_files(struct fdtable *fdt)
{
int size = fdt->max_fdset;
int i;
struct fdtable *fdt;
/* Find the last open fd */
fdt = files_fdtable(files);
for (i = size/(8*sizeof(long)); i > 0; ) {
if (fdt->open_fds->fds_bits[--i])
break;
@@ -592,13 +592,17 @@ static struct files_struct *alloc_files(void)
atomic_set(&newf->count, 1);
spin_lock_init(&newf->file_lock);
fdt = files_fdtable(newf);
fdt = &newf->fdtab;
fdt->next_fd = 0;
fdt->max_fds = NR_OPEN_DEFAULT;
fdt->max_fdset = __FD_SETSIZE;
fdt->close_on_exec = &newf->close_on_exec_init;
fdt->open_fds = &newf->open_fds_init;
fdt->fd = &newf->fd_array[0];
INIT_RCU_HEAD(&fdt->rcu);
fdt->free_files = NULL;
fdt->next = NULL;
rcu_assign_pointer(newf->fdt, fdt);
out:
return newf;
}
@@ -637,7 +641,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
old_fdt = files_fdtable(oldf);
new_fdt = files_fdtable(newf);
size = old_fdt->max_fdset;
open_files = count_open_files(oldf, old_fdt->max_fdset);
open_files = count_open_files(old_fdt);
expand = 0;
/*
@@ -661,7 +665,14 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
spin_unlock(&newf->file_lock);
if (error < 0)
goto out_release;
new_fdt = files_fdtable(newf);
/*
* Reacquire the oldf lock and a pointer to its fd table
* who knows it may have a new bigger fd table. We need
* the latest pointer.
*/
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
}
old_fds = old_fdt->fd;
@@ -683,7 +694,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
*/
FD_CLR(open_files - i, new_fdt->open_fds);
}
*new_fds++ = f;
rcu_assign_pointer(*new_fds++, f);
}
spin_unlock(&oldf->file_lock);