Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs update from Al Viro:

 - big one - consolidation of descriptor-related logics; almost all of
   that is moved to fs/file.c

   (BTW, I'm seriously tempted to rename the result to fd.c.  As it is,
   we have a situation when file_table.c is about handling of struct
   file and file.c is about handling of descriptor tables; the reasons
   are historical - file_table.c used to be about a static array of
   struct file we used to have way back).

   A lot of stray ends got cleaned up and converted to saner primitives,
   disgusting mess in android/binder.c is still disgusting, but at least
   doesn't poke so much in descriptor table guts anymore.  A bunch of
   relatively minor races got fixed in process, plus an ext4 struct file
   leak.

 - related thing - fget_light() partially unuglified; see fdget() in
   there (and yes, it generates the code as good as we used to have).

 - also related - bits of Cyrill's procfs stuff that got entangled into
   that work; _not_ all of it, just the initial move to fs/proc/fd.c and
   switch of fdinfo to seq_file.

 - Alex's fs/coredump.c spiltoff - the same story, had been easier to
   take that commit than mess with conflicts.  The rest is a separate
   pile, this was just a mechanical code movement.

 - a few misc patches all over the place.  Not all for this cycle,
   there'll be more (and quite a few currently sit in akpm's tree)."

Fix up trivial conflicts in the android binder driver, and some fairly
simple conflicts due to two different changes to the sock_alloc_file()
interface ("take descriptor handling from sock_alloc_file() to callers"
vs "net: Providing protocol type via system.sockprotoname xattr of
/proc/PID/fd entries" adding a dentry name to the socket)

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (72 commits)
  MAX_LFS_FILESIZE should be a loff_t
  compat: fs: Generic compat_sys_sendfile implementation
  fs: push rcu_barrier() from deactivate_locked_super() to filesystems
  btrfs: reada_extent doesn't need kref for refcount
  coredump: move core dump functionality into its own file
  coredump: prevent double-free on an error path in core dumper
  usb/gadget: fix misannotations
  fcntl: fix misannotations
  ceph: don't abuse d_delete() on failure exits
  hypfs: ->d_parent is never NULL or negative
  vfs: delete surplus inode NULL check
  switch simple cases of fget_light to fdget
  new helpers: fdget()/fdput()
  switch o2hb_region_dev_write() to fget_light()
  proc_map_files_readdir(): don't bother with grabbing files
  make get_file() return its argument
  vhost_set_vring(): turn pollstart/pollstop into bool
  switch prctl_set_mm_exe_file() to fget_light()
  switch xfs_find_handle() to fget_light()
  switch xfs_swapext() to fget_light()
  ...
This commit is contained in:
Linus Torvalds
2012-10-02 20:25:04 -07:00
142 changed files with 2914 additions and 2876 deletions

130
fs/open.c
View File

@@ -132,27 +132,27 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
struct inode * inode;
struct inode *inode;
struct dentry *dentry;
struct file * file;
struct fd f;
int error;
error = -EINVAL;
if (length < 0)
goto out;
error = -EBADF;
file = fget(fd);
if (!file)
f = fdget(fd);
if (!f.file)
goto out;
/* explicitly opened as large or we are on 64-bit box */
if (file->f_flags & O_LARGEFILE)
if (f.file->f_flags & O_LARGEFILE)
small = 0;
dentry = file->f_path.dentry;
dentry = f.file->f_path.dentry;
inode = dentry->d_inode;
error = -EINVAL;
if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
goto out_putf;
error = -EINVAL;
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
goto out_putf;
sb_start_write(inode->i_sb);
error = locks_verify_truncate(inode, file, length);
error = locks_verify_truncate(inode, f.file, length);
if (!error)
error = security_path_truncate(&file->f_path);
error = security_path_truncate(&f.file->f_path);
if (!error)
error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
sb_end_write(inode->i_sb);
out_putf:
fput(file);
fdput(f);
out:
return error;
}
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
{
struct file *file;
struct fd f = fdget(fd);
int error = -EBADF;
file = fget(fd);
if (file) {
error = do_fallocate(file, mode, offset, len);
fput(file);
if (f.file) {
error = do_fallocate(f.file, mode, offset, len);
fdput(f);
}
return error;
}
@@ -400,16 +398,15 @@ out:
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
struct file *file;
struct fd f = fdget_raw(fd);
struct inode *inode;
int error, fput_needed;
int error = -EBADF;
error = -EBADF;
file = fget_raw_light(fd, &fput_needed);
if (!file)
if (!f.file)
goto out;
inode = file->f_path.dentry->d_inode;
inode = f.file->f_path.dentry->d_inode;
error = -ENOTDIR;
if (!S_ISDIR(inode->i_mode))
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
if (!error)
set_fs_pwd(current->fs, &file->f_path);
set_fs_pwd(current->fs, &f.file->f_path);
out_putf:
fput_light(file, fput_needed);
fdput(f);
out:
return error;
}
@@ -582,23 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
{
struct file * file;
struct fd f = fdget(fd);
int error = -EBADF;
struct dentry * dentry;
file = fget(fd);
if (!file)
if (!f.file)
goto out;
error = mnt_want_write_file(file);
error = mnt_want_write_file(f.file);
if (error)
goto out_fput;
dentry = file->f_path.dentry;
audit_inode(NULL, dentry);
error = chown_common(&file->f_path, user, group);
mnt_drop_write_file(file);
audit_inode(NULL, f.file->f_path.dentry);
error = chown_common(&f.file->f_path, user, group);
mnt_drop_write_file(f.file);
out_fput:
fput(file);
fdput(f);
out:
return error;
}
@@ -803,50 +797,6 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = files_fdtable(files);
__clear_open_fd(fd, fdt);
if (fd < files->next_fd)
files->next_fd = fd;
}
void put_unused_fd(unsigned int fd)
{
struct files_struct *files = current->files;
spin_lock(&files->file_lock);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
}
EXPORT_SYMBOL(put_unused_fd);
/*
* Install a file pointer in the fd array.
*
* The VFS is full of places where we drop the files lock between
* setting the open_fds bitmap and installing the file in the file
* array. At any such point, we are vulnerable to a dup2() race
* installing a file in the array before us. We need to detect this and
* fput() the struct file we are about to overwrite in this case.
*
* It should never happen - if we allow dup2() do it, _really_ bad things
* will follow.
*/
void fd_install(unsigned int fd, struct file *file)
{
struct files_struct *files = current->files;
struct fdtable *fdt;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
BUG_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
}
EXPORT_SYMBOL(fd_install);
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
int lookup_flags = 0;
@@ -858,7 +808,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
op->mode = 0;
/* Must never be set by userspace */
flags &= ~FMODE_NONOTIFY;
flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
/*
* O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1038,23 +988,7 @@ EXPORT_SYMBOL(filp_close);
*/
SYSCALL_DEFINE1(close, unsigned int, fd)
{
struct file * filp;
struct files_struct *files = current->files;
struct fdtable *fdt;
int retval;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
if (fd >= fdt->max_fds)
goto out_unlock;
filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
rcu_assign_pointer(fdt->fd[fd], NULL);
__clear_close_on_exec(fd, fdt);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
retval = filp_close(filp, files);
int retval = __close_fd(current->files, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
@@ -1064,10 +998,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
retval = -EINTR;
return retval;
out_unlock:
spin_unlock(&files->file_lock);
return -EBADF;
}
EXPORT_SYMBOL(sys_close);