Merge branch 'iocb' into for-next

This commit is contained in:
Al Viro
2015-04-11 22:24:41 -04:00
68 changed files with 201 additions and 271 deletions

150
fs/aio.c
View File

@@ -151,6 +151,38 @@ struct kioctx {
unsigned id;
};
/*
* We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
* cancelled or completed (this makes a certain amount of sense because
* successful cancellation - io_cancel() - does deliver the completion to
* userspace).
*
* And since most things don't implement kiocb cancellation and we'd really like
* kiocb completion to be lockless when possible, we use ki_cancel to
* synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
* with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
*/
#define KIOCB_CANCELLED ((void *) (~0ULL))
/*
 * AIO-private request state wrapped around the generic struct kiocb.
 * Code that is handed only the embedded kiocb (kiocb_set_cancel_fn(),
 * aio_complete()) gets back to this wrapper with
 * container_of(..., struct aio_kiocb, common).
 */
struct aio_kiocb {
/* Embedded generic kiocb; the container_of() anchor for this struct. */
struct kiocb common;
/* ioctx that aio_complete() uses to locate the completion ring pages. */
struct kioctx *ki_ctx;
/*
 * Cancel callback. kiocb_cancel() swaps in KIOCB_CANCELLED with cmpxchg()
 * and then invokes the previous value on &common.
 */
kiocb_cancel_fn *ki_cancel;
/*
 * User's aiocb: reported as event->obj on completion and compared
 * against the caller-supplied pointer in lookup_kiocb().
 */
struct iocb __user *ki_user_iocb; /* user's aiocb */
/* Copied from iocb->aio_data at submit time, returned in event->data. */
__u64 ki_user_data; /* user's data for completion */
/* Links the request on ctx->active_reqs. */
struct list_head ki_list; /* the aio core uses this
* for cancellation */
/*
* If the aio_resfd field of the userspace iocb is not zero,
* this is the underlying eventfd context to deliver events to.
* kiocb_free() drops it with eventfd_ctx_put() when non-NULL.
*/
struct eventfd_ctx *ki_eventfd;
};
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
unsigned long aio_nr; /* current system wide number of aio requests */
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -480,8 +512,9 @@ static int aio_setup_ring(struct kioctx *ctx)
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
@@ -496,7 +529,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
static int kiocb_cancel(struct kiocb *kiocb)
static int kiocb_cancel(struct aio_kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;
@@ -514,7 +547,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
} while (cancel != old);
return cancel(kiocb);
return cancel(&kiocb->common);
}
static void free_ioctx(struct work_struct *work)
@@ -550,13 +583,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
static void free_ioctx_users(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, users);
struct kiocb *req;
struct aio_kiocb *req;
spin_lock_irq(&ctx->ctx_lock);
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
struct kiocb, ki_list);
struct aio_kiocb, ki_list);
list_del_init(&req->ki_list);
kiocb_cancel(req);
@@ -778,22 +811,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
return 0;
}
/* wait_on_sync_kiocb:
* Waits on the given sync kiocb to complete.
*
* Sleeps in TASK_UNINTERRUPTIBLE until req->ki_ctx becomes non-NULL and
* then returns req->ki_user_data. The sync branch of aio_complete()
* stores the result in ki_user_data, sets ki_ctx to ERR_PTR(-EXDEV) and
* wakes the waiting task, so a non-NULL ki_ctx acts as the "done" flag
* and ki_user_data carries the I/O result.
*/
ssize_t wait_on_sync_kiocb(struct kiocb *req)
{
while (!req->ki_ctx) {
set_current_state(TASK_UNINTERRUPTIBLE);
/*
 * Test again now that the task state has been set, and skip the
 * sleep if the completion has already been posted.
 */
if (req->ki_ctx)
break;
io_schedule();
}
/* Either path out of the loop may leave the task state set; reset it. */
__set_current_state(TASK_RUNNING);
return req->ki_user_data;
}
EXPORT_SYMBOL(wait_on_sync_kiocb);
/*
* exit_aio: called when the last user of mm goes away. At this point, there is
* no way for any new requests to be submitted or any of the io_* syscalls to be
@@ -948,9 +965,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
*/
static inline struct kiocb *aio_get_req(struct kioctx *ctx)
static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
struct kiocb *req;
struct aio_kiocb *req;
if (!get_reqs_available(ctx)) {
user_refill_reqs_available(ctx);
@@ -971,10 +988,10 @@ out_put:
return NULL;
}
static void kiocb_free(struct kiocb *req)
static void kiocb_free(struct aio_kiocb *req)
{
if (req->ki_filp)
fput(req->ki_filp);
if (req->common.ki_filp)
fput(req->common.ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req);
@@ -1010,8 +1027,9 @@ out:
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
void aio_complete(struct kiocb *iocb, long res, long res2)
static void aio_complete(struct kiocb *kiocb, long res, long res2)
{
struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
@@ -1025,13 +1043,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
* ref, no other paths have a way to get another ref
* - the sync task helpfully left a reference to itself in the iocb
*/
if (is_sync_kiocb(iocb)) {
iocb->ki_user_data = res;
smp_wmb();
iocb->ki_ctx = ERR_PTR(-EXDEV);
wake_up_process(iocb->ki_obj.tsk);
return;
}
BUG_ON(is_sync_kiocb(kiocb));
if (iocb->ki_list.next) {
unsigned long flags;
@@ -1057,7 +1069,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
event->obj = (u64)(unsigned long)iocb->ki_obj.user;
event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
event->data = iocb->ki_user_data;
event->res = res;
event->res2 = res2;
@@ -1066,7 +1078,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
res, res2);
/* after flagging the request as done, we
@@ -1113,7 +1125,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
percpu_ref_put(&ctx->reqs);
}
EXPORT_SYMBOL(aio_complete);
/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
@@ -1344,12 +1355,13 @@ typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
int rw, char __user *buf,
unsigned long *nr_segs,
size_t *len,
struct iovec **iovec,
bool compat)
{
ssize_t ret;
*nr_segs = kiocb->ki_nbytes;
*nr_segs = *len;
#ifdef CONFIG_COMPAT
if (compat)
@@ -1364,21 +1376,22 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
if (ret < 0)
return ret;
/* ki_nbytes now reflect bytes instead of segs */
kiocb->ki_nbytes = ret;
/* len now reflects bytes instead of segs */
*len = ret;
return 0;
}
/*
 * aio_setup_single_vector
 *	Describe one contiguous user buffer as a single-element iovec.
 *
 *	The buffer is first checked with access_ok(), using an access type
 *	derived from rw. On success iovec[0] is filled in with buf and the
 *	transfer length, and *nr_segs is set to 1.
 *
 *	Returns 0 on success, or -EFAULT if access_ok() rejects the buffer.
 */
static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
int rw, char __user *buf,
unsigned long *nr_segs,
size_t len,
struct iovec *iovec)
{
if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
if (unlikely(!access_ok(!rw, buf, len)))
return -EFAULT;
iovec->iov_base = buf;
iovec->iov_len = kiocb->ki_nbytes;
iovec->iov_len = len;
*nr_segs = 1;
return 0;
}
@@ -1388,7 +1401,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
* Performs the initial checks and io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
char __user *buf, bool compat)
char __user *buf, size_t len, bool compat)
{
struct file *file = req->ki_filp;
ssize_t ret;
@@ -1423,21 +1436,21 @@ rw_common:
if (!rw_op && !iter_op)
return -EINVAL;
ret = (opcode == IOCB_CMD_PREADV ||
opcode == IOCB_CMD_PWRITEV)
? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
&iovec, compat)
: aio_setup_single_vector(req, rw, buf, &nr_segs,
iovec);
if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs,
&len, &iovec, compat);
else
ret = aio_setup_single_vector(req, rw, buf, &nr_segs,
len, iovec);
if (!ret)
ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
ret = rw_verify_area(rw, file, &req->ki_pos, len);
if (ret < 0) {
if (iovec != inline_vecs)
kfree(iovec);
return ret;
}
req->ki_nbytes = ret;
len = ret;
/* XXX: move/kill - rw_verify_area()? */
/* This matches the pread()/pwrite() logic */
@@ -1450,7 +1463,7 @@ rw_common:
file_start_write(file);
if (iter_op) {
iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
iov_iter_init(&iter, rw, iovec, nr_segs, len);
ret = iter_op(req, &iter);
} else {
ret = rw_op(req, iovec, nr_segs, req->ki_pos);
@@ -1500,7 +1513,7 @@ rw_common:
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
struct kiocb *req;
struct aio_kiocb *req;
ssize_t ret;
/* enforce forwards compatibility on users */
@@ -1523,11 +1536,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
req->ki_filp = fget(iocb->aio_fildes);
if (unlikely(!req->ki_filp)) {
req->common.ki_filp = fget(iocb->aio_fildes);
if (unlikely(!req->common.ki_filp)) {
ret = -EBADF;
goto out_put_req;
}
req->common.ki_pos = iocb->aio_offset;
req->common.ki_complete = aio_complete;
req->common.ki_flags = 0;
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
@@ -1542,6 +1558,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_eventfd = NULL;
goto out_put_req;
}
req->common.ki_flags |= IOCB_EVENTFD;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1550,13 +1568,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
}
req->ki_obj.user = user_iocb;
req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
req->ki_pos = iocb->aio_offset;
req->ki_nbytes = iocb->aio_nbytes;
ret = aio_run_iocb(req, iocb->aio_lio_opcode,
ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
(char __user *)(unsigned long)iocb->aio_buf,
iocb->aio_nbytes,
compat);
if (ret)
goto out_put_req;
@@ -1643,10 +1660,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
u32 key)
static struct aio_kiocb *
lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
{
struct list_head *pos;
struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
@@ -1654,9 +1671,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
return NULL;
/* TODO: use a hash or array, this sucks. */
list_for_each(pos, &ctx->active_reqs) {
struct kiocb *kiocb = list_kiocb(pos);
if (kiocb->ki_obj.user == iocb)
list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
if (kiocb->ki_user_iocb == iocb)
return kiocb;
}
return NULL;
@@ -1676,7 +1692,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
{
struct kioctx *ctx;
struct kiocb *kiocb;
struct aio_kiocb *kiocb;
u32 key;
int ret;