[PATCH] Add vector AIO support

This work is initially done by Zach Brown to add support for vectored aio.
These are the core changes for AIO to support
IOCB_CMD_PREADV/IOCB_CMD_PWRITEV.

[akpm@osdl.org: huge build fix]
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Benjamin LaHaise <bcrl@kvack.org>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Badari Pulavarty
2006-09-30 23:28:49 -07:00
committed by Linus Torvalds
parent 543ade1fc9
commit eed4e51fb6
5 changed files with 203 additions and 108 deletions

169
fs/aio.c
View File

@@ -415,6 +415,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
req->ki_retry = NULL;
req->ki_dtor = NULL;
req->private = NULL;
req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);
/* Check if the completion queue has enough free space to
@@ -460,6 +461,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
if (req->ki_dtor)
req->ki_dtor(req);
if (req->ki_iovec != &req->ki_inline_vec)
kfree(req->ki_iovec);
kmem_cache_free(kiocb_cachep, req);
ctx->reqs_active--;
@@ -1301,42 +1304,60 @@ asmlinkage long sys_io_destroy(aio_context_t ctx)
return -EINVAL;
}
/*
* aio_p{read,write} are the default ki_retry methods for
* IO_CMD_P{READ,WRITE}. They maintains kiocb retry state around potentially
* multiple calls to f_op->aio_read(). They loop around partial progress
* instead of returning -EIOCBRETRY because they don't have the means to call
* kick_iocb().
*/
static ssize_t aio_pread(struct kiocb *iocb)
static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
{
struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];
BUG_ON(ret <= 0);
while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
ssize_t this = min((ssize_t)iov->iov_len, ret);
iov->iov_base += this;
iov->iov_len -= this;
iocb->ki_left -= this;
ret -= this;
if (iov->iov_len == 0) {
iocb->ki_cur_seg++;
iov++;
}
}
/* the caller should not have done more io than what fit in
* the remaining iovecs */
BUG_ON(ret > 0 && iocb->ki_left == 0);
}
static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
ssize_t ret = 0;
unsigned short opcode;
if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
(iocb->ki_opcode == IOCB_CMD_PREAD)) {
rw_op = file->f_op->aio_read;
opcode = IOCB_CMD_PREADV;
} else {
rw_op = file->f_op->aio_write;
opcode = IOCB_CMD_PWRITEV;
}
do {
iocb->ki_inline_vec.iov_base = iocb->ki_buf;
iocb->ki_inline_vec.iov_len = iocb->ki_left;
ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
iocb->ki_nr_segs - iocb->ki_cur_seg,
iocb->ki_pos);
if (ret > 0)
aio_advance_iovec(iocb, ret);
ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec,
1, iocb->ki_pos);
/*
* Can't just depend on iocb->ki_left to determine
* whether we are done. This may have been a short read.
*/
if (ret > 0) {
iocb->ki_buf += ret;
iocb->ki_left -= ret;
}
/*
* For pipes and sockets we return once we have some data; for
* regular files we retry till we complete the entire read or
* find that we can't read any more data (e.g short reads).
*/
/* retry all partial writes. retry partial reads as long as its a
* regular file. */
} while (ret > 0 && iocb->ki_left > 0 &&
!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode));
(opcode == IOCB_CMD_PWRITEV ||
(!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
/* This means we must have transferred all that we could */
/* No need to retry anymore */
@@ -1346,30 +1367,6 @@ static ssize_t aio_pread(struct kiocb *iocb)
return ret;
}
/* see aio_pread() */
static ssize_t aio_pwrite(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
ssize_t ret = 0;
do {
iocb->ki_inline_vec.iov_base = iocb->ki_buf;
iocb->ki_inline_vec.iov_len = iocb->ki_left;
ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec,
1, iocb->ki_pos);
if (ret > 0) {
iocb->ki_buf += ret;
iocb->ki_left -= ret;
}
} while (ret > 0 && iocb->ki_left > 0);
if ((ret == 0) || (iocb->ki_left == 0))
ret = iocb->ki_nbytes - iocb->ki_left;
return ret;
}
static ssize_t aio_fdsync(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
@@ -1390,6 +1387,38 @@ static ssize_t aio_fsync(struct kiocb *iocb)
return ret;
}
static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb)
{
ssize_t ret;
ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf,
kiocb->ki_nbytes, 1,
&kiocb->ki_inline_vec, &kiocb->ki_iovec);
if (ret < 0)
goto out;
kiocb->ki_nr_segs = kiocb->ki_nbytes;
kiocb->ki_cur_seg = 0;
/* ki_nbytes/left now reflect bytes instead of segs */
kiocb->ki_nbytes = ret;
kiocb->ki_left = ret;
ret = 0;
out:
return ret;
}
static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
{
kiocb->ki_iovec = &kiocb->ki_inline_vec;
kiocb->ki_iovec->iov_base = kiocb->ki_buf;
kiocb->ki_iovec->iov_len = kiocb->ki_left;
kiocb->ki_nr_segs = 1;
kiocb->ki_cur_seg = 0;
kiocb->ki_nbytes = kiocb->ki_left;
return 0;
}
/*
* aio_setup_iocb:
* Performs the initial checks and aio retry method
@@ -1412,9 +1441,12 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
ret = security_file_permission(file, MAY_READ);
if (unlikely(ret))
break;
ret = aio_setup_single_vector(kiocb);
if (ret)
break;
ret = -EINVAL;
if (file->f_op->aio_read)
kiocb->ki_retry = aio_pread;
kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_PWRITE:
ret = -EBADF;
@@ -1427,9 +1459,40 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
ret = security_file_permission(file, MAY_WRITE);
if (unlikely(ret))
break;
ret = aio_setup_single_vector(kiocb);
if (ret)
break;
ret = -EINVAL;
if (file->f_op->aio_write)
kiocb->ki_retry = aio_pwrite;
kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_PREADV:
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
break;
ret = security_file_permission(file, MAY_READ);
if (unlikely(ret))
break;
ret = aio_setup_vectored_rw(READ, kiocb);
if (ret)
break;
ret = -EINVAL;
if (file->f_op->aio_read)
kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_PWRITEV:
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
break;
ret = security_file_permission(file, MAY_WRITE);
if (unlikely(ret))
break;
ret = aio_setup_vectored_rw(WRITE, kiocb);
if (ret)
break;
ret = -EINVAL;
if (file->f_op->aio_write)
kiocb->ki_retry = aio_rw_vect_retry;
break;
case IOCB_CMD_FDSYNC:
ret = -EINVAL;