Merge branch 'pnfs-submit' of git://git.open-osd.org/linux-open-osd

* 'pnfs-submit' of git://git.open-osd.org/linux-open-osd: (32 commits)
  pnfs-obj: pg_test check for max_io_size
  NFSv4.1: define nfs_generic_pg_test
  NFSv4.1: use pnfs_generic_pg_test directly by layout driver
  NFSv4.1: change pg_test return type to bool
  NFSv4.1: unify pnfs_pageio_init functions
  pnfs-obj: objlayout_encode_layoutcommit implementation
  pnfs: encode_layoutcommit
  pnfs-obj: report errors and .encode_layoutreturn Implementation.
  pnfs: encode_layoutreturn
  pnfs: layoutret_on_setattr
  pnfs: layoutreturn
  pnfs-obj: osd raid engine read/write implementation
  pnfs: support for non-rpc layout drivers
  pnfs-obj: define per-inode private structure
  pnfs: alloc and free layout_hdr layoutdriver methods
  pnfs-obj: objio_osd device information retrieval and caching
  pnfs-obj: decode layout, alloc/free lseg
  pnfs-obj: pnfs_osd XDR client implementation
  pnfs-obj: pnfs_osd XDR definitions
  pnfs-obj: objlayoutdriver module skeleton
  ...
This commit is contained in:
Linus Torvalds
2011-05-29 14:10:13 -07:00
32 changed files with 3910 additions and 282 deletions

View File

@@ -177,13 +177,28 @@ get_layout_hdr(struct pnfs_layout_hdr *lo)
atomic_inc(&lo->plh_refcount);
}
static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
}
static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
}
static void
destroy_layout_hdr(struct pnfs_layout_hdr *lo)
{
dprintk("%s: freeing layout cache %p\n", __func__, lo);
BUG_ON(!list_empty(&lo->plh_layouts));
NFS_I(lo->plh_inode)->layout = NULL;
kfree(lo);
pnfs_free_layout_hdr(lo);
}
static void
@@ -228,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg)
{
struct inode *inode = lseg->pls_layout->plh_inode;
BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del_init(&lseg->pls_list);
if (list_empty(&lseg->pls_layout->plh_segs)) {
set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
@@ -261,11 +276,72 @@ put_lseg(struct pnfs_layout_segment *lseg)
}
EXPORT_SYMBOL_GPL(put_lseg);
static bool
should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
static inline u64
end_offset(u64 start, u64 len)
{
return (recall_iomode == IOMODE_ANY ||
lseg_iomode == recall_iomode);
u64 end;
end = start + len;
return end >= start ? end : NFS4_MAX_UINT64;
}
/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
{
u64 end;
BUG_ON(!len);
end = start + len;
return end > start ? end - 1 : NFS4_MAX_UINT64;
}
/*
* is l2 fully contained in l1?
* start1 end1
* [----------------------------------)
* start2 end2
* [----------------)
*/
static inline int
lo_seg_contained(struct pnfs_layout_range *l1,
struct pnfs_layout_range *l2)
{
u64 start1 = l1->offset;
u64 end1 = end_offset(start1, l1->length);
u64 start2 = l2->offset;
u64 end2 = end_offset(start2, l2->length);
return (start1 <= start2) && (end1 >= end2);
}
/*
* is l1 and l2 intersecting?
* start1 end1
* [----------------------------------)
* start2 end2
* [----------------)
*/
static inline int
lo_seg_intersecting(struct pnfs_layout_range *l1,
struct pnfs_layout_range *l2)
{
u64 start1 = l1->offset;
u64 end1 = end_offset(start1, l1->length);
u64 start2 = l2->offset;
u64 end2 = end_offset(start2, l2->length);
return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
(end2 == NFS4_MAX_UINT64 || end2 > start1);
}
static bool
should_free_lseg(struct pnfs_layout_range *lseg_range,
struct pnfs_layout_range *recall_range)
{
return (recall_range->iomode == IOMODE_ANY ||
lseg_range->iomode == recall_range->iomode) &&
lo_seg_intersecting(lseg_range, recall_range);
}
/* Returns 1 if lseg is removed from list, 0 otherwise */
@@ -296,7 +372,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
int
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
u32 iomode)
struct pnfs_layout_range *recall_range)
{
struct pnfs_layout_segment *lseg, *next;
int invalid = 0, removed = 0;
@@ -309,7 +385,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return 0;
}
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
if (!recall_range ||
should_free_lseg(&lseg->pls_range, recall_range)) {
dprintk("%s: freeing lseg %p iomode %d "
"offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
@@ -358,7 +435,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
lo = nfsi->layout;
if (lo) {
lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
}
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
@@ -467,7 +544,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
u32 iomode,
struct pnfs_layout_range *range,
gfp_t gfp_flags)
{
struct inode *ino = lo->plh_inode;
@@ -499,11 +576,11 @@ send_layoutget(struct pnfs_layout_hdr *lo,
goto out_err_free;
}
lgp->args.minlength = NFS4_MAX_UINT64;
lgp->args.minlength = PAGE_CACHE_SIZE;
if (lgp->args.minlength > range->length)
lgp->args.minlength = range->length;
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
lgp->args.range.iomode = iomode;
lgp->args.range.offset = 0;
lgp->args.range.length = NFS4_MAX_UINT64;
lgp->args.range = *range;
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
@@ -518,7 +595,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
nfs4_proc_layoutget(lgp);
if (!lseg) {
/* remember that LAYOUTGET failed and suspend trying */
set_bit(lo_fail_bit(iomode), &lo->plh_flags);
set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
}
/* free xdr pages */
@@ -542,6 +619,51 @@ out_err_free:
return NULL;
}
/* Initiates a LAYOUTRETURN(FILE) */
int
_pnfs_return_layout(struct inode *ino)
{
struct pnfs_layout_hdr *lo = NULL;
struct nfs_inode *nfsi = NFS_I(ino);
LIST_HEAD(tmp_list);
struct nfs4_layoutreturn *lrp;
nfs4_stateid stateid;
int status = 0;
dprintk("--> %s\n", __func__);
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) {
spin_unlock(&ino->i_lock);
dprintk("%s: no layout segments to return\n", __func__);
goto out;
}
stateid = nfsi->layout->plh_stateid;
/* Reference matched in nfs4_layoutreturn_release */
get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
goto out;
}
lrp->args.stateid = stateid;
lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
lrp->args.inode = ino;
lrp->clp = NFS_SERVER(ino)->nfs_client;
status = nfs4_proc_layoutreturn(lrp);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
}
bool pnfs_roc(struct inode *ino)
{
struct pnfs_layout_hdr *lo;
@@ -625,10 +747,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
* are seen first.
*/
static s64
cmp_layout(u32 iomode1, u32 iomode2)
cmp_layout(struct pnfs_layout_range *l1,
struct pnfs_layout_range *l2)
{
s64 d;
/* high offset > low offset */
d = l1->offset - l2->offset;
if (d)
return d;
/* short length > long length */
d = l2->length - l1->length;
if (d)
return d;
/* read > read/write */
return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
}
static void
@@ -636,13 +771,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
struct pnfs_layout_segment *lseg)
{
struct pnfs_layout_segment *lp;
int found = 0;
dprintk("%s:Begin\n", __func__);
assert_spin_locked(&lo->plh_inode->i_lock);
list_for_each_entry(lp, &lo->plh_segs, pls_list) {
if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
continue;
list_add_tail(&lseg->pls_list, &lp->pls_list);
dprintk("%s: inserted lseg %p "
@@ -652,16 +786,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
lseg->pls_range.offset, lseg->pls_range.length,
lp, lp->pls_range.iomode, lp->pls_range.offset,
lp->pls_range.length);
found = 1;
break;
}
if (!found) {
list_add_tail(&lseg->pls_list, &lo->plh_segs);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu at tail\n",
__func__, lseg, lseg->pls_range.iomode,
lseg->pls_range.offset, lseg->pls_range.length);
goto out;
}
list_add_tail(&lseg->pls_list, &lo->plh_segs);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu at tail\n",
__func__, lseg, lseg->pls_range.iomode,
lseg->pls_range.offset, lseg->pls_range.length);
out:
get_layout_hdr(lo);
dprintk("%s:Return\n", __func__);
@@ -672,7 +804,7 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
struct pnfs_layout_hdr *lo;
lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
if (!lo)
return NULL;
atomic_set(&lo->plh_refcount, 1);
@@ -705,7 +837,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
if (likely(nfsi->layout == NULL)) /* Won the race? */
nfsi->layout = new;
else
kfree(new);
pnfs_free_layout_hdr(new);
return nfsi->layout;
}
@@ -721,16 +853,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
* READ RW true
*/
static int
is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
is_matching_lseg(struct pnfs_layout_range *ls_range,
struct pnfs_layout_range *range)
{
return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
struct pnfs_layout_range range1;
if ((range->iomode == IOMODE_RW &&
ls_range->iomode != IOMODE_RW) ||
!lo_seg_intersecting(ls_range, range))
return 0;
/* range1 covers only the first byte in the range */
range1 = *range;
range1.length = 1;
return lo_seg_contained(ls_range, &range1);
}
/*
* lookup range in layout
*/
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
pnfs_find_lseg(struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *range)
{
struct pnfs_layout_segment *lseg, *ret = NULL;
@@ -739,11 +883,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
assert_spin_locked(&lo->plh_inode->i_lock);
list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
is_matching_lseg(lseg, iomode)) {
is_matching_lseg(&lseg->pls_range, range)) {
ret = get_lseg(lseg);
break;
}
if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
if (cmp_layout(range, &lseg->pls_range) > 0)
break;
}
@@ -759,9 +903,17 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
u64 count,
enum pnfs_iomode iomode,
gfp_t gfp_flags)
{
struct pnfs_layout_range arg = {
.iomode = iomode,
.offset = pos,
.length = count,
};
unsigned pg_offset;
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct pnfs_layout_hdr *lo;
@@ -789,7 +941,7 @@ pnfs_update_layout(struct inode *ino,
goto out_unlock;
/* Check to see if the layout for the given range already exists */
lseg = pnfs_find_lseg(lo, iomode);
lseg = pnfs_find_lseg(lo, &arg);
if (lseg)
goto out_unlock;
@@ -811,7 +963,14 @@ pnfs_update_layout(struct inode *ino,
spin_unlock(&clp->cl_lock);
}
lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
pg_offset = arg.offset & ~PAGE_CACHE_MASK;
if (pg_offset) {
arg.offset -= pg_offset;
arg.length += pg_offset;
}
arg.length = PAGE_CACHE_ALIGN(arg.length);
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
if (!lseg && first) {
spin_lock(&clp->cl_lock);
list_del_init(&lo->plh_layouts);
@@ -838,17 +997,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
int status = 0;
/* Verify we got what we asked for.
* Note that because the xdr parsing only accepts a single
* element array, this can fail even if the server is behaving
* correctly.
*/
if (lgp->args.range.iomode > res->range.iomode ||
res->range.offset != 0 ||
res->range.length != NFS4_MAX_UINT64) {
status = -EINVAL;
goto out;
}
/* Inject layout blob into I/O device driver */
lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
if (!lseg || IS_ERR(lseg)) {
@@ -895,51 +1043,64 @@ out_forget_reply:
goto out;
}
static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev,
struct nfs_page *req)
bool
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
enum pnfs_iomode access_type;
gfp_t gfp_flags;
/* We assume that pg_ioflags == 0 iff we're reading a page */
if (pgio->pg_ioflags == 0) {
access_type = IOMODE_READ;
gfp_flags = GFP_KERNEL;
} else {
access_type = IOMODE_RW;
gfp_flags = GFP_NOFS;
}
if (pgio->pg_count == prev->wb_bytes) {
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
IOMODE_READ,
GFP_KERNEL);
req_offset(req),
pgio->pg_count,
access_type,
gfp_flags);
return true;
}
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
if (pgio->pg_lseg &&
req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
pgio->pg_lseg->pls_range.length))
return false;
return true;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
void
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
/*
* Called by non rpc-based layout drivers
*/
int
pnfs_ld_write_done(struct nfs_write_data *data)
{
struct pnfs_layoutdriver_type *ld;
int status;
ld = NFS_SERVER(inode)->pnfs_curr_ld;
pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
}
static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_count == prev->wb_bytes) {
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
IOMODE_RW,
GFP_NOFS);
if (!data->pnfs_error) {
pnfs_set_layoutcommit(data);
data->mds_ops->rpc_call_done(&data->task, data);
data->mds_ops->rpc_release(data);
return 0;
}
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}
void
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
struct pnfs_layoutdriver_type *ld;
ld = NFS_SERVER(inode)->pnfs_curr_ld;
pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
data->pnfs_error);
status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
data->mds_ops, NFS_FILE_SYNC);
return status ? : -EAGAIN;
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
@@ -965,6 +1126,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
return trypnfs;
}
/*
* Called by non rpc-based layout drivers
*/
int
pnfs_ld_read_done(struct nfs_read_data *data)
{
int status;
if (!data->pnfs_error) {
__nfs4_read_done_cb(data);
data->mds_ops->rpc_call_done(&data->task, data);
data->mds_ops->rpc_release(data);
return 0;
}
dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
data->pnfs_error);
status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
data->mds_ops);
return status ? : -EAGAIN;
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
/*
* Call the appropriate parallel I/O subsystem read function.
*/