Merge branch 'next'

This commit is contained in:
Trond Myklebust
2008-10-15 15:54:56 -04:00
26 changed files with 961 additions and 506 deletions

View File

@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
}
/* save the nodename */
clnt->cl_nodelen = strlen(utsname()->nodename);
clnt->cl_nodelen = strlen(init_utsname()->nodename);
if (clnt->cl_nodelen > UNX_MAXNODENAME)
clnt->cl_nodelen = UNX_MAXNODENAME;
memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen);
rpc_register_client(clnt);
return clnt;

View File

@ -460,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
return rpc_run_task(&task_setup_data);
}
/*
* In the case where rpc clients have been cloned, we want to make
* sure that we use the program number/version etc of the actual
* owner of the xprt. To do so, we walk back up the tree of parents
* to find whoever created the transport and/or whoever has the
* autobind flag set.
*/
static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
{
struct rpc_clnt *parent = clnt->cl_parent;
while (parent != clnt) {
if (parent->cl_xprt != clnt->cl_xprt)
break;
if (clnt->cl_autobind)
break;
clnt = parent;
parent = parent->cl_parent;
}
return clnt;
}
/**
* rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request
@ -469,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
*/
void rpcb_getport_async(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_clnt *clnt;
struct rpc_procinfo *proc;
u32 bind_version;
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_xprt *xprt;
struct rpc_clnt *rpcb_clnt;
static struct rpcbind_args *map;
struct rpc_task *child;
@ -481,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task)
size_t salen;
int status;
clnt = rpcb_find_transport_owner(task->tk_client);
xprt = clnt->cl_xprt;
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
task->tk_pid, __func__,
clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
/* Autobind on cloned rpc clients is discouraged */
BUG_ON(clnt->cl_parent != clnt);
/* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */
rpc_sleep_on(&xprt->binding, task, NULL);
@ -549,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task)
status = -ENOMEM;
dprintk("RPC: %5u %s: no memory available\n",
task->tk_pid, __func__);
goto bailout_nofree;
goto bailout_release_client;
}
map->r_prog = clnt->cl_prog;
map->r_vers = clnt->cl_vers;
@ -569,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task)
task->tk_pid, __func__);
return;
}
rpc_put_task(child);
task->tk_xprt->stat.bind_count++;
xprt->stat.bind_count++;
rpc_put_task(child);
return;
bailout_release_client:
rpc_release_client(rpcb_clnt);
bailout_nofree:
rpcb_wake_rpcbind_waiters(xprt, status);
task->tk_status = status;

View File

@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport)
goto out;
}
result = -EINVAL;
if (try_module_get(THIS_MODULE)) {
list_add_tail(&transport->list, &xprt_list);
printk(KERN_INFO "RPC: Registered %s transport module.\n",
transport->name);
result = 0;
}
list_add_tail(&transport->list, &xprt_list);
printk(KERN_INFO "RPC: Registered %s transport module.\n",
transport->name);
result = 0;
out:
spin_unlock(&xprt_list_lock);
@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport)
"RPC: Unregistered %s transport module.\n",
transport->name);
list_del_init(&transport->list);
module_put(THIS_MODULE);
goto out;
}
}

View File

@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
}
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
return n;
if (n == nsegs)
return 0;
seg[n].mr_page = NULL;
@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
if (hdrlen == 0)
return -1;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
" headerp 0x%p base 0x%p lkey 0x%x\n",
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey);
@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
* Scatter inline received data back into provided iov's.
*/
static void
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
int i, npages, curlen, olen;
char *destp;
@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
} else
rqst->rq_rcv_buf.tail[0].iov_len = 0;
if (pad) {
/* implicit padding on terminal chunk */
unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
while (pad--)
p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
}
if (copy_len)
dprintk("RPC: %s: %d bytes in"
" %d extra segments (%d lost)\n",
@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
if (++xprt->connect_cookie == 0) /* maintain a reserved value */
++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
} else {
if (xprt_test_and_clear_connected(xprt))
xprt_wake_pending_tasks(xprt, ep->rep_connected);
xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
spin_unlock_bh(&xprt->transport_lock);
}
@ -792,14 +805,20 @@ repost:
((unsigned char *)iptr - (unsigned char *)headerp);
status = rep->rr_len + rdmalen;
r_xprt->rx_stats.total_rdma_reply += rdmalen;
/* special case - last chunk may omit padding */
if (rdmalen &= 3) {
rdmalen = 4 - rdmalen;
status += rdmalen;
}
} else {
/* else ordinary inline */
rdmalen = 0;
iptr = (__be32 *)((unsigned char *)headerp + 28);
rep->rr_len -= 28; /*sizeof *headerp;*/
status = rep->rr_len;
}
/* Fix up the rpc results for upper layer */
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break;
case htonl(RDMA_NOMSG):

View File

@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
#if !RPCRDMA_PERSISTENT_REGISTRATION
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
#else
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
#endif
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize = 0;
#ifdef RPC_DEBUG
@ -139,6 +136,14 @@ static ctl_table xr_tunables_table[] = {
.extra1 = &min_memreg,
.extra2 = &max_memreg,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "rdma_pad_optimize",
.data = &xprt_rdma_pad_optimize,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = 0,
},
@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: closing\n", __func__);
if (r_xprt->rx_ep.rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
}
@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task)
/* Reconnect */
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
if (xprt->reestablish_timeout > (30 * HZ))
xprt->reestablish_timeout = (30 * HZ);
else if (xprt->reestablish_timeout < (5 * HZ))
xprt->reestablish_timeout = (5 * HZ);
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
out:
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
outfail:
@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task)
req->rl_reply->rr_xprt = xprt;
}
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
}
/* Must suppress retransmit to maintain credits */
if (req->rl_connect_cookie == xprt->connect_cookie)
goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie;
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
task->tk_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
return 0;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
}
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void)
{
int rc;
dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);

File diff suppressed because it is too large Load Diff

View File

@ -51,6 +51,9 @@
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
/*
* Interface Adapter -- one per transport instance
*/
@ -58,6 +61,8 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
u32 ri_dma_lkey;
int ri_have_dma_lkey;
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
union {
struct ib_mw *mw;
struct ib_fmr *fmr;
struct {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
} frmr;
} r;
struct list_head mw_list;
} *rl_mw;
@ -175,6 +184,7 @@ struct rpcrdma_req {
size_t rl_size; /* actual length of buffer */
unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@ -198,7 +208,7 @@ struct rpcrdma_buffer {
atomic_t rb_credits; /* most recent server credits */
unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
int rb_send_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
@ -273,6 +283,11 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize;
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/