Merge branch 'for-2.6.28' of git://linux-nfs.org/~bfields/linux
* 'for-2.6.28' of git://linux-nfs.org/~bfields/linux: (59 commits) svcrdma: Fix IRD/ORD polarity svcrdma: Update svc_rdma_send_error to use DMA LKEY svcrdma: Modify the RPC reply path to use FRMR when available svcrdma: Modify the RPC recv path to use FRMR when available svcrdma: Add support to svc_rdma_send to handle chained WR svcrdma: Modify post recv path to use local dma key svcrdma: Add a service to register a Fast Reg MR with the device svcrdma: Query device for Fast Reg support during connection setup svcrdma: Add FRMR get/put services NLM: Remove unused argument from svc_addsock() function NLM: Remove "proto" argument from lockd_up() NLM: Always start both UDP and TCP listeners lockd: Remove unused fields in the nlm_reboot structure lockd: Add helper to sanity check incoming NOTIFY requests lockd: change nlmclnt_grant() to take a "struct sockaddr *" lockd: Adjust nlmsvc_lookup_host() to accomodate AF_INET6 addresses lockd: Adjust nlmclnt_lookup_host() signature to accomodate non-AF_INET lockd: Support non-AF_INET addresses in nlm_lookup_host() NLM: Convert nlm_lookup_host() to use a single argument svcrdma: Add Fast Reg MR Data Types ...
This commit is contained in:
@@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
|
||||
clnt->cl_procinfo = version->procs;
|
||||
clnt->cl_maxproc = version->nrprocs;
|
||||
clnt->cl_protname = program->name;
|
||||
clnt->cl_prog = program->number;
|
||||
clnt->cl_prog = args->prognumber ? : program->number;
|
||||
clnt->cl_vers = version->number;
|
||||
clnt->cl_stats = program->stats;
|
||||
clnt->cl_metrics = rpc_alloc_iostats(clnt);
|
||||
|
@@ -20,6 +20,7 @@
|
||||
#include <linux/in6.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/errno.h>
|
||||
#include <net/ipv6.h>
|
||||
|
||||
#include <linux/sunrpc/clnt.h>
|
||||
#include <linux/sunrpc/sched.h>
|
||||
@@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
|
||||
}
|
||||
|
||||
static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
|
||||
u32 version, struct rpc_message *msg,
|
||||
int *result)
|
||||
u32 version, struct rpc_message *msg)
|
||||
{
|
||||
struct rpc_clnt *rpcb_clnt;
|
||||
int error = 0;
|
||||
int result, error = 0;
|
||||
|
||||
*result = 0;
|
||||
msg->rpc_resp = &result;
|
||||
|
||||
rpcb_clnt = rpcb_create_local(addr, addrlen, version);
|
||||
if (!IS_ERR(rpcb_clnt)) {
|
||||
@@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
|
||||
} else
|
||||
error = PTR_ERR(rpcb_clnt);
|
||||
|
||||
if (error < 0)
|
||||
if (error < 0) {
|
||||
printk(KERN_WARNING "RPC: failed to contact local rpcbind "
|
||||
"server (errno %d).\n", -error);
|
||||
dprintk("RPC: registration status %d/%d\n", error, *result);
|
||||
return error;
|
||||
}
|
||||
|
||||
return error;
|
||||
if (!result)
|
||||
return -EACCES;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
|
||||
* @vers: RPC version number to bind
|
||||
* @prot: transport protocol to register
|
||||
* @port: port value to register
|
||||
* @okay: OUT: result code
|
||||
*
|
||||
* Returns zero if the registration request was dispatched successfully
|
||||
* and the rpcbind daemon returned success. Otherwise, returns an errno
|
||||
* value that reflects the nature of the error (request could not be
|
||||
* dispatched, timed out, or rpcbind returned an error).
|
||||
*
|
||||
* RPC services invoke this function to advertise their contact
|
||||
* information via the system's rpcbind daemon. RPC services
|
||||
@@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
|
||||
* all registered transports for [program, version] from the local
|
||||
* rpcbind database.
|
||||
*
|
||||
* Returns zero if the registration request was dispatched
|
||||
* successfully and a reply was received. The rpcbind daemon's
|
||||
* boolean result code is stored in *okay.
|
||||
*
|
||||
* Returns an errno value and sets *result to zero if there was
|
||||
* some problem that prevented the rpcbind request from being
|
||||
* dispatched, or if the rpcbind daemon did not respond within
|
||||
* the timeout.
|
||||
*
|
||||
* This function uses rpcbind protocol version 2 to contact the
|
||||
* local rpcbind daemon.
|
||||
*
|
||||
@@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
|
||||
* IN6ADDR_ANY (ie available for all AF_INET and AF_INET6
|
||||
* addresses).
|
||||
*/
|
||||
int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
|
||||
int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
|
||||
{
|
||||
struct rpcbind_args map = {
|
||||
.r_prog = prog,
|
||||
@@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
|
||||
};
|
||||
struct rpc_message msg = {
|
||||
.rpc_argp = &map,
|
||||
.rpc_resp = okay,
|
||||
};
|
||||
|
||||
dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
|
||||
@@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
|
||||
|
||||
return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
|
||||
sizeof(rpcb_inaddr_loopback),
|
||||
RPCBVERS_2, &msg, okay);
|
||||
RPCBVERS_2, &msg);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
|
||||
|
||||
return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
|
||||
sizeof(rpcb_inaddr_loopback),
|
||||
RPCBVERS_4, msg, msg->rpc_resp);
|
||||
RPCBVERS_4, msg);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
|
||||
char buf[64];
|
||||
|
||||
/* Construct AF_INET6 universal address */
|
||||
snprintf(buf, sizeof(buf),
|
||||
NIP6_FMT".%u.%u",
|
||||
NIP6(address_to_register->sin6_addr),
|
||||
port >> 8, port & 0xff);
|
||||
if (ipv6_addr_any(&address_to_register->sin6_addr))
|
||||
snprintf(buf, sizeof(buf), "::.%u.%u",
|
||||
port >> 8, port & 0xff);
|
||||
else
|
||||
snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u",
|
||||
NIP6(address_to_register->sin6_addr),
|
||||
port >> 8, port & 0xff);
|
||||
map->r_addr = buf;
|
||||
|
||||
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
|
||||
@@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
|
||||
|
||||
return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
|
||||
sizeof(rpcb_in6addr_loopback),
|
||||
RPCBVERS_4, msg, msg->rpc_resp);
|
||||
RPCBVERS_4, msg);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
|
||||
* @version: RPC version number of service to (un)register
|
||||
* @address: address family, IP address, and port to (un)register
|
||||
* @netid: netid of transport protocol to (un)register
|
||||
* @result: result code from rpcbind RPC call
|
||||
*
|
||||
* Returns zero if the registration request was dispatched successfully
|
||||
* and the rpcbind daemon returned success. Otherwise, returns an errno
|
||||
* value that reflects the nature of the error (request could not be
|
||||
* dispatched, timed out, or rpcbind returned an error).
|
||||
*
|
||||
* RPC services invoke this function to advertise their contact
|
||||
* information via the system's rpcbind daemon. RPC services
|
||||
@@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
|
||||
* to zero. Callers pass a netid of "" to unregister all
|
||||
* transport netids associated with [program, version, address].
|
||||
*
|
||||
* Returns zero if the registration request was dispatched
|
||||
* successfully and a reply was received. The rpcbind daemon's
|
||||
* result code is stored in *result.
|
||||
*
|
||||
* Returns an errno value and sets *result to zero if there was
|
||||
* some problem that prevented the rpcbind request from being
|
||||
* dispatched, or if the rpcbind daemon did not respond within
|
||||
* the timeout.
|
||||
*
|
||||
* This function uses rpcbind protocol version 4 to contact the
|
||||
* local rpcbind daemon. The local rpcbind daemon must support
|
||||
* version 4 of the rpcbind protocol in order for these functions
|
||||
@@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
|
||||
* advertises the service on all IPv4 and IPv6 addresses.
|
||||
*/
|
||||
int rpcb_v4_register(const u32 program, const u32 version,
|
||||
const struct sockaddr *address, const char *netid,
|
||||
int *result)
|
||||
const struct sockaddr *address, const char *netid)
|
||||
{
|
||||
struct rpcbind_args map = {
|
||||
.r_prog = program,
|
||||
@@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version,
|
||||
};
|
||||
struct rpc_message msg = {
|
||||
.rpc_argp = &map,
|
||||
.rpc_resp = result,
|
||||
};
|
||||
|
||||
*result = 0;
|
||||
|
||||
switch (address->sa_family) {
|
||||
case AF_INET:
|
||||
return rpcb_register_netid4((struct sockaddr_in *)address,
|
||||
@@ -633,7 +624,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
|
||||
static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p,
|
||||
struct rpcbind_args *rpcb)
|
||||
{
|
||||
dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n",
|
||||
dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n",
|
||||
rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
|
||||
*p++ = htonl(rpcb->r_prog);
|
||||
*p++ = htonl(rpcb->r_vers);
|
||||
@@ -648,7 +639,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p,
|
||||
unsigned short *portp)
|
||||
{
|
||||
*portp = (unsigned short) ntohl(*p++);
|
||||
dprintk("RPC: rpcb_decode_getport result %u\n",
|
||||
dprintk("RPC: rpcb getport result: %u\n",
|
||||
*portp);
|
||||
return 0;
|
||||
}
|
||||
@@ -657,7 +648,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
|
||||
unsigned int *boolp)
|
||||
{
|
||||
*boolp = (unsigned int) ntohl(*p++);
|
||||
dprintk("RPC: rpcb_decode_set: call %s\n",
|
||||
dprintk("RPC: rpcb set/unset call %s\n",
|
||||
(*boolp ? "succeeded" : "failed"));
|
||||
return 0;
|
||||
}
|
||||
@@ -665,7 +656,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p,
|
||||
static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p,
|
||||
struct rpcbind_args *rpcb)
|
||||
{
|
||||
dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n",
|
||||
dprintk("RPC: encoding rpcb request (%u, %u, %s)\n",
|
||||
rpcb->r_prog, rpcb->r_vers, rpcb->r_addr);
|
||||
*p++ = htonl(rpcb->r_prog);
|
||||
*p++ = htonl(rpcb->r_vers);
|
||||
|
251
net/sunrpc/svc.c
251
net/sunrpc/svc.c
@@ -28,6 +28,8 @@
|
||||
|
||||
#define RPCDBG_FACILITY RPCDBG_SVCDSP
|
||||
|
||||
static void svc_unregister(const struct svc_serv *serv);
|
||||
|
||||
#define svc_serv_is_pooled(serv) ((serv)->sv_function)
|
||||
|
||||
/*
|
||||
@@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
|
||||
*/
|
||||
static struct svc_serv *
|
||||
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
|
||||
void (*shutdown)(struct svc_serv *serv))
|
||||
sa_family_t family, void (*shutdown)(struct svc_serv *serv))
|
||||
{
|
||||
struct svc_serv *serv;
|
||||
unsigned int vers;
|
||||
@@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
|
||||
|
||||
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
|
||||
return NULL;
|
||||
serv->sv_family = family;
|
||||
serv->sv_name = prog->pg_name;
|
||||
serv->sv_program = prog;
|
||||
serv->sv_nrthreads = 1;
|
||||
@@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
|
||||
spin_lock_init(&pool->sp_lock);
|
||||
}
|
||||
|
||||
|
||||
/* Remove any stale portmap registrations */
|
||||
svc_register(serv, 0, 0);
|
||||
svc_unregister(serv);
|
||||
|
||||
return serv;
|
||||
}
|
||||
|
||||
struct svc_serv *
|
||||
svc_create(struct svc_program *prog, unsigned int bufsize,
|
||||
void (*shutdown)(struct svc_serv *serv))
|
||||
sa_family_t family, void (*shutdown)(struct svc_serv *serv))
|
||||
{
|
||||
return __svc_create(prog, bufsize, /*npools*/1, shutdown);
|
||||
return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
|
||||
}
|
||||
EXPORT_SYMBOL(svc_create);
|
||||
|
||||
struct svc_serv *
|
||||
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
|
||||
void (*shutdown)(struct svc_serv *serv),
|
||||
sa_family_t family, void (*shutdown)(struct svc_serv *serv),
|
||||
svc_thread_fn func, struct module *mod)
|
||||
{
|
||||
struct svc_serv *serv;
|
||||
unsigned int npools = svc_pool_map_get();
|
||||
|
||||
serv = __svc_create(prog, bufsize, npools, shutdown);
|
||||
serv = __svc_create(prog, bufsize, npools, family, shutdown);
|
||||
|
||||
if (serv != NULL) {
|
||||
serv->sv_function = func;
|
||||
@@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv)
|
||||
if (svc_serv_is_pooled(serv))
|
||||
svc_pool_map_put();
|
||||
|
||||
/* Unregister service with the portmapper */
|
||||
svc_register(serv, 0, 0);
|
||||
svc_unregister(serv);
|
||||
kfree(serv->sv_pools);
|
||||
kfree(serv);
|
||||
}
|
||||
@@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp)
|
||||
}
|
||||
EXPORT_SYMBOL(svc_exit_thread);
|
||||
|
||||
#ifdef CONFIG_SUNRPC_REGISTER_V4
|
||||
|
||||
/*
|
||||
* Register an RPC service with the local portmapper.
|
||||
* To unregister a service, call this routine with
|
||||
* proto and port == 0.
|
||||
* Register an "inet" protocol family netid with the local
|
||||
* rpcbind daemon via an rpcbind v4 SET request.
|
||||
*
|
||||
* No netconfig infrastructure is available in the kernel, so
|
||||
* we map IP_ protocol numbers to netids by hand.
|
||||
*
|
||||
* Returns zero on success; a negative errno value is returned
|
||||
* if any error occurs.
|
||||
*/
|
||||
int
|
||||
svc_register(struct svc_serv *serv, int proto, unsigned short port)
|
||||
static int __svc_rpcb_register4(const u32 program, const u32 version,
|
||||
const unsigned short protocol,
|
||||
const unsigned short port)
|
||||
{
|
||||
struct sockaddr_in sin = {
|
||||
.sin_family = AF_INET,
|
||||
.sin_addr.s_addr = htonl(INADDR_ANY),
|
||||
.sin_port = htons(port),
|
||||
};
|
||||
char *netid;
|
||||
|
||||
switch (protocol) {
|
||||
case IPPROTO_UDP:
|
||||
netid = RPCBIND_NETID_UDP;
|
||||
break;
|
||||
case IPPROTO_TCP:
|
||||
netid = RPCBIND_NETID_TCP;
|
||||
break;
|
||||
default:
|
||||
return -EPROTONOSUPPORT;
|
||||
}
|
||||
|
||||
return rpcb_v4_register(program, version,
|
||||
(struct sockaddr *)&sin, netid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Register an "inet6" protocol family netid with the local
|
||||
* rpcbind daemon via an rpcbind v4 SET request.
|
||||
*
|
||||
* No netconfig infrastructure is available in the kernel, so
|
||||
* we map IP_ protocol numbers to netids by hand.
|
||||
*
|
||||
* Returns zero on success; a negative errno value is returned
|
||||
* if any error occurs.
|
||||
*/
|
||||
static int __svc_rpcb_register6(const u32 program, const u32 version,
|
||||
const unsigned short protocol,
|
||||
const unsigned short port)
|
||||
{
|
||||
struct sockaddr_in6 sin6 = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_addr = IN6ADDR_ANY_INIT,
|
||||
.sin6_port = htons(port),
|
||||
};
|
||||
char *netid;
|
||||
|
||||
switch (protocol) {
|
||||
case IPPROTO_UDP:
|
||||
netid = RPCBIND_NETID_UDP6;
|
||||
break;
|
||||
case IPPROTO_TCP:
|
||||
netid = RPCBIND_NETID_TCP6;
|
||||
break;
|
||||
default:
|
||||
return -EPROTONOSUPPORT;
|
||||
}
|
||||
|
||||
return rpcb_v4_register(program, version,
|
||||
(struct sockaddr *)&sin6, netid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Register a kernel RPC service via rpcbind version 4.
|
||||
*
|
||||
* Returns zero on success; a negative errno value is returned
|
||||
* if any error occurs.
|
||||
*/
|
||||
static int __svc_register(const u32 program, const u32 version,
|
||||
const sa_family_t family,
|
||||
const unsigned short protocol,
|
||||
const unsigned short port)
|
||||
{
|
||||
int error;
|
||||
|
||||
switch (family) {
|
||||
case AF_INET:
|
||||
return __svc_rpcb_register4(program, version,
|
||||
protocol, port);
|
||||
case AF_INET6:
|
||||
error = __svc_rpcb_register6(program, version,
|
||||
protocol, port);
|
||||
if (error < 0)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Work around bug in some versions of Linux rpcbind
|
||||
* which don't allow registration of both inet and
|
||||
* inet6 netids.
|
||||
*
|
||||
* Error return ignored for now.
|
||||
*/
|
||||
__svc_rpcb_register4(program, version,
|
||||
protocol, port);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EAFNOSUPPORT;
|
||||
}
|
||||
|
||||
#else /* CONFIG_SUNRPC_REGISTER_V4 */
|
||||
|
||||
/*
|
||||
* Register a kernel RPC service via rpcbind version 2.
|
||||
*
|
||||
* Returns zero on success; a negative errno value is returned
|
||||
* if any error occurs.
|
||||
*/
|
||||
static int __svc_register(const u32 program, const u32 version,
|
||||
sa_family_t family,
|
||||
const unsigned short protocol,
|
||||
const unsigned short port)
|
||||
{
|
||||
if (family != AF_INET)
|
||||
return -EAFNOSUPPORT;
|
||||
|
||||
return rpcb_register(program, version, protocol, port);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SUNRPC_REGISTER_V4 */
|
||||
|
||||
/**
|
||||
* svc_register - register an RPC service with the local portmapper
|
||||
* @serv: svc_serv struct for the service to register
|
||||
* @proto: transport protocol number to advertise
|
||||
* @port: port to advertise
|
||||
*
|
||||
* Service is registered for any address in serv's address family
|
||||
*/
|
||||
int svc_register(const struct svc_serv *serv, const unsigned short proto,
|
||||
const unsigned short port)
|
||||
{
|
||||
struct svc_program *progp;
|
||||
unsigned long flags;
|
||||
unsigned int i;
|
||||
int error = 0, dummy;
|
||||
int error = 0;
|
||||
|
||||
if (!port)
|
||||
clear_thread_flag(TIF_SIGPENDING);
|
||||
BUG_ON(proto == 0 && port == 0);
|
||||
|
||||
for (progp = serv->sv_program; progp; progp = progp->pg_next) {
|
||||
for (i = 0; i < progp->pg_nvers; i++) {
|
||||
if (progp->pg_vers[i] == NULL)
|
||||
continue;
|
||||
|
||||
dprintk("svc: svc_register(%s, %s, %d, %d)%s\n",
|
||||
dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
|
||||
progp->pg_name,
|
||||
i,
|
||||
proto == IPPROTO_UDP? "udp" : "tcp",
|
||||
port,
|
||||
i,
|
||||
serv->sv_family,
|
||||
progp->pg_vers[i]->vs_hidden?
|
||||
" (but not telling portmap)" : "");
|
||||
|
||||
if (progp->pg_vers[i]->vs_hidden)
|
||||
continue;
|
||||
|
||||
error = rpcb_register(progp->pg_prog, i, proto, port, &dummy);
|
||||
error = __svc_register(progp->pg_prog, i,
|
||||
serv->sv_family, proto, port);
|
||||
if (error < 0)
|
||||
break;
|
||||
if (port && !dummy) {
|
||||
error = -EACCES;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!port) {
|
||||
spin_lock_irqsave(¤t->sighand->siglock, flags);
|
||||
recalc_sigpending();
|
||||
spin_unlock_irqrestore(¤t->sighand->siglock, flags);
|
||||
return error;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SUNRPC_REGISTER_V4
|
||||
|
||||
static void __svc_unregister(const u32 program, const u32 version,
|
||||
const char *progname)
|
||||
{
|
||||
struct sockaddr_in6 sin6 = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_addr = IN6ADDR_ANY_INIT,
|
||||
.sin6_port = 0,
|
||||
};
|
||||
int error;
|
||||
|
||||
error = rpcb_v4_register(program, version,
|
||||
(struct sockaddr *)&sin6, "");
|
||||
dprintk("svc: %s(%sv%u), error %d\n",
|
||||
__func__, progname, version, error);
|
||||
}
|
||||
|
||||
#else /* CONFIG_SUNRPC_REGISTER_V4 */
|
||||
|
||||
static void __svc_unregister(const u32 program, const u32 version,
|
||||
const char *progname)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = rpcb_register(program, version, 0, 0);
|
||||
dprintk("svc: %s(%sv%u), error %d\n",
|
||||
__func__, progname, version, error);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SUNRPC_REGISTER_V4 */
|
||||
|
||||
/*
|
||||
* All netids, bind addresses and ports registered for [program, version]
|
||||
* are removed from the local rpcbind database (if the service is not
|
||||
* hidden) to make way for a new instance of the service.
|
||||
*
|
||||
* The result of unregistration is reported via dprintk for those who want
|
||||
* verification of the result, but is otherwise not important.
|
||||
*/
|
||||
static void svc_unregister(const struct svc_serv *serv)
|
||||
{
|
||||
struct svc_program *progp;
|
||||
unsigned long flags;
|
||||
unsigned int i;
|
||||
|
||||
clear_thread_flag(TIF_SIGPENDING);
|
||||
|
||||
for (progp = serv->sv_program; progp; progp = progp->pg_next) {
|
||||
for (i = 0; i < progp->pg_nvers; i++) {
|
||||
if (progp->pg_vers[i] == NULL)
|
||||
continue;
|
||||
if (progp->pg_vers[i]->vs_hidden)
|
||||
continue;
|
||||
|
||||
__svc_unregister(progp->pg_prog, i, progp->pg_name);
|
||||
}
|
||||
}
|
||||
|
||||
return error;
|
||||
spin_lock_irqsave(¤t->sighand->siglock, flags);
|
||||
recalc_sigpending();
|
||||
spin_unlock_irqrestore(¤t->sighand->siglock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(svc_xprt_init);
|
||||
|
||||
int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
|
||||
int flags)
|
||||
static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
|
||||
struct svc_serv *serv,
|
||||
unsigned short port, int flags)
|
||||
{
|
||||
struct svc_xprt_class *xcl;
|
||||
struct sockaddr_in sin = {
|
||||
.sin_family = AF_INET,
|
||||
.sin_addr.s_addr = htonl(INADDR_ANY),
|
||||
.sin_port = htons(port),
|
||||
};
|
||||
struct sockaddr_in6 sin6 = {
|
||||
.sin6_family = AF_INET6,
|
||||
.sin6_addr = IN6ADDR_ANY_INIT,
|
||||
.sin6_port = htons(port),
|
||||
};
|
||||
struct sockaddr *sap;
|
||||
size_t len;
|
||||
|
||||
switch (serv->sv_family) {
|
||||
case AF_INET:
|
||||
sap = (struct sockaddr *)&sin;
|
||||
len = sizeof(sin);
|
||||
break;
|
||||
case AF_INET6:
|
||||
sap = (struct sockaddr *)&sin6;
|
||||
len = sizeof(sin6);
|
||||
break;
|
||||
default:
|
||||
return ERR_PTR(-EAFNOSUPPORT);
|
||||
}
|
||||
|
||||
return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
|
||||
}
|
||||
|
||||
int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
|
||||
int flags)
|
||||
{
|
||||
struct svc_xprt_class *xcl;
|
||||
|
||||
dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
|
||||
spin_lock(&svc_xprt_class_lock);
|
||||
list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
|
||||
@@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
|
||||
goto err;
|
||||
|
||||
spin_unlock(&svc_xprt_class_lock);
|
||||
newxprt = xcl->xcl_ops->
|
||||
xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
|
||||
flags);
|
||||
newxprt = __svc_xpo_create(xcl, serv, port, flags);
|
||||
if (IS_ERR(newxprt)) {
|
||||
module_put(xcl->xcl_owner);
|
||||
return PTR_ERR(newxprt);
|
||||
|
@@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
|
||||
struct svc_sock *svsk;
|
||||
struct sock *inet;
|
||||
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
|
||||
int val;
|
||||
|
||||
dprintk("svc: svc_setup_socket %p\n", sock);
|
||||
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
|
||||
@@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
|
||||
else
|
||||
svc_tcp_init(svsk, serv);
|
||||
|
||||
/*
|
||||
* We start one listener per sv_serv. We want AF_INET
|
||||
* requests to be automatically shunted to our AF_INET6
|
||||
* listener using a mapped IPv4 address. Make sure
|
||||
* no-one starts an equivalent IPv4 listener, which
|
||||
* would steal our incoming connections.
|
||||
*/
|
||||
val = 0;
|
||||
if (serv->sv_family == AF_INET6)
|
||||
kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
|
||||
(char *)&val, sizeof(val));
|
||||
|
||||
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
|
||||
svsk, svsk->sk_sk);
|
||||
|
||||
@@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
|
||||
|
||||
int svc_addsock(struct svc_serv *serv,
|
||||
int fd,
|
||||
char *name_return,
|
||||
int *proto)
|
||||
char *name_return)
|
||||
{
|
||||
int err = 0;
|
||||
struct socket *so = sockfd_lookup(fd, &err);
|
||||
@@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv,
|
||||
sockfd_put(so);
|
||||
return err;
|
||||
}
|
||||
if (proto) *proto = so->sk->sk_protocol;
|
||||
return one_sock_name(name_return, svsk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(svc_addsock);
|
||||
|
@@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
|
||||
*
|
||||
* Assumptions:
|
||||
* - chunk[0]->position points to pages[0] at an offset of 0
|
||||
* - pages[] is not physically or virtually contigous and consists of
|
||||
* - pages[] is not physically or virtually contiguous and consists of
|
||||
* PAGE_SIZE elements.
|
||||
*
|
||||
* Output:
|
||||
@@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
|
||||
* chunk in the read list
|
||||
*
|
||||
*/
|
||||
static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
|
||||
static int map_read_chunks(struct svcxprt_rdma *xprt,
|
||||
struct svc_rqst *rqstp,
|
||||
struct svc_rdma_op_ctxt *head,
|
||||
struct rpcrdma_msg *rmsgp,
|
||||
@@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
|
||||
return sge_no;
|
||||
}
|
||||
|
||||
static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
|
||||
struct svc_rdma_op_ctxt *ctxt,
|
||||
struct kvec *vec,
|
||||
u64 *sgl_offset,
|
||||
int count)
|
||||
/* Map a read-chunk-list to an XDR and fast register the page-list.
|
||||
*
|
||||
* Assumptions:
|
||||
* - chunk[0] position points to pages[0] at an offset of 0
|
||||
* - pages[] will be made physically contiguous by creating a one-off memory
|
||||
* region using the fastreg verb.
|
||||
* - byte_count is # of bytes in read-chunk-list
|
||||
* - ch_count is # of chunks in read-chunk-list
|
||||
*
|
||||
* Output:
|
||||
* - sge array pointing into pages[] array.
|
||||
* - chunk_sge array specifying sge index and count for each
|
||||
* chunk in the read list
|
||||
*/
|
||||
static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
|
||||
struct svc_rqst *rqstp,
|
||||
struct svc_rdma_op_ctxt *head,
|
||||
struct rpcrdma_msg *rmsgp,
|
||||
struct svc_rdma_req_map *rpl_map,
|
||||
struct svc_rdma_req_map *chl_map,
|
||||
int ch_count,
|
||||
int byte_count)
|
||||
{
|
||||
int page_no;
|
||||
int ch_no;
|
||||
u32 offset;
|
||||
struct rpcrdma_read_chunk *ch;
|
||||
struct svc_rdma_fastreg_mr *frmr;
|
||||
int ret = 0;
|
||||
|
||||
frmr = svc_rdma_get_frmr(xprt);
|
||||
if (IS_ERR(frmr))
|
||||
return -ENOMEM;
|
||||
|
||||
head->frmr = frmr;
|
||||
head->arg.head[0] = rqstp->rq_arg.head[0];
|
||||
head->arg.tail[0] = rqstp->rq_arg.tail[0];
|
||||
head->arg.pages = &head->pages[head->count];
|
||||
head->hdr_count = head->count; /* save count of hdr pages */
|
||||
head->arg.page_base = 0;
|
||||
head->arg.page_len = byte_count;
|
||||
head->arg.len = rqstp->rq_arg.len + byte_count;
|
||||
head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
|
||||
|
||||
/* Fast register the page list */
|
||||
frmr->kva = page_address(rqstp->rq_arg.pages[0]);
|
||||
frmr->direction = DMA_FROM_DEVICE;
|
||||
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
|
||||
frmr->map_len = byte_count;
|
||||
frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
|
||||
for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
|
||||
frmr->page_list->page_list[page_no] =
|
||||
ib_dma_map_single(xprt->sc_cm_id->device,
|
||||
page_address(rqstp->rq_arg.pages[page_no]),
|
||||
PAGE_SIZE, DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
||||
frmr->page_list->page_list[page_no]))
|
||||
goto fatal_err;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
|
||||
}
|
||||
head->count += page_no;
|
||||
|
||||
/* rq_respages points one past arg pages */
|
||||
rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
|
||||
|
||||
/* Create the reply and chunk maps */
|
||||
offset = 0;
|
||||
ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
|
||||
for (ch_no = 0; ch_no < ch_count; ch_no++) {
|
||||
rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
|
||||
rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length;
|
||||
chl_map->ch[ch_no].count = 1;
|
||||
chl_map->ch[ch_no].start = ch_no;
|
||||
offset += ch->rc_target.rs_length;
|
||||
ch++;
|
||||
}
|
||||
|
||||
ret = svc_rdma_fastreg(xprt, frmr);
|
||||
if (ret)
|
||||
goto fatal_err;
|
||||
|
||||
return ch_no;
|
||||
|
||||
fatal_err:
|
||||
printk("svcrdma: error fast registering xdr for xprt %p", xprt);
|
||||
svc_rdma_put_frmr(xprt, frmr);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
|
||||
struct svc_rdma_op_ctxt *ctxt,
|
||||
struct svc_rdma_fastreg_mr *frmr,
|
||||
struct kvec *vec,
|
||||
u64 *sgl_offset,
|
||||
int count)
|
||||
{
|
||||
int i;
|
||||
|
||||
ctxt->count = count;
|
||||
ctxt->direction = DMA_FROM_DEVICE;
|
||||
for (i = 0; i < count; i++) {
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
ctxt->sge[i].addr =
|
||||
ib_dma_map_single(xprt->sc_cm_id->device,
|
||||
vec[i].iov_base, vec[i].iov_len,
|
||||
DMA_FROM_DEVICE);
|
||||
ctxt->sge[i].length = 0; /* in case map fails */
|
||||
if (!frmr) {
|
||||
ctxt->sge[i].addr =
|
||||
ib_dma_map_single(xprt->sc_cm_id->device,
|
||||
vec[i].iov_base,
|
||||
vec[i].iov_len,
|
||||
DMA_FROM_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
||||
ctxt->sge[i].addr))
|
||||
return -EINVAL;
|
||||
ctxt->sge[i].lkey = xprt->sc_dma_lkey;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
} else {
|
||||
ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
|
||||
ctxt->sge[i].lkey = frmr->mr->lkey;
|
||||
}
|
||||
ctxt->sge[i].length = vec[i].iov_len;
|
||||
ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey;
|
||||
*sgl_offset = *sgl_offset + vec[i].iov_len;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
|
||||
@@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
|
||||
struct svc_rdma_op_ctxt *hdr_ctxt)
|
||||
{
|
||||
struct ib_send_wr read_wr;
|
||||
struct ib_send_wr inv_wr;
|
||||
int err = 0;
|
||||
int ch_no;
|
||||
int ch_count;
|
||||
@@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
|
||||
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
|
||||
if (ch_count > RPCSVC_MAXPAGES)
|
||||
return -EINVAL;
|
||||
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
|
||||
rpl_map, chl_map,
|
||||
ch_count, byte_count);
|
||||
|
||||
if (!xprt->sc_frmr_pg_list_len)
|
||||
sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
|
||||
rpl_map, chl_map, ch_count,
|
||||
byte_count);
|
||||
else
|
||||
sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
|
||||
rpl_map, chl_map, ch_count,
|
||||
byte_count);
|
||||
if (sge_count < 0) {
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sgl_offset = 0;
|
||||
ch_no = 0;
|
||||
|
||||
@@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
|
||||
next_sge:
|
||||
ctxt = svc_rdma_get_context(xprt);
|
||||
ctxt->direction = DMA_FROM_DEVICE;
|
||||
ctxt->frmr = hdr_ctxt->frmr;
|
||||
ctxt->read_hdr = NULL;
|
||||
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
|
||||
clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
|
||||
|
||||
/* Prepare READ WR */
|
||||
memset(&read_wr, 0, sizeof read_wr);
|
||||
ctxt->wr_op = IB_WR_RDMA_READ;
|
||||
read_wr.wr_id = (unsigned long)ctxt;
|
||||
read_wr.opcode = IB_WR_RDMA_READ;
|
||||
ctxt->wr_op = read_wr.opcode;
|
||||
read_wr.send_flags = IB_SEND_SIGNALED;
|
||||
read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
|
||||
read_wr.wr.rdma.remote_addr =
|
||||
@@ -327,10 +444,15 @@ next_sge:
|
||||
read_wr.sg_list = ctxt->sge;
|
||||
read_wr.num_sge =
|
||||
rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
|
||||
rdma_set_ctxt_sge(xprt, ctxt,
|
||||
&rpl_map->sge[chl_map->ch[ch_no].start],
|
||||
&sgl_offset,
|
||||
read_wr.num_sge);
|
||||
err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
|
||||
&rpl_map->sge[chl_map->ch[ch_no].start],
|
||||
&sgl_offset,
|
||||
read_wr.num_sge);
|
||||
if (err) {
|
||||
svc_rdma_unmap_dma(ctxt);
|
||||
svc_rdma_put_context(ctxt, 0);
|
||||
goto out;
|
||||
}
|
||||
if (((ch+1)->rc_discrim == 0) &&
|
||||
(read_wr.num_sge == chl_map->ch[ch_no].count)) {
|
||||
/*
|
||||
@@ -339,6 +461,29 @@ next_sge:
|
||||
* the client and the RPC needs to be enqueued.
|
||||
*/
|
||||
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
|
||||
if (hdr_ctxt->frmr) {
|
||||
set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
|
||||
/*
|
||||
* Invalidate the local MR used to map the data
|
||||
* sink.
|
||||
*/
|
||||
if (xprt->sc_dev_caps &
|
||||
SVCRDMA_DEVCAP_READ_W_INV) {
|
||||
read_wr.opcode =
|
||||
IB_WR_RDMA_READ_WITH_INV;
|
||||
ctxt->wr_op = read_wr.opcode;
|
||||
read_wr.ex.invalidate_rkey =
|
||||
ctxt->frmr->mr->lkey;
|
||||
} else {
|
||||
/* Prepare INVALIDATE WR */
|
||||
memset(&inv_wr, 0, sizeof inv_wr);
|
||||
inv_wr.opcode = IB_WR_LOCAL_INV;
|
||||
inv_wr.send_flags = IB_SEND_SIGNALED;
|
||||
inv_wr.ex.invalidate_rkey =
|
||||
hdr_ctxt->frmr->mr->lkey;
|
||||
read_wr.next = &inv_wr;
|
||||
}
|
||||
}
|
||||
ctxt->read_hdr = hdr_ctxt;
|
||||
}
|
||||
/* Post the read */
|
||||
|
@@ -69,9 +69,127 @@
|
||||
* array is only concerned with the reply we are assured that we have
|
||||
* one extra page for the RPCRDMA header.
|
||||
*/
|
||||
static void xdr_to_sge(struct svcxprt_rdma *xprt,
|
||||
struct xdr_buf *xdr,
|
||||
struct svc_rdma_req_map *vec)
|
||||
int fast_reg_xdr(struct svcxprt_rdma *xprt,
|
||||
struct xdr_buf *xdr,
|
||||
struct svc_rdma_req_map *vec)
|
||||
{
|
||||
int sge_no;
|
||||
u32 sge_bytes;
|
||||
u32 page_bytes;
|
||||
u32 page_off;
|
||||
int page_no = 0;
|
||||
u8 *frva;
|
||||
struct svc_rdma_fastreg_mr *frmr;
|
||||
|
||||
frmr = svc_rdma_get_frmr(xprt);
|
||||
if (IS_ERR(frmr))
|
||||
return -ENOMEM;
|
||||
vec->frmr = frmr;
|
||||
|
||||
/* Skip the RPCRDMA header */
|
||||
sge_no = 1;
|
||||
|
||||
/* Map the head. */
|
||||
frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
|
||||
vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
|
||||
vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
|
||||
vec->count = 2;
|
||||
sge_no++;
|
||||
|
||||
/* Build the FRMR */
|
||||
frmr->kva = frva;
|
||||
frmr->direction = DMA_TO_DEVICE;
|
||||
frmr->access_flags = 0;
|
||||
frmr->map_len = PAGE_SIZE;
|
||||
frmr->page_list_len = 1;
|
||||
frmr->page_list->page_list[page_no] =
|
||||
ib_dma_map_single(xprt->sc_cm_id->device,
|
||||
(void *)xdr->head[0].iov_base,
|
||||
PAGE_SIZE, DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
||||
frmr->page_list->page_list[page_no]))
|
||||
goto fatal_err;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
|
||||
page_off = xdr->page_base;
|
||||
page_bytes = xdr->page_len + page_off;
|
||||
if (!page_bytes)
|
||||
goto encode_tail;
|
||||
|
||||
/* Map the pages */
|
||||
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
|
||||
vec->sge[sge_no].iov_len = page_bytes;
|
||||
sge_no++;
|
||||
while (page_bytes) {
|
||||
struct page *page;
|
||||
|
||||
page = xdr->pages[page_no++];
|
||||
sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
|
||||
page_bytes -= sge_bytes;
|
||||
|
||||
frmr->page_list->page_list[page_no] =
|
||||
ib_dma_map_page(xprt->sc_cm_id->device, page, 0,
|
||||
PAGE_SIZE, DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
||||
frmr->page_list->page_list[page_no]))
|
||||
goto fatal_err;
|
||||
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
page_off = 0; /* reset for next time through loop */
|
||||
frmr->map_len += PAGE_SIZE;
|
||||
frmr->page_list_len++;
|
||||
}
|
||||
vec->count++;
|
||||
|
||||
encode_tail:
|
||||
/* Map tail */
|
||||
if (0 == xdr->tail[0].iov_len)
|
||||
goto done;
|
||||
|
||||
vec->count++;
|
||||
vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
|
||||
|
||||
if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
|
||||
((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
|
||||
/*
|
||||
* If head and tail use the same page, we don't need
|
||||
* to map it again.
|
||||
*/
|
||||
vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
|
||||
} else {
|
||||
void *va;
|
||||
|
||||
/* Map another page for the tail */
|
||||
page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
|
||||
va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
|
||||
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
|
||||
|
||||
frmr->page_list->page_list[page_no] =
|
||||
ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
|
||||
DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
||||
frmr->page_list->page_list[page_no]))
|
||||
goto fatal_err;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
frmr->map_len += PAGE_SIZE;
|
||||
frmr->page_list_len++;
|
||||
}
|
||||
|
||||
done:
|
||||
if (svc_rdma_fastreg(xprt, frmr))
|
||||
goto fatal_err;
|
||||
|
||||
return 0;
|
||||
|
||||
fatal_err:
|
||||
printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
|
||||
svc_rdma_put_frmr(xprt, frmr);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int map_xdr(struct svcxprt_rdma *xprt,
|
||||
struct xdr_buf *xdr,
|
||||
struct svc_rdma_req_map *vec)
|
||||
{
|
||||
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
|
||||
int sge_no;
|
||||
@@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
|
||||
BUG_ON(xdr->len !=
|
||||
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
|
||||
|
||||
if (xprt->sc_frmr_pg_list_len)
|
||||
return fast_reg_xdr(xprt, xdr, vec);
|
||||
|
||||
/* Skip the first sge, this is for the RPCRDMA header */
|
||||
sge_no = 1;
|
||||
|
||||
@@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt,
|
||||
|
||||
BUG_ON(sge_no > sge_max);
|
||||
vec->count = sge_no;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Assumptions:
|
||||
* - We are using FRMR
|
||||
* - or -
|
||||
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
|
||||
*/
|
||||
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
|
||||
@@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
|
||||
sge_no = 0;
|
||||
|
||||
/* Copy the remaining SGE */
|
||||
while (bc != 0 && xdr_sge_no < vec->count) {
|
||||
sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
|
||||
sge_bytes = min((size_t)bc,
|
||||
(size_t)(vec->sge[xdr_sge_no].iov_len-sge_off));
|
||||
while (bc != 0) {
|
||||
sge_bytes = min_t(size_t,
|
||||
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
|
||||
sge[sge_no].length = sge_bytes;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
sge[sge_no].addr =
|
||||
ib_dma_map_single(xprt->sc_cm_id->device,
|
||||
(void *)
|
||||
vec->sge[xdr_sge_no].iov_base + sge_off,
|
||||
sge_bytes, DMA_TO_DEVICE);
|
||||
if (dma_mapping_error(xprt->sc_cm_id->device->dma_device,
|
||||
sge[sge_no].addr))
|
||||
goto err;
|
||||
if (!vec->frmr) {
|
||||
sge[sge_no].addr =
|
||||
ib_dma_map_single(xprt->sc_cm_id->device,
|
||||
(void *)
|
||||
vec->sge[xdr_sge_no].iov_base + sge_off,
|
||||
sge_bytes, DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
|
||||
sge[sge_no].addr))
|
||||
goto err;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
sge[sge_no].lkey = xprt->sc_dma_lkey;
|
||||
} else {
|
||||
sge[sge_no].addr = (unsigned long)
|
||||
vec->sge[xdr_sge_no].iov_base + sge_off;
|
||||
sge[sge_no].lkey = vec->frmr->mr->lkey;
|
||||
}
|
||||
ctxt->count++;
|
||||
ctxt->frmr = vec->frmr;
|
||||
sge_off = 0;
|
||||
sge_no++;
|
||||
ctxt->count++;
|
||||
xdr_sge_no++;
|
||||
BUG_ON(xdr_sge_no > vec->count);
|
||||
bc -= sge_bytes;
|
||||
}
|
||||
|
||||
BUG_ON(bc != 0);
|
||||
BUG_ON(xdr_sge_no > vec->count);
|
||||
|
||||
/* Prepare WRITE WR */
|
||||
memset(&write_wr, 0, sizeof write_wr);
|
||||
ctxt->wr_op = IB_WR_RDMA_WRITE;
|
||||
@@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
|
||||
res_ary = (struct rpcrdma_write_array *)
|
||||
&rdma_resp->rm_body.rm_chunks[1];
|
||||
|
||||
max_write = xprt->sc_max_sge * PAGE_SIZE;
|
||||
if (vec->frmr)
|
||||
max_write = vec->frmr->map_len;
|
||||
else
|
||||
max_write = xprt->sc_max_sge * PAGE_SIZE;
|
||||
|
||||
/* Write chunks start at the pagelist */
|
||||
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
|
||||
@@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
|
||||
res_ary = (struct rpcrdma_write_array *)
|
||||
&rdma_resp->rm_body.rm_chunks[2];
|
||||
|
||||
max_write = xprt->sc_max_sge * PAGE_SIZE;
|
||||
if (vec->frmr)
|
||||
max_write = vec->frmr->map_len;
|
||||
else
|
||||
max_write = xprt->sc_max_sge * PAGE_SIZE;
|
||||
|
||||
/* xdr offset starts at RPC message */
|
||||
for (xdr_off = 0, chunk_no = 0;
|
||||
@@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
|
||||
ch = &arg_ary->wc_array[chunk_no].wc_target;
|
||||
write_len = min(xfer_len, ch->rs_length);
|
||||
|
||||
|
||||
/* Prepare the reply chunk given the length actually
|
||||
* written */
|
||||
rs_offset = get_unaligned(&(ch->rs_offset));
|
||||
@@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
int byte_count)
|
||||
{
|
||||
struct ib_send_wr send_wr;
|
||||
struct ib_send_wr inv_wr;
|
||||
int sge_no;
|
||||
int sge_bytes;
|
||||
int page_no;
|
||||
@@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
/* Prepare the context */
|
||||
ctxt->pages[0] = page;
|
||||
ctxt->count = 1;
|
||||
ctxt->frmr = vec->frmr;
|
||||
if (vec->frmr)
|
||||
set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
|
||||
else
|
||||
clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
|
||||
|
||||
/* Prepare the SGE for the RPCRDMA Header */
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
ctxt->sge[0].addr =
|
||||
ib_dma_map_page(rdma->sc_cm_id->device,
|
||||
page, 0, PAGE_SIZE, DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
|
||||
goto err;
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
|
||||
ctxt->direction = DMA_TO_DEVICE;
|
||||
|
||||
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
|
||||
ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey;
|
||||
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
|
||||
|
||||
/* Determine how many of our SGE are to be transmitted */
|
||||
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
|
||||
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
|
||||
byte_count -= sge_bytes;
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
ctxt->sge[sge_no].addr =
|
||||
ib_dma_map_single(rdma->sc_cm_id->device,
|
||||
vec->sge[sge_no].iov_base,
|
||||
sge_bytes, DMA_TO_DEVICE);
|
||||
if (!vec->frmr) {
|
||||
ctxt->sge[sge_no].addr =
|
||||
ib_dma_map_single(rdma->sc_cm_id->device,
|
||||
vec->sge[sge_no].iov_base,
|
||||
sge_bytes, DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
|
||||
ctxt->sge[sge_no].addr))
|
||||
goto err;
|
||||
atomic_inc(&rdma->sc_dma_used);
|
||||
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
|
||||
} else {
|
||||
ctxt->sge[sge_no].addr = (unsigned long)
|
||||
vec->sge[sge_no].iov_base;
|
||||
ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
|
||||
}
|
||||
ctxt->sge[sge_no].length = sge_bytes;
|
||||
ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey;
|
||||
}
|
||||
BUG_ON(byte_count != 0);
|
||||
|
||||
@@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
|
||||
ctxt->count++;
|
||||
rqstp->rq_respages[page_no] = NULL;
|
||||
/* If there are more pages than SGE, terminate SGE list */
|
||||
/*
|
||||
* If there are more pages than SGE, terminate SGE
|
||||
* list so that svc_rdma_unmap_dma doesn't attempt to
|
||||
* unmap garbage.
|
||||
*/
|
||||
if (page_no+1 >= sge_no)
|
||||
ctxt->sge[page_no+1].length = 0;
|
||||
}
|
||||
BUG_ON(sge_no > rdma->sc_max_sge);
|
||||
BUG_ON(sge_no > ctxt->count);
|
||||
memset(&send_wr, 0, sizeof send_wr);
|
||||
ctxt->wr_op = IB_WR_SEND;
|
||||
send_wr.wr_id = (unsigned long)ctxt;
|
||||
@@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma,
|
||||
send_wr.num_sge = sge_no;
|
||||
send_wr.opcode = IB_WR_SEND;
|
||||
send_wr.send_flags = IB_SEND_SIGNALED;
|
||||
if (vec->frmr) {
|
||||
/* Prepare INVALIDATE WR */
|
||||
memset(&inv_wr, 0, sizeof inv_wr);
|
||||
inv_wr.opcode = IB_WR_LOCAL_INV;
|
||||
inv_wr.send_flags = IB_SEND_SIGNALED;
|
||||
inv_wr.ex.invalidate_rkey =
|
||||
vec->frmr->mr->lkey;
|
||||
send_wr.next = &inv_wr;
|
||||
}
|
||||
|
||||
ret = svc_rdma_send(rdma, &send_wr);
|
||||
if (ret)
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
goto err;
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
svc_rdma_put_frmr(rdma, vec->frmr);
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
|
||||
@@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
||||
ctxt = svc_rdma_get_context(rdma);
|
||||
ctxt->direction = DMA_TO_DEVICE;
|
||||
vec = svc_rdma_get_req_map();
|
||||
xdr_to_sge(rdma, &rqstp->rq_res, vec);
|
||||
|
||||
ret = map_xdr(rdma, &rqstp->rq_res, vec);
|
||||
if (ret)
|
||||
goto err0;
|
||||
inline_bytes = rqstp->rq_res.len;
|
||||
|
||||
/* Create the RDMA response header */
|
||||
@@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
||||
if (ret < 0) {
|
||||
printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n",
|
||||
ret);
|
||||
goto error;
|
||||
goto err1;
|
||||
}
|
||||
inline_bytes -= ret;
|
||||
|
||||
@@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
||||
if (ret < 0) {
|
||||
printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n",
|
||||
ret);
|
||||
goto error;
|
||||
goto err1;
|
||||
}
|
||||
inline_bytes -= ret;
|
||||
|
||||
@@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
|
||||
svc_rdma_put_req_map(vec);
|
||||
dprintk("svcrdma: send_reply returns %d\n", ret);
|
||||
return ret;
|
||||
error:
|
||||
|
||||
err1:
|
||||
put_page(res_page);
|
||||
err0:
|
||||
svc_rdma_put_req_map(vec);
|
||||
svc_rdma_put_context(ctxt, 0);
|
||||
put_page(res_page);
|
||||
return ret;
|
||||
}
|
||||
|
@@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
|
||||
ctxt->xprt = xprt;
|
||||
INIT_LIST_HEAD(&ctxt->dto_q);
|
||||
ctxt->count = 0;
|
||||
ctxt->frmr = NULL;
|
||||
atomic_inc(&xprt->sc_ctxt_used);
|
||||
return ctxt;
|
||||
}
|
||||
|
||||
static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
|
||||
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
|
||||
{
|
||||
struct svcxprt_rdma *xprt = ctxt->xprt;
|
||||
int i;
|
||||
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
|
||||
atomic_dec(&xprt->sc_dma_used);
|
||||
ib_dma_unmap_single(xprt->sc_cm_id->device,
|
||||
ctxt->sge[i].addr,
|
||||
ctxt->sge[i].length,
|
||||
ctxt->direction);
|
||||
/*
|
||||
* Unmap the DMA addr in the SGE if the lkey matches
|
||||
* the sc_dma_lkey, otherwise, ignore it since it is
|
||||
* an FRMR lkey and will be unmapped later when the
|
||||
* last WR that uses it completes.
|
||||
*/
|
||||
if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
|
||||
atomic_dec(&xprt->sc_dma_used);
|
||||
ib_dma_unmap_single(xprt->sc_cm_id->device,
|
||||
ctxt->sge[i].addr,
|
||||
ctxt->sge[i].length,
|
||||
ctxt->direction);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
|
||||
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
|
||||
}
|
||||
map->count = 0;
|
||||
map->frmr = NULL;
|
||||
return map;
|
||||
}
|
||||
|
||||
@@ -315,6 +325,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
|
||||
svc_xprt_enqueue(&xprt->sc_xprt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Processs a completion context
|
||||
*/
|
||||
static void process_context(struct svcxprt_rdma *xprt,
|
||||
struct svc_rdma_op_ctxt *ctxt)
|
||||
{
|
||||
svc_rdma_unmap_dma(ctxt);
|
||||
|
||||
switch (ctxt->wr_op) {
|
||||
case IB_WR_SEND:
|
||||
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
|
||||
svc_rdma_put_frmr(xprt, ctxt->frmr);
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
break;
|
||||
|
||||
case IB_WR_RDMA_WRITE:
|
||||
svc_rdma_put_context(ctxt, 0);
|
||||
break;
|
||||
|
||||
case IB_WR_RDMA_READ:
|
||||
case IB_WR_RDMA_READ_WITH_INV:
|
||||
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
|
||||
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
|
||||
BUG_ON(!read_hdr);
|
||||
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
|
||||
svc_rdma_put_frmr(xprt, ctxt->frmr);
|
||||
spin_lock_bh(&xprt->sc_rq_dto_lock);
|
||||
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
|
||||
list_add_tail(&read_hdr->dto_q,
|
||||
&xprt->sc_read_complete_q);
|
||||
spin_unlock_bh(&xprt->sc_rq_dto_lock);
|
||||
svc_xprt_enqueue(&xprt->sc_xprt);
|
||||
}
|
||||
svc_rdma_put_context(ctxt, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
printk(KERN_ERR "svcrdma: unexpected completion type, "
|
||||
"opcode=%d\n",
|
||||
ctxt->wr_op);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Send Queue Completion Handler - potentially called on interrupt context.
|
||||
*
|
||||
@@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
|
||||
struct ib_cq *cq = xprt->sc_sq_cq;
|
||||
int ret;
|
||||
|
||||
|
||||
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
|
||||
return;
|
||||
|
||||
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
|
||||
atomic_inc(&rdma_stat_sq_poll);
|
||||
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
|
||||
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
|
||||
xprt = ctxt->xprt;
|
||||
|
||||
svc_rdma_unmap_dma(ctxt);
|
||||
if (wc.status != IB_WC_SUCCESS)
|
||||
/* Close the transport */
|
||||
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
|
||||
@@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
|
||||
atomic_dec(&xprt->sc_sq_count);
|
||||
wake_up(&xprt->sc_send_wait);
|
||||
|
||||
switch (ctxt->wr_op) {
|
||||
case IB_WR_SEND:
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
break;
|
||||
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
|
||||
if (ctxt)
|
||||
process_context(xprt, ctxt);
|
||||
|
||||
case IB_WR_RDMA_WRITE:
|
||||
svc_rdma_put_context(ctxt, 0);
|
||||
break;
|
||||
|
||||
case IB_WR_RDMA_READ:
|
||||
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
|
||||
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
|
||||
BUG_ON(!read_hdr);
|
||||
spin_lock_bh(&xprt->sc_rq_dto_lock);
|
||||
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
|
||||
list_add_tail(&read_hdr->dto_q,
|
||||
&xprt->sc_read_complete_q);
|
||||
spin_unlock_bh(&xprt->sc_rq_dto_lock);
|
||||
svc_xprt_enqueue(&xprt->sc_xprt);
|
||||
}
|
||||
svc_rdma_put_context(ctxt, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
printk(KERN_ERR "svcrdma: unexpected completion type, "
|
||||
"opcode=%d, status=%d\n",
|
||||
wc.opcode, wc.status);
|
||||
break;
|
||||
}
|
||||
svc_xprt_put(&xprt->sc_xprt);
|
||||
}
|
||||
|
||||
@@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
|
||||
INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
|
||||
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
|
||||
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
|
||||
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
|
||||
init_waitqueue_head(&cma_xprt->sc_send_wait);
|
||||
|
||||
spin_lock_init(&cma_xprt->sc_lock);
|
||||
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
|
||||
spin_lock_init(&cma_xprt->sc_frmr_q_lock);
|
||||
|
||||
cma_xprt->sc_ord = svcrdma_ord;
|
||||
|
||||
@@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
|
||||
struct ib_recv_wr recv_wr, *bad_recv_wr;
|
||||
struct svc_rdma_op_ctxt *ctxt;
|
||||
struct page *page;
|
||||
unsigned long pa;
|
||||
dma_addr_t pa;
|
||||
int sge_no;
|
||||
int buflen;
|
||||
int ret;
|
||||
@@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
|
||||
BUG_ON(sge_no >= xprt->sc_max_sge);
|
||||
page = svc_rdma_get_page();
|
||||
ctxt->pages[sge_no] = page;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
pa = ib_dma_map_page(xprt->sc_cm_id->device,
|
||||
page, 0, PAGE_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
|
||||
goto err_put_ctxt;
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
ctxt->sge[sge_no].addr = pa;
|
||||
ctxt->sge[sge_no].length = PAGE_SIZE;
|
||||
ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
|
||||
ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
|
||||
buflen += PAGE_SIZE;
|
||||
}
|
||||
ctxt->count = sge_no;
|
||||
@@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
}
|
||||
return ret;
|
||||
|
||||
err_put_ctxt:
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
|
||||
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
|
||||
"event=%d\n", cma_id, cma_id->context, event->event);
|
||||
handle_connect_req(cma_id,
|
||||
event->param.conn.responder_resources);
|
||||
event->param.conn.initiator_depth);
|
||||
break;
|
||||
|
||||
case RDMA_CM_EVENT_ESTABLISHED:
|
||||
@@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
|
||||
{
|
||||
struct ib_mr *mr;
|
||||
struct ib_fast_reg_page_list *pl;
|
||||
struct svc_rdma_fastreg_mr *frmr;
|
||||
|
||||
frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
|
||||
if (!frmr)
|
||||
goto err;
|
||||
|
||||
mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES);
|
||||
if (!mr)
|
||||
goto err_free_frmr;
|
||||
|
||||
pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
|
||||
RPCSVC_MAXPAGES);
|
||||
if (!pl)
|
||||
goto err_free_mr;
|
||||
|
||||
frmr->mr = mr;
|
||||
frmr->page_list = pl;
|
||||
INIT_LIST_HEAD(&frmr->frmr_list);
|
||||
return frmr;
|
||||
|
||||
err_free_mr:
|
||||
ib_dereg_mr(mr);
|
||||
err_free_frmr:
|
||||
kfree(frmr);
|
||||
err:
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
|
||||
{
|
||||
struct svc_rdma_fastreg_mr *frmr;
|
||||
|
||||
while (!list_empty(&xprt->sc_frmr_q)) {
|
||||
frmr = list_entry(xprt->sc_frmr_q.next,
|
||||
struct svc_rdma_fastreg_mr, frmr_list);
|
||||
list_del_init(&frmr->frmr_list);
|
||||
ib_dereg_mr(frmr->mr);
|
||||
ib_free_fast_reg_page_list(frmr->page_list);
|
||||
kfree(frmr);
|
||||
}
|
||||
}
|
||||
|
||||
struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
|
||||
{
|
||||
struct svc_rdma_fastreg_mr *frmr = NULL;
|
||||
|
||||
spin_lock_bh(&rdma->sc_frmr_q_lock);
|
||||
if (!list_empty(&rdma->sc_frmr_q)) {
|
||||
frmr = list_entry(rdma->sc_frmr_q.next,
|
||||
struct svc_rdma_fastreg_mr, frmr_list);
|
||||
list_del_init(&frmr->frmr_list);
|
||||
frmr->map_len = 0;
|
||||
frmr->page_list_len = 0;
|
||||
}
|
||||
spin_unlock_bh(&rdma->sc_frmr_q_lock);
|
||||
if (frmr)
|
||||
return frmr;
|
||||
|
||||
return rdma_alloc_frmr(rdma);
|
||||
}
|
||||
|
||||
static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
|
||||
struct svc_rdma_fastreg_mr *frmr)
|
||||
{
|
||||
int page_no;
|
||||
for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
|
||||
dma_addr_t addr = frmr->page_list->page_list[page_no];
|
||||
if (ib_dma_mapping_error(frmr->mr->device, addr))
|
||||
continue;
|
||||
atomic_dec(&xprt->sc_dma_used);
|
||||
ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
|
||||
frmr->direction);
|
||||
}
|
||||
}
|
||||
|
||||
void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
|
||||
struct svc_rdma_fastreg_mr *frmr)
|
||||
{
|
||||
if (frmr) {
|
||||
frmr_unmap_dma(rdma, frmr);
|
||||
spin_lock_bh(&rdma->sc_frmr_q_lock);
|
||||
BUG_ON(!list_empty(&frmr->frmr_list));
|
||||
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
|
||||
spin_unlock_bh(&rdma->sc_frmr_q_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the xpo_recvfrom function for listening endpoints. Its
|
||||
* purpose is to accept incoming connections. The CMA callback handler
|
||||
@@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
|
||||
struct rdma_conn_param conn_param;
|
||||
struct ib_qp_init_attr qp_attr;
|
||||
struct ib_device_attr devattr;
|
||||
int dma_mr_acc;
|
||||
int need_dma_mr;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
@@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
|
||||
}
|
||||
newxprt->sc_qp = newxprt->sc_cm_id->qp;
|
||||
|
||||
/* Register all of physical memory */
|
||||
newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd,
|
||||
IB_ACCESS_LOCAL_WRITE |
|
||||
IB_ACCESS_REMOTE_WRITE);
|
||||
if (IS_ERR(newxprt->sc_phys_mr)) {
|
||||
dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret);
|
||||
/*
|
||||
* Use the most secure set of MR resources based on the
|
||||
* transport type and available memory management features in
|
||||
* the device. Here's the table implemented below:
|
||||
*
|
||||
* Fast Global DMA Remote WR
|
||||
* Reg LKEY MR Access
|
||||
* Sup'd Sup'd Needed Needed
|
||||
*
|
||||
* IWARP N N Y Y
|
||||
* N Y Y Y
|
||||
* Y N Y N
|
||||
* Y Y N -
|
||||
*
|
||||
* IB N N Y N
|
||||
* N Y N -
|
||||
* Y N Y N
|
||||
* Y Y N -
|
||||
*
|
||||
* NB: iWARP requires remote write access for the data sink
|
||||
* of an RDMA_READ. IB does not.
|
||||
*/
|
||||
if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
|
||||
newxprt->sc_frmr_pg_list_len =
|
||||
devattr.max_fast_reg_page_list_len;
|
||||
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if a DMA MR is required and if so, what privs are required
|
||||
*/
|
||||
switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) {
|
||||
case RDMA_TRANSPORT_IWARP:
|
||||
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
|
||||
if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
|
||||
need_dma_mr = 1;
|
||||
dma_mr_acc =
|
||||
(IB_ACCESS_LOCAL_WRITE |
|
||||
IB_ACCESS_REMOTE_WRITE);
|
||||
} else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
|
||||
need_dma_mr = 1;
|
||||
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
|
||||
} else
|
||||
need_dma_mr = 0;
|
||||
break;
|
||||
case RDMA_TRANSPORT_IB:
|
||||
if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
|
||||
need_dma_mr = 1;
|
||||
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
|
||||
} else
|
||||
need_dma_mr = 0;
|
||||
break;
|
||||
default:
|
||||
goto errout;
|
||||
}
|
||||
|
||||
/* Create the DMA MR if needed, otherwise, use the DMA LKEY */
|
||||
if (need_dma_mr) {
|
||||
/* Register all of physical memory */
|
||||
newxprt->sc_phys_mr =
|
||||
ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
|
||||
if (IS_ERR(newxprt->sc_phys_mr)) {
|
||||
dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
|
||||
ret);
|
||||
goto errout;
|
||||
}
|
||||
newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
|
||||
} else
|
||||
newxprt->sc_dma_lkey =
|
||||
newxprt->sc_cm_id->device->local_dma_lkey;
|
||||
|
||||
/* Post receive buffers */
|
||||
for (i = 0; i < newxprt->sc_max_requests; i++) {
|
||||
ret = svc_rdma_post_recv(newxprt);
|
||||
@@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work)
|
||||
WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
|
||||
WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
|
||||
|
||||
/* De-allocate fastreg mr */
|
||||
rdma_dealloc_frmr_q(rdma);
|
||||
|
||||
/* Destroy the QP if present (not a listener) */
|
||||
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
|
||||
ib_destroy_qp(rdma->sc_qp);
|
||||
@@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to register the kvec representing the RPC memory with the
|
||||
* device.
|
||||
*
|
||||
* Returns:
|
||||
* NULL : The device does not support fastreg or there were no more
|
||||
* fastreg mr.
|
||||
* frmr : The kvec register request was successfully posted.
|
||||
* <0 : An error was encountered attempting to register the kvec.
|
||||
*/
|
||||
int svc_rdma_fastreg(struct svcxprt_rdma *xprt,
|
||||
struct svc_rdma_fastreg_mr *frmr)
|
||||
{
|
||||
struct ib_send_wr fastreg_wr;
|
||||
u8 key;
|
||||
|
||||
/* Bump the key */
|
||||
key = (u8)(frmr->mr->lkey & 0x000000FF);
|
||||
ib_update_fast_reg_key(frmr->mr, ++key);
|
||||
|
||||
/* Prepare FASTREG WR */
|
||||
memset(&fastreg_wr, 0, sizeof fastreg_wr);
|
||||
fastreg_wr.opcode = IB_WR_FAST_REG_MR;
|
||||
fastreg_wr.send_flags = IB_SEND_SIGNALED;
|
||||
fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
|
||||
fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
|
||||
fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
|
||||
fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
|
||||
fastreg_wr.wr.fast_reg.length = frmr->map_len;
|
||||
fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
|
||||
fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
|
||||
return svc_rdma_send(xprt, &fastreg_wr);
|
||||
}
|
||||
|
||||
int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
|
||||
{
|
||||
struct ib_send_wr *bad_wr;
|
||||
struct ib_send_wr *bad_wr, *n_wr;
|
||||
int wr_count;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
|
||||
return -ENOTCONN;
|
||||
|
||||
BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
|
||||
BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
|
||||
wr->opcode);
|
||||
wr_count = 1;
|
||||
for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
|
||||
wr_count++;
|
||||
|
||||
/* If the SQ is full, wait until an SQ entry is available */
|
||||
while (1) {
|
||||
spin_lock_bh(&xprt->sc_lock);
|
||||
if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
|
||||
if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) {
|
||||
spin_unlock_bh(&xprt->sc_lock);
|
||||
atomic_inc(&rdma_stat_sq_starve);
|
||||
|
||||
@@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
|
||||
return 0;
|
||||
continue;
|
||||
}
|
||||
/* Bumped used SQ WR count and post */
|
||||
svc_xprt_get(&xprt->sc_xprt);
|
||||
/* Take a transport ref for each WR posted */
|
||||
for (i = 0; i < wr_count; i++)
|
||||
svc_xprt_get(&xprt->sc_xprt);
|
||||
|
||||
/* Bump used SQ WR count and post */
|
||||
atomic_add(wr_count, &xprt->sc_sq_count);
|
||||
ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
|
||||
if (!ret)
|
||||
atomic_inc(&xprt->sc_sq_count);
|
||||
else {
|
||||
svc_xprt_put(&xprt->sc_xprt);
|
||||
if (ret) {
|
||||
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
|
||||
atomic_sub(wr_count, &xprt->sc_sq_count);
|
||||
for (i = 0; i < wr_count; i ++)
|
||||
svc_xprt_put(&xprt->sc_xprt);
|
||||
dprintk("svcrdma: failed to post SQ WR rc=%d, "
|
||||
"sc_sq_count=%d, sc_sq_depth=%d\n",
|
||||
ret, atomic_read(&xprt->sc_sq_count),
|
||||
xprt->sc_sq_depth);
|
||||
}
|
||||
spin_unlock_bh(&xprt->sc_lock);
|
||||
if (ret)
|
||||
wake_up(&xprt->sc_send_wait);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
@@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
|
||||
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
|
||||
|
||||
/* Prepare SGE for local address */
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
|
||||
p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
|
||||
sge.lkey = xprt->sc_phys_mr->lkey;
|
||||
if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
|
||||
put_page(p);
|
||||
return;
|
||||
}
|
||||
atomic_inc(&xprt->sc_dma_used);
|
||||
sge.lkey = xprt->sc_dma_lkey;
|
||||
sge.length = length;
|
||||
|
||||
ctxt = svc_rdma_get_context(xprt);
|
||||
@@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
|
||||
if (ret) {
|
||||
dprintk("svcrdma: Error %d posting send for protocol error\n",
|
||||
ret);
|
||||
ib_dma_unmap_page(xprt->sc_cm_id->device,
|
||||
sge.addr, PAGE_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
svc_rdma_put_context(ctxt, 1);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user