Merge tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs
Pull second round of 9p patches from Eric Van Hensbergen: "Several of these patches were rebased in order to correct style issues. Only stylistic changes were made versus the patches which were in linux-next for two weeks. The rebases have been in linux-next for 3 days and have passed my regressions. The bulk of these are RDMA fixes and improvements. There are also some additions on the extended attributes front to support some additional namespaces and a new option for TCP to force allocation of mount requests from a privileged port" * tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs: fs/9p: Remove the unused variable "err" in v9fs_vfs_getattr() 9P: Add cancelled() to the transport functions. 9P/RDMA: count posted buffers without a pending request 9P/RDMA: Improve error handling in rdma_request 9P/RDMA: Do not free req->rc in error handling in rdma_request() 9P/RDMA: Use a semaphore to protect the RQ 9P/RDMA: Protect against duplicate replies 9P/RDMA: increase P9_RDMA_MAXSIZE to 1MB 9pnet: refactor struct p9_fcall alloc code 9P/RDMA: rdma_request() needs not allocate req->rc 9P: Fix fcall allocation for rdma fs/9p: xattr: add trusted and security namespaces net/9p: add privport option to 9p tcp transport
This commit is contained in:
@@ -31,3 +31,16 @@ config 9P_FS_POSIX_ACL
|
||||
If you don't know what Access Control Lists are, say N
|
||||
|
||||
endif
|
||||
|
||||
|
||||
config 9P_FS_SECURITY
|
||||
bool "9P Security Labels"
|
||||
depends on 9P_FS
|
||||
help
|
||||
Security labels support alternative access control models
|
||||
implemented by security modules like SELinux. This option
|
||||
enables an extended attribute handler for file security
|
||||
labels in the 9P filesystem.
|
||||
|
||||
If you are not using a security module that requires using
|
||||
extended attributes for file security labels, say N.
|
||||
|
@@ -11,7 +11,9 @@ obj-$(CONFIG_9P_FS) := 9p.o
|
||||
v9fs.o \
|
||||
fid.o \
|
||||
xattr.o \
|
||||
xattr_user.o
|
||||
xattr_user.o \
|
||||
xattr_trusted.o
|
||||
|
||||
9p-$(CONFIG_9P_FSCACHE) += cache.o
|
||||
9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
|
||||
9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o
|
||||
|
@@ -1054,13 +1054,11 @@ static int
|
||||
v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
int err;
|
||||
struct v9fs_session_info *v9ses;
|
||||
struct p9_fid *fid;
|
||||
struct p9_wstat *st;
|
||||
|
||||
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
|
||||
err = -EPERM;
|
||||
v9ses = v9fs_dentry2v9ses(dentry);
|
||||
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
|
||||
generic_fillattr(dentry->d_inode, stat);
|
||||
|
@@ -167,9 +167,13 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
|
||||
|
||||
const struct xattr_handler *v9fs_xattr_handlers[] = {
|
||||
&v9fs_xattr_user_handler,
|
||||
&v9fs_xattr_trusted_handler,
|
||||
#ifdef CONFIG_9P_FS_POSIX_ACL
|
||||
&v9fs_xattr_acl_access_handler,
|
||||
&v9fs_xattr_acl_default_handler,
|
||||
#endif
|
||||
#ifdef CONFIG_9P_FS_SECURITY
|
||||
&v9fs_xattr_security_handler,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
@@ -20,6 +20,8 @@
|
||||
|
||||
extern const struct xattr_handler *v9fs_xattr_handlers[];
|
||||
extern struct xattr_handler v9fs_xattr_user_handler;
|
||||
extern struct xattr_handler v9fs_xattr_trusted_handler;
|
||||
extern struct xattr_handler v9fs_xattr_security_handler;
|
||||
extern const struct xattr_handler v9fs_xattr_acl_access_handler;
|
||||
extern const struct xattr_handler v9fs_xattr_acl_default_handler;
|
||||
|
||||
|
80
fs/9p/xattr_security.c
Normal file
80
fs/9p/xattr_security.c
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright IBM Corporation, 2010
|
||||
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include "xattr.h"
|
||||
|
||||
static int v9fs_xattr_security_get(struct dentry *dentry, const char *name,
|
||||
void *buffer, size_t size, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
|
||||
memcpy(full_name+prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_get(dentry, full_name, buffer, size);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int v9fs_xattr_security_set(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
|
||||
memcpy(full_name + prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
struct xattr_handler v9fs_xattr_security_handler = {
|
||||
.prefix = XATTR_SECURITY_PREFIX,
|
||||
.get = v9fs_xattr_security_get,
|
||||
.set = v9fs_xattr_security_set,
|
||||
};
|
80
fs/9p/xattr_trusted.c
Normal file
80
fs/9p/xattr_trusted.c
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright IBM Corporation, 2010
|
||||
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include "xattr.h"
|
||||
|
||||
static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name,
|
||||
void *buffer, size_t size, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
|
||||
memcpy(full_name+prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_get(dentry, full_name, buffer, size);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
|
||||
memcpy(full_name + prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
struct xattr_handler v9fs_xattr_trusted_handler = {
|
||||
.prefix = XATTR_TRUSTED_PREFIX,
|
||||
.get = v9fs_xattr_trusted_get,
|
||||
.set = v9fs_xattr_trusted_set,
|
||||
};
|
@@ -26,6 +26,9 @@
|
||||
#ifndef NET_9P_TRANSPORT_H
|
||||
#define NET_9P_TRANSPORT_H
|
||||
|
||||
#define P9_DEF_MIN_RESVPORT (665U)
|
||||
#define P9_DEF_MAX_RESVPORT (1023U)
|
||||
|
||||
/**
|
||||
* struct p9_trans_module - transport module interface
|
||||
* @list: used to maintain a list of currently available transports
|
||||
@@ -37,6 +40,8 @@
|
||||
* @close: member function to discard a connection on this transport
|
||||
* @request: member function to issue a request to the transport
|
||||
* @cancel: member function to cancel a request (if it hasn't been sent)
|
||||
* @cancelled: member function to notify that a cancelled request will not
|
||||
* receive a reply
|
||||
*
|
||||
* This is the basic API for a transport module which is registered by the
|
||||
* transport module with the 9P core network module and used by the client
|
||||
@@ -55,6 +60,7 @@ struct p9_trans_module {
|
||||
void (*close) (struct p9_client *);
|
||||
int (*request) (struct p9_client *, struct p9_req_t *req);
|
||||
int (*cancel) (struct p9_client *, struct p9_req_t *req);
|
||||
int (*cancelled)(struct p9_client *, struct p9_req_t *req);
|
||||
int (*zc_request)(struct p9_client *, struct p9_req_t *,
|
||||
char *, char *, int , int, int, int);
|
||||
};
|
||||
|
@@ -204,6 +204,17 @@ free_and_return:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct p9_fcall *p9_fcall_alloc(int alloc_msize)
|
||||
{
|
||||
struct p9_fcall *fc;
|
||||
fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
|
||||
if (!fc)
|
||||
return NULL;
|
||||
fc->capacity = alloc_msize;
|
||||
fc->sdata = (char *) fc + sizeof(struct p9_fcall);
|
||||
return fc;
|
||||
}
|
||||
|
||||
/**
|
||||
* p9_tag_alloc - lookup/allocate a request by tag
|
||||
* @c: client session to lookup tag within
|
||||
@@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
|
||||
col = tag % P9_ROW_MAXTAG;
|
||||
|
||||
req = &c->reqs[row][col];
|
||||
if (!req->tc) {
|
||||
if (!req->wq) {
|
||||
req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
|
||||
if (!req->wq) {
|
||||
pr_err("Couldn't grow tag array\n");
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
if (!req->wq)
|
||||
goto grow_failed;
|
||||
init_waitqueue_head(req->wq);
|
||||
req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
|
||||
GFP_NOFS);
|
||||
req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
|
||||
GFP_NOFS);
|
||||
if ((!req->tc) || (!req->rc)) {
|
||||
pr_err("Couldn't grow tag array\n");
|
||||
kfree(req->tc);
|
||||
kfree(req->rc);
|
||||
kfree(req->wq);
|
||||
req->tc = req->rc = NULL;
|
||||
req->wq = NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
req->tc->capacity = alloc_msize;
|
||||
req->rc->capacity = alloc_msize;
|
||||
req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
|
||||
req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
|
||||
}
|
||||
|
||||
if (!req->tc)
|
||||
req->tc = p9_fcall_alloc(alloc_msize);
|
||||
if (!req->rc)
|
||||
req->rc = p9_fcall_alloc(alloc_msize);
|
||||
if (!req->tc || !req->rc)
|
||||
goto grow_failed;
|
||||
|
||||
p9pdu_reset(req->tc);
|
||||
p9pdu_reset(req->rc);
|
||||
|
||||
req->tc->tag = tag-1;
|
||||
req->status = REQ_STATUS_ALLOC;
|
||||
|
||||
return &c->reqs[row][col];
|
||||
return req;
|
||||
|
||||
grow_failed:
|
||||
pr_err("Couldn't grow tag array\n");
|
||||
kfree(req->tc);
|
||||
kfree(req->rc);
|
||||
kfree(req->wq);
|
||||
req->tc = req->rc = NULL;
|
||||
req->wq = NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -648,12 +656,20 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
|
||||
return PTR_ERR(req);
|
||||
|
||||
|
||||
/* if we haven't received a response for oldreq,
|
||||
remove it from the list. */
|
||||
/*
|
||||
* if we haven't received a response for oldreq,
|
||||
* remove it from the list, and notify the transport
|
||||
* layer that the reply will never arrive.
|
||||
*/
|
||||
spin_lock(&c->lock);
|
||||
if (oldreq->status == REQ_STATUS_FLSH)
|
||||
if (oldreq->status == REQ_STATUS_FLSH) {
|
||||
list_del(&oldreq->req_list);
|
||||
spin_unlock(&c->lock);
|
||||
spin_unlock(&c->lock);
|
||||
if (c->trans_mod->cancelled)
|
||||
c->trans_mod->cancelled(c, req);
|
||||
} else {
|
||||
spin_unlock(&c->lock);
|
||||
}
|
||||
|
||||
p9_free_req(c, req);
|
||||
return 0;
|
||||
|
@@ -63,6 +63,7 @@ struct p9_fd_opts {
|
||||
int rfd;
|
||||
int wfd;
|
||||
u16 port;
|
||||
int privport;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -87,12 +88,15 @@ struct p9_trans_fd {
|
||||
enum {
|
||||
/* Options that take integer arguments */
|
||||
Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
|
||||
/* Options that take no arguments */
|
||||
Opt_privport,
|
||||
};
|
||||
|
||||
static const match_table_t tokens = {
|
||||
{Opt_port, "port=%u"},
|
||||
{Opt_rfdno, "rfdno=%u"},
|
||||
{Opt_wfdno, "wfdno=%u"},
|
||||
{Opt_privport, "privport"},
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
|
||||
@@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock);
|
||||
static LIST_HEAD(p9_poll_pending_list);
|
||||
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
|
||||
|
||||
static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
|
||||
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
|
||||
|
||||
static void p9_mux_poll_stop(struct p9_conn *m)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
|
||||
if (!*p)
|
||||
continue;
|
||||
token = match_token(p, tokens, args);
|
||||
if (token != Opt_err) {
|
||||
if ((token != Opt_err) && (token != Opt_privport)) {
|
||||
r = match_int(&args[0], &option);
|
||||
if (r < 0) {
|
||||
p9_debug(P9_DEBUG_ERROR,
|
||||
@@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
|
||||
case Opt_wfdno:
|
||||
opts->wfd = option;
|
||||
break;
|
||||
case Opt_privport:
|
||||
opts->privport = 1;
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
@@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int p9_bind_privport(struct socket *sock)
|
||||
{
|
||||
struct sockaddr_in cl;
|
||||
int port, err = -EINVAL;
|
||||
|
||||
memset(&cl, 0, sizeof(cl));
|
||||
cl.sin_family = AF_INET;
|
||||
cl.sin_addr.s_addr = INADDR_ANY;
|
||||
for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
|
||||
cl.sin_port = htons((ushort)port);
|
||||
err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
|
||||
if (err != -EADDRINUSE)
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
|
||||
{
|
||||
@@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (opts.privport) {
|
||||
err = p9_bind_privport(csocket);
|
||||
if (err < 0) {
|
||||
pr_err("%s (%d): problem binding to privport\n",
|
||||
__func__, task_pid_nr(current));
|
||||
sock_release(csocket);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = csocket->ops->connect(csocket,
|
||||
(struct sockaddr *)&sin_server,
|
||||
sizeof(struct sockaddr_in), 0);
|
||||
|
@@ -57,9 +57,7 @@
|
||||
#define P9_RDMA_IRD 0
|
||||
#define P9_RDMA_ORD 0
|
||||
#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
|
||||
#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can
|
||||
* safely advertise a maxsize
|
||||
* of 64k */
|
||||
#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */
|
||||
|
||||
/**
|
||||
* struct p9_trans_rdma - RDMA transport instance
|
||||
@@ -75,7 +73,9 @@
|
||||
* @sq_depth: The depth of the Send Queue
|
||||
* @sq_sem: Semaphore for the SQ
|
||||
* @rq_depth: The depth of the Receive Queue.
|
||||
* @rq_count: Count of requests in the Receive Queue.
|
||||
* @rq_sem: Semaphore for the RQ
|
||||
* @excess_rc : Amount of posted Receive Contexts without a pending request.
|
||||
* See rdma_request()
|
||||
* @addr: The remote peer's address
|
||||
* @req_lock: Protects the active request list
|
||||
* @cm_done: Completion event for connection management tracking
|
||||
@@ -100,7 +100,8 @@ struct p9_trans_rdma {
|
||||
int sq_depth;
|
||||
struct semaphore sq_sem;
|
||||
int rq_depth;
|
||||
atomic_t rq_count;
|
||||
struct semaphore rq_sem;
|
||||
atomic_t excess_rc;
|
||||
struct sockaddr_in addr;
|
||||
spinlock_t req_lock;
|
||||
|
||||
@@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
|
||||
if (!req)
|
||||
goto err_out;
|
||||
|
||||
/* Check that we have not yet received a reply for this request.
|
||||
*/
|
||||
if (unlikely(req->rc)) {
|
||||
pr_err("Duplicate reply for request %d", tag);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
req->rc = c->rc;
|
||||
req->status = REQ_STATUS_RCVD;
|
||||
p9_client_cb(client, req);
|
||||
@@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
|
||||
|
||||
switch (c->wc_op) {
|
||||
case IB_WC_RECV:
|
||||
atomic_dec(&rdma->rq_count);
|
||||
handle_recv(client, rdma, c, wc.status, wc.byte_len);
|
||||
up(&rdma->rq_sem);
|
||||
break;
|
||||
|
||||
case IB_WC_SEND:
|
||||
@@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
|
||||
struct p9_rdma_context *c = NULL;
|
||||
struct p9_rdma_context *rpl_context = NULL;
|
||||
|
||||
/* When an error occurs between posting the recv and the send,
|
||||
* there will be a receive context posted without a pending request.
|
||||
* Since there is no way to "un-post" it, we remember it and skip
|
||||
* post_recv() for the next request.
|
||||
* So here,
|
||||
* see if we are this `next request' and need to absorb an excess rc.
|
||||
* If yes, then drop and free our own, and do not recv_post().
|
||||
**/
|
||||
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
|
||||
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
|
||||
/* Got one ! */
|
||||
kfree(req->rc);
|
||||
req->rc = NULL;
|
||||
goto dont_need_post_recv;
|
||||
} else {
|
||||
/* We raced and lost. */
|
||||
atomic_inc(&rdma->excess_rc);
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate an fcall for the reply */
|
||||
rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
|
||||
if (!rpl_context) {
|
||||
err = -ENOMEM;
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the request has a buffer, steal it, otherwise
|
||||
* allocate a new one. Typically, requests should already
|
||||
* have receive buffers allocated and just swap them around
|
||||
*/
|
||||
if (!req->rc) {
|
||||
req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
|
||||
GFP_NOFS);
|
||||
if (req->rc) {
|
||||
req->rc->sdata = (char *) req->rc +
|
||||
sizeof(struct p9_fcall);
|
||||
req->rc->capacity = client->msize;
|
||||
}
|
||||
goto recv_error;
|
||||
}
|
||||
rpl_context->rc = req->rc;
|
||||
if (!rpl_context->rc) {
|
||||
err = -ENOMEM;
|
||||
goto err_free2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Post a receive buffer for this request. We need to ensure
|
||||
@@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
|
||||
* outstanding request, so we must keep a count to avoid
|
||||
* overflowing the RQ.
|
||||
*/
|
||||
if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
|
||||
err = post_recv(client, rpl_context);
|
||||
if (err)
|
||||
goto err_free1;
|
||||
} else
|
||||
atomic_dec(&rdma->rq_count);
|
||||
if (down_interruptible(&rdma->rq_sem)) {
|
||||
err = -EINTR;
|
||||
goto recv_error;
|
||||
}
|
||||
|
||||
err = post_recv(client, rpl_context);
|
||||
if (err) {
|
||||
p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
|
||||
goto recv_error;
|
||||
}
|
||||
/* remove posted receive buffer from request structure */
|
||||
req->rc = NULL;
|
||||
|
||||
dont_need_post_recv:
|
||||
/* Post the request */
|
||||
c = kmalloc(sizeof *c, GFP_NOFS);
|
||||
if (!c) {
|
||||
err = -ENOMEM;
|
||||
goto err_free1;
|
||||
goto send_error;
|
||||
}
|
||||
c->req = req;
|
||||
|
||||
c->busa = ib_dma_map_single(rdma->cm_id->device,
|
||||
c->req->tc->sdata, c->req->tc->size,
|
||||
DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
|
||||
goto error;
|
||||
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
|
||||
err = -EIO;
|
||||
goto send_error;
|
||||
}
|
||||
|
||||
sge.addr = c->busa;
|
||||
sge.length = c->req->tc->size;
|
||||
@@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
|
||||
wr.sg_list = &sge;
|
||||
wr.num_sge = 1;
|
||||
|
||||
if (down_interruptible(&rdma->sq_sem))
|
||||
goto error;
|
||||
if (down_interruptible(&rdma->sq_sem)) {
|
||||
err = -EINTR;
|
||||
goto send_error;
|
||||
}
|
||||
|
||||
return ib_post_send(rdma->qp, &wr, &bad_wr);
|
||||
err = ib_post_send(rdma->qp, &wr, &bad_wr);
|
||||
if (err)
|
||||
goto send_error;
|
||||
|
||||
error:
|
||||
/* Success */
|
||||
return 0;
|
||||
|
||||
/* Handle errors that happened during or while preparing the send: */
|
||||
send_error:
|
||||
kfree(c);
|
||||
kfree(rpl_context->rc);
|
||||
p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
|
||||
|
||||
/* Ach.
|
||||
* We did recv_post(), but not send. We have one recv_post in excess.
|
||||
*/
|
||||
atomic_inc(&rdma->excess_rc);
|
||||
return err;
|
||||
|
||||
/* Handle errors that happened during or while preparing post_recv(): */
|
||||
recv_error:
|
||||
kfree(rpl_context);
|
||||
p9_debug(P9_DEBUG_ERROR, "EIO\n");
|
||||
return -EIO;
|
||||
err_free1:
|
||||
kfree(rpl_context->rc);
|
||||
err_free2:
|
||||
kfree(rpl_context);
|
||||
err_close:
|
||||
spin_lock_irqsave(&rdma->req_lock, flags);
|
||||
if (rdma->state < P9_RDMA_CLOSING) {
|
||||
rdma->state = P9_RDMA_CLOSING;
|
||||
@@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
|
||||
spin_lock_init(&rdma->req_lock);
|
||||
init_completion(&rdma->cm_done);
|
||||
sema_init(&rdma->sq_sem, rdma->sq_depth);
|
||||
atomic_set(&rdma->rq_count, 0);
|
||||
sema_init(&rdma->rq_sem, rdma->rq_depth);
|
||||
atomic_set(&rdma->excess_rc, 0);
|
||||
|
||||
return rdma;
|
||||
}
|
||||
@@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* A request has been fully flushed without a reply.
|
||||
* That means we have posted one buffer in excess.
|
||||
*/
|
||||
static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
|
||||
{
|
||||
struct p9_trans_rdma *rdma = client->trans;
|
||||
|
||||
atomic_inc(&rdma->excess_rc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* trans_create_rdma - Transport method for creating a transport instance
|
||||
* @client: client instance
|
||||
|
Reference in New Issue
Block a user