From 5d47a35600270e7115061cb1320ee60ae9bcb6b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Feb 2008 17:24:07 -0500 Subject: [PATCH 1/2] NFS: Fix a potential file corruption issue when writing If the inode is flagged as having an invalid mapping, then we can't rely on the PageUptodate() flag. Ensure that we don't use the "anti-fragmentation" write optimisation in nfs_updatepage(), since that will cause NFS to write out areas of the page that are no longer guaranteed to be up to date. A potential corruption could occur in the following scenario: client 1 client 2 =============== =============== fd=open("f",O_CREAT|O_WRONLY,0644); write(fd,"fubar\n",6); // cache last page close(fd); fd=open("f",O_WRONLY|O_APPEND); write(fd,"foo\n",4); close(fd); fd=open("f",O_WRONLY|O_APPEND); write(fd,"bar\n",4); close(fd); ----- The bug may lead to the file "f" reading 'fubar\n\0\0\0\nbar\n' because client 2 does not update the cached page after re-opening the file for write. Instead it keeps it marked as PageUptodate() until someone calls invaldate_inode_pages2() (typically by calling read()). Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b144b1957dd9..f55c437124a2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -696,6 +696,17 @@ int nfs_flush_incompatible(struct file *file, struct page *page) return status; } +/* + * If the page cache is marked as unsafe or invalid, then we can't rely on + * the PageUptodate() flag. In this case, we will need to turn off + * write optimisations that depend on the page contents being correct. + */ +static int nfs_write_pageuptodate(struct page *page, struct inode *inode) +{ + return PageUptodate(page) && + !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); +} + /* * Update and possibly write a cached page of an NFS file. * @@ -717,10 +728,13 @@ int nfs_updatepage(struct file *file, struct page *page, (long long)(page_offset(page) +offset)); /* If we're not using byte range locks, and we know the page - * is entirely in cache, it may be more efficient to avoid - * fragmenting write requests. + * is up to date, it may be more efficient to extend the write + * to cover the entire page in order to avoid fragmentation + * inefficiencies. */ - if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { + if (nfs_write_pageuptodate(page, inode) && + inode->i_flock == NULL && + !(file->f_mode & O_SYNC)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } From 3211e4eb5834924dd5beac8956c0bc0bfb755c37 Mon Sep 17 00:00:00 2001 From: James Lentini Date: Mon, 28 Jan 2008 12:09:28 -0500 Subject: [PATCH 2/2] SUNRPC xptrdma: simplify build configuration Trond and Bruce, This is a patch for 2.6.25. This is the same version that was sent out on December 12 for review (no comments to date). To simplify the RPC/RDMA client and server build configuration, make SUNRPC_XPRT_RDMA a hidden config option that continues to depend on SUNRPC and INFINIBAND. The value of SUNRPC_XPRT_RDMA will be: - N if either SUNRPC or INFINIBAND are N - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M - Y if both SUNRPC and INFINIBAND are Y In 2.6.25, all of the RPC/RDMA related files are grouped in net/sunrpc/xprtrdma and the net/sunrpc/xprtrdma/Makefile builds both the client and server RPC/RDMA support using this config option. Signed-off-by: James Lentini Signed-off-by: Trond Myklebust --- fs/Kconfig | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 3bf6ace1720c..d7312825592b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1778,12 +1778,9 @@ config SUNRPC_GSS tristate config SUNRPC_XPRT_RDMA - tristate "RDMA transport for sunrpc (EXPERIMENTAL)" + tristate depends on SUNRPC && INFINIBAND && EXPERIMENTAL - default m - help - Adds a client RPC transport for supporting kernel NFS over RDMA - mounts, including Infiniband and iWARP. Experimental. + default SUNRPC && INFINIBAND config SUNRPC_BIND34 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"