Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: stop using the page cache to back the buffer cache
  xfs: register the inode cache shrinker before quotachecks
  xfs: xfs_trans_read_buf() should return an error on failure
  xfs: introduce inode cluster buffer trylocks for xfs_iflush
  vmap: flush vmap aliases when mapping fails
  xfs: preallocation transactions do not need to be synchronous

Fix up trivial conflicts in fs/xfs/linux-2.6/xfs_buf.c due to plug removal.
@@ -93,75 +93,6 @@ xfs_buf_vmap_len(
 return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
 }

-/*
- * Page Region interfaces.
- *
- * For pages in filesystems where the blocksize is smaller than the
- * pagesize, we use the page->private field (long) to hold a bitmap
- * of uptodate regions within the page.
- *
- * Each such region is "bytes per page / bits per long" bytes long.
- *
- * NBPPR == number-of-bytes-per-page-region
- * BTOPR == bytes-to-page-region (rounded up)
- * BTOPRT == bytes-to-page-region-truncated (rounded down)
- */
-#if (BITS_PER_LONG == 32)
-#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
-#elif (BITS_PER_LONG == 64)
-#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
-#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
-#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
-
-STATIC unsigned long
-page_region_mask(
-size_t offset,
-size_t length)
-{
-unsigned long mask;
-int first, final;
-
-first = BTOPR(offset);
-final = BTOPRT(offset + length - 1);
-first = min(first, final);
-
-mask = ~0UL;
-mask <<= BITS_PER_LONG - (final - first);
-mask >>= BITS_PER_LONG - (final);
-
-ASSERT(offset + length <= PAGE_CACHE_SIZE);
-ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
-
-return mask;
-}
-
-STATIC void
-set_page_region(
-struct page *page,
-size_t offset,
-size_t length)
-{
-set_page_private(page,
-page_private(page) | page_region_mask(offset, length));
-if (page_private(page) == ~0UL)
-SetPageUptodate(page);
-}
-
-STATIC int
-test_page_region(
-struct page *page,
-size_t offset,
-size_t length)
-{
-unsigned long mask = page_region_mask(offset, length);
-
-return (mask && (page_private(page) & mask) == mask);
-}
-
 /*
 * xfs_buf_lru_add - add a buffer to the LRU.
 *
@@ -332,7 +263,7 @@ xfs_buf_free(

 ASSERT(list_empty(&bp->b_lru));

-if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
+if (bp->b_flags & _XBF_PAGES) {
 uint i;

 if (xfs_buf_is_vmapped(bp))
@@ -342,25 +273,22 @@ xfs_buf_free(
 for (i = 0; i < bp->b_page_count; i++) {
 struct page *page = bp->b_pages[i];

-if (bp->b_flags & _XBF_PAGE_CACHE)
-ASSERT(!PagePrivate(page));
-page_cache_release(page);
-}
+__free_page(page);
 }
+} else if (bp->b_flags & _XBF_KMEM)
+kmem_free(bp->b_addr);
 _xfs_buf_free_pages(bp);
 xfs_buf_deallocate(bp);
 }

 /*
- * Finds all pages for buffer in question and builds it's page list.
+ * Allocates all the pages for buffer in question and builds it's page list.
 */
 STATIC int
-_xfs_buf_lookup_pages(
+xfs_buf_allocate_memory(
 xfs_buf_t *bp,
 uint flags)
 {
-struct address_space *mapping = bp->b_target->bt_mapping;
-size_t blocksize = bp->b_target->bt_bsize;
 size_t size = bp->b_count_desired;
 size_t nbytes, offset;
 gfp_t gfp_mask = xb_to_gfp(flags);
@@ -369,29 +297,55 @@ _xfs_buf_lookup_pages(
 xfs_off_t end;
 int error;

+/*
+ * for buffers that are contained within a single page, just allocate
+ * the memory from the heap - there's no need for the complexity of
+ * page arrays to keep allocation down to order 0.
+ */
+if (bp->b_buffer_length < PAGE_SIZE) {
+bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+if (!bp->b_addr) {
+/* low memory - use alloc_page loop instead */
+goto use_alloc_page;
+}
+
+if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+PAGE_MASK) !=
+((unsigned long)bp->b_addr & PAGE_MASK)) {
+/* b_addr spans two pages - use alloc_page instead */
+kmem_free(bp->b_addr);
+bp->b_addr = NULL;
+goto use_alloc_page;
+}
+bp->b_offset = offset_in_page(bp->b_addr);
+bp->b_pages = bp->b_page_array;
+bp->b_pages[0] = virt_to_page(bp->b_addr);
+bp->b_page_count = 1;
+bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+return 0;
+}
+
+use_alloc_page:
 end = bp->b_file_offset + bp->b_buffer_length;
 page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);

 error = _xfs_buf_get_pages(bp, page_count, flags);
 if (unlikely(error))
 return error;
-bp->b_flags |= _XBF_PAGE_CACHE;

 offset = bp->b_offset;
-first = bp->b_file_offset >> PAGE_CACHE_SHIFT;
+first = bp->b_file_offset >> PAGE_SHIFT;
+bp->b_flags |= _XBF_PAGES;

 for (i = 0; i < bp->b_page_count; i++) {
 struct page *page;
 uint retries = 0;
-
-retry:
-page = find_or_create_page(mapping, first + i, gfp_mask);
+retry:
+page = alloc_page(gfp_mask);
 if (unlikely(page == NULL)) {
 if (flags & XBF_READ_AHEAD) {
 bp->b_page_count = i;
-for (i = 0; i < bp->b_page_count; i++)
-unlock_page(bp->b_pages[i]);
-return -ENOMEM;
+error = ENOMEM;
+goto out_free_pages;
 }

 /*
@@ -412,33 +366,16 @@ _xfs_buf_lookup_pages(

 XFS_STATS_INC(xb_page_found);

-nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset);
+nbytes = min_t(size_t, size, PAGE_SIZE - offset);
 size -= nbytes;
-
-ASSERT(!PagePrivate(page));
-if (!PageUptodate(page)) {
-page_count--;
-if (blocksize >= PAGE_CACHE_SIZE) {
-if (flags & XBF_READ)
-bp->b_flags |= _XBF_PAGE_LOCKED;
-} else if (!PagePrivate(page)) {
-if (test_page_region(page, offset, nbytes))
-page_count++;
-}
-}
-
 bp->b_pages[i] = page;
 offset = 0;
 }
+return 0;

-if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
+out_free_pages:
 for (i = 0; i < bp->b_page_count; i++)
-unlock_page(bp->b_pages[i]);
-}
-
-if (page_count == bp->b_page_count)
-bp->b_flags |= XBF_DONE;
+__free_page(bp->b_pages[i]);

 return error;
 }

@@ -450,14 +387,23 @@ _xfs_buf_map_pages(
 xfs_buf_t *bp,
 uint flags)
 {
-/* A single page buffer is always mappable */
+ASSERT(bp->b_flags & _XBF_PAGES);
 if (bp->b_page_count == 1) {
+/* A single page buffer is always mappable */
 bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
 bp->b_flags |= XBF_MAPPED;
 } else if (flags & XBF_MAPPED) {
+int retried = 0;
+
+do {
 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
 -1, PAGE_KERNEL);
-if (unlikely(bp->b_addr == NULL))
+if (bp->b_addr)
+break;
+vm_unmap_aliases();
+} while (retried++ <= 1);
+
+if (!bp->b_addr)
 return -ENOMEM;
 bp->b_addr += bp->b_offset;
 bp->b_flags |= XBF_MAPPED;
@@ -568,9 +514,14 @@ found:
 }
 }

+/*
+ * if the buffer is stale, clear all the external state associated with
+ * it. We need to keep flags such as how we allocated the buffer memory
+ * intact here.
+ */
 if (bp->b_flags & XBF_STALE) {
 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-bp->b_flags &= XBF_MAPPED;
+bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
 }

 trace_xfs_buf_find(bp, flags, _RET_IP_);
@@ -591,7 +542,7 @@ xfs_buf_get(
 xfs_buf_flags_t flags)
 {
 xfs_buf_t *bp, *new_bp;
-int error = 0, i;
+int error = 0;

 new_bp = xfs_buf_allocate(flags);
 if (unlikely(!new_bp))
@@ -599,7 +550,7 @@ xfs_buf_get(

 bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
 if (bp == new_bp) {
-error = _xfs_buf_lookup_pages(bp, flags);
+error = xfs_buf_allocate_memory(bp, flags);
 if (error)
 goto no_buffer;
 } else {
@@ -608,9 +559,6 @@ xfs_buf_get(
 return NULL;
 }

-for (i = 0; i < bp->b_page_count; i++)
-mark_page_accessed(bp->b_pages[i]);
-
 if (!(bp->b_flags & XBF_MAPPED)) {
 error = _xfs_buf_map_pages(bp, flags);
 if (unlikely(error)) {
@@ -711,8 +659,7 @@ xfs_buf_readahead(
 {
 struct backing_dev_info *bdi;

-bdi = target->bt_mapping->backing_dev_info;
-if (bdi_read_congested(bdi))
+if (bdi_read_congested(target->bt_bdi))
 return;

 xfs_buf_read(target, ioff, isize,
@@ -790,10 +737,10 @@ xfs_buf_associate_memory(
 size_t buflen;
 int page_count;

-pageaddr = (unsigned long)mem & PAGE_CACHE_MASK;
+pageaddr = (unsigned long)mem & PAGE_MASK;
 offset = (unsigned long)mem - pageaddr;
-buflen = PAGE_CACHE_ALIGN(len + offset);
-page_count = buflen >> PAGE_CACHE_SHIFT;
+buflen = PAGE_ALIGN(len + offset);
+page_count = buflen >> PAGE_SHIFT;

 /* Free any previous set of page pointers */
 if (bp->b_pages)
@@ -810,13 +757,12 @@ xfs_buf_associate_memory(

 for (i = 0; i < bp->b_page_count; i++) {
 bp->b_pages[i] = mem_to_page((void *)pageaddr);
-pageaddr += PAGE_CACHE_SIZE;
+pageaddr += PAGE_SIZE;
 }

 bp->b_count_desired = len;
 bp->b_buffer_length = buflen;
 bp->b_flags |= XBF_MAPPED;
-bp->b_flags &= ~_XBF_PAGE_LOCKED;

 return 0;
 }
@@ -923,20 +869,7 @@ xfs_buf_rele(


 /*
- * Mutual exclusion on buffers. Locking model:
- *
- * Buffers associated with inodes for which buffer locking
- * is not enabled are not protected by semaphores, and are
- * assumed to be exclusively owned by the caller. There is a
- * spinlock in the buffer, used by the caller when concurrent
- * access is possible.
- */
-
-/*
- * Locks a buffer object, if it is not already locked. Note that this in
- * no way locks the underlying pages, so it is only useful for
- * synchronizing concurrent use of buffer objects, not for synchronizing
- * independent access to the underlying pages.
+ * Lock a buffer object, if it is not already locked.
 *
 * If we come across a stale, pinned, locked buffer, we know that we are
 * being asked to lock a buffer that has been reallocated. Because it is
@@ -970,10 +903,7 @@ xfs_buf_lock_value(
 }

 /*
- * Locks a buffer object.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
+ * Lock a buffer object.
 *
 * If we come across a stale, pinned, locked buffer, we know that we
 * are being asked to lock a buffer that has been reallocated. Because
@@ -1246,10 +1176,8 @@ _xfs_buf_ioend(
 xfs_buf_t *bp,
 int schedule)
 {
-if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-bp->b_flags &= ~_XBF_PAGE_LOCKED;
+if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
 xfs_buf_ioend(bp, schedule);
-}
 }

 STATIC void
@@ -1258,35 +1186,12 @@ xfs_buf_bio_end_io(
 int error)
 {
 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
-unsigned int blocksize = bp->b_target->bt_bsize;
-struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;

 xfs_buf_ioerror(bp, -error);

 if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

-do {
-struct page *page = bvec->bv_page;
-
-ASSERT(!PagePrivate(page));
-if (unlikely(bp->b_error)) {
-if (bp->b_flags & XBF_READ)
-ClearPageUptodate(page);
-} else if (blocksize >= PAGE_CACHE_SIZE) {
-SetPageUptodate(page);
-} else if (!PagePrivate(page) &&
-(bp->b_flags & _XBF_PAGE_CACHE)) {
-set_page_region(page, bvec->bv_offset, bvec->bv_len);
-}
-
-if (--bvec >= bio->bi_io_vec)
-prefetchw(&bvec->bv_page->flags);
-
-if (bp->b_flags & _XBF_PAGE_LOCKED)
-unlock_page(page);
-} while (bvec >= bio->bi_io_vec);
-
 _xfs_buf_ioend(bp, 1);
 bio_put(bio);
 }
@@ -1300,7 +1205,6 @@ _xfs_buf_ioapply(
 int offset = bp->b_offset;
 int size = bp->b_count_desired;
 sector_t sector = bp->b_bn;
-unsigned int blocksize = bp->b_target->bt_bsize;

 total_nr_pages = bp->b_page_count;
 map_i = 0;
@@ -1321,29 +1225,6 @@ _xfs_buf_ioapply(
 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
 }

-/* Special code path for reading a sub page size buffer in --
- * we populate up the whole page, and hence the other metadata
- * in the same page. This optimization is only valid when the
- * filesystem block size is not smaller than the page size.
- */
-if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
-((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
-(XBF_READ|_XBF_PAGE_LOCKED)) &&
-(blocksize >= PAGE_CACHE_SIZE)) {
-bio = bio_alloc(GFP_NOIO, 1);
-
-bio->bi_bdev = bp->b_target->bt_bdev;
-bio->bi_sector = sector - (offset >> BBSHIFT);
-bio->bi_end_io = xfs_buf_bio_end_io;
-bio->bi_private = bp;
-
-bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0);
-size = 0;
-
-atomic_inc(&bp->b_io_remaining);
-
-goto submit_io;
-}

 next_chunk:
 atomic_inc(&bp->b_io_remaining);
@@ -1357,8 +1238,9 @@ next_chunk:
 bio->bi_end_io = xfs_buf_bio_end_io;
 bio->bi_private = bp;

+
 for (; size && nr_pages; nr_pages--, map_i++) {
-int rbytes, nbytes = PAGE_CACHE_SIZE - offset;
+int rbytes, nbytes = PAGE_SIZE - offset;

 if (nbytes > size)
 nbytes = size;
@@ -1373,7 +1255,6 @@ next_chunk:
 total_nr_pages--;
 }

-submit_io:
 if (likely(bio->bi_size)) {
 if (xfs_buf_is_vmapped(bp)) {
 flush_kernel_vmap_range(bp->b_addr,
@@ -1383,18 +1264,7 @@ submit_io:
 if (size)
 goto next_chunk;
 } else {
-/*
- * if we get here, no pages were added to the bio. However,
- * we can't just error out here - if the pages are locked then
- * we have to unlock them otherwise we can hang on a later
- * access to the page.
- */
 xfs_buf_ioerror(bp, EIO);
-if (bp->b_flags & _XBF_PAGE_LOCKED) {
-int i;
-for (i = 0; i < bp->b_page_count; i++)
-unlock_page(bp->b_pages[i]);
-}
 bio_put(bio);
 }
 }
@@ -1458,8 +1328,8 @@ xfs_buf_offset(
 return XFS_BUF_PTR(bp) + offset;

 offset += bp->b_offset;
-page = bp->b_pages[offset >> PAGE_CACHE_SHIFT];
-return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1));
+page = bp->b_pages[offset >> PAGE_SHIFT];
+return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
 }

 /*
@@ -1481,9 +1351,9 @@ xfs_buf_iomove(
 page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
 cpoff = xfs_buf_poff(boff + bp->b_offset);
 csize = min_t(size_t,
-PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff);
+PAGE_SIZE-cpoff, bp->b_count_desired-boff);

-ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE));
+ASSERT(((csize + cpoff) <= PAGE_SIZE));

 switch (mode) {
 case XBRW_ZERO:
@@ -1596,7 +1466,6 @@ xfs_free_buftarg(
 xfs_flush_buftarg(btp, 1);
 if (mp->m_flags & XFS_MOUNT_BARRIER)
 xfs_blkdev_issue_flush(btp);
-iput(btp->bt_mapping->host);

 kthread_stop(btp->bt_task);
 kmem_free(btp);
@@ -1620,15 +1489,6 @@ xfs_setsize_buftarg_flags(
 return EINVAL;
 }

-if (verbose &&
-(PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
-printk(KERN_WARNING
-"XFS: %u byte sectors in use on device %s. "
-"This is suboptimal; %u or greater is ideal.\n",
-sectorsize, XFS_BUFTARG_NAME(btp),
-(unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
-}
-
 return 0;
 }

@@ -1643,7 +1503,7 @@ xfs_setsize_buftarg_early(
 struct block_device *bdev)
 {
 return xfs_setsize_buftarg_flags(btp,
-PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0);
+PAGE_SIZE, bdev_logical_block_size(bdev), 0);
 }

 int
@@ -1655,40 +1515,6 @@ xfs_setsize_buftarg(
 return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
 }

-STATIC int
-xfs_mapping_buftarg(
-xfs_buftarg_t *btp,
-struct block_device *bdev)
-{
-struct backing_dev_info *bdi;
-struct inode *inode;
-struct address_space *mapping;
-static const struct address_space_operations mapping_aops = {
-.migratepage = fail_migrate_page,
-};
-
-inode = new_inode(bdev->bd_inode->i_sb);
-if (!inode) {
-printk(KERN_WARNING
-"XFS: Cannot allocate mapping inode for device %s\n",
-XFS_BUFTARG_NAME(btp));
-return ENOMEM;
-}
-inode->i_ino = get_next_ino();
-inode->i_mode = S_IFBLK;
-inode->i_bdev = bdev;
-inode->i_rdev = bdev->bd_dev;
-bdi = blk_get_backing_dev_info(bdev);
-if (!bdi)
-bdi = &default_backing_dev_info;
-mapping = &inode->i_data;
-mapping->a_ops = &mapping_aops;
-mapping->backing_dev_info = bdi;
-mapping_set_gfp_mask(mapping, GFP_NOFS);
-btp->bt_mapping = mapping;
-return 0;
-}
-
 STATIC int
 xfs_alloc_delwrite_queue(
 xfs_buftarg_t *btp,
@@ -1717,12 +1543,14 @@ xfs_alloc_buftarg(
 btp->bt_mount = mp;
 btp->bt_dev = bdev->bd_dev;
 btp->bt_bdev = bdev;
+btp->bt_bdi = blk_get_backing_dev_info(bdev);
+if (!btp->bt_bdi)
+goto error;
+
 INIT_LIST_HEAD(&btp->bt_lru);
 spin_lock_init(&btp->bt_lru_lock);
 if (xfs_setsize_buftarg_early(btp, bdev))
 goto error;
-if (xfs_mapping_buftarg(btp, bdev))
-goto error;
 if (xfs_alloc_delwrite_queue(btp, fsname))
 goto error;
 btp->bt_shrinker.shrink = xfs_buftarg_shrink;
@@ -61,30 +61,11 @@ typedef enum {
 #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */

 /* flags used only internally */
-#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
 #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
 #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
+#define _XBF_KMEM (1 << 20)/* backed by heap memory */
 #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */

-/*
- * Special flag for supporting metadata blocks smaller than a FSB.
- *
- * In this case we can have multiple xfs_buf_t on a single page and
- * need to lock out concurrent xfs_buf_t readers as they only
- * serialise access to the buffer.
- *
- * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
- * between reads of the page. Hence we can have one thread read the
- * page and modify it, but then race with another thread that thinks
- * the page is not up-to-date and hence reads it again.
- *
- * The result is that the first modifcation to the page is lost.
- * This sort of AGF/AGI reading race can happen when unlinking inodes
- * that require truncation and results in the AGI unlinked list
- * modifications being lost.
- */
-#define _XBF_PAGE_LOCKED (1 << 22)
-
 typedef unsigned int xfs_buf_flags_t;

 #define XFS_BUF_FLAGS \
@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
 { XBF_LOCK, "LOCK" }, /* should never be set */\
 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
-{ _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
 { _XBF_PAGES, "PAGES" }, \
 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
-{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
-{ _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
+{ _XBF_KMEM, "KMEM" }, \
+{ _XBF_DELWRI_Q, "DELWRI_Q" }


 typedef enum {
 XBT_FORCE_SLEEP = 0,
@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
 typedef struct xfs_buftarg {
 dev_t bt_dev;
 struct block_device *bt_bdev;
-struct address_space *bt_mapping;
+struct backing_dev_info *bt_bdi;
 struct xfs_mount *bt_mount;
 unsigned int bt_bsize;
 unsigned int bt_sshift;
@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
 unsigned int bt_lru_nr;
 } xfs_buftarg_t;

-/*
- * xfs_buf_t: Buffer structure for pagecache-based buffers
- *
- * This buffer structure is used by the pagecache buffer management routines
- * to refer to an assembly of pages forming a logical buffer.
- *
- * The buffer structure is used on a temporary basis only, and discarded when
- * released. The real data storage is recorded in the pagecache. Buffers are
- * hashed to the block device on which the file system resides.
- */
-
 struct xfs_buf;
 typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);

@@ -896,6 +896,7 @@ xfs_file_fallocate(
 xfs_flock64_t bf;
 xfs_inode_t *ip = XFS_I(inode);
 int cmd = XFS_IOC_RESVSP;
+int attr_flags = XFS_ATTR_NOLOCK;

 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 return -EOPNOTSUPP;
@@ -918,7 +919,10 @@ xfs_file_fallocate(
 goto out_unlock;
 }

-error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+if (file->f_flags & O_DSYNC)
+attr_flags |= XFS_ATTR_SYNC;
+
+error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
 if (error)
 goto out_unlock;

@@ -624,6 +624,10 @@ xfs_ioc_space(

 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
 attr_flags |= XFS_ATTR_NONBLOCK;
+
+if (filp->f_flags & O_DSYNC)
+attr_flags |= XFS_ATTR_SYNC;
+
 if (ioflags & IO_INVIS)
 attr_flags |= XFS_ATTR_DMI;

@@ -1078,7 +1078,7 @@ xfs_fs_write_inode(
 error = 0;
 goto out_unlock;
 }
-error = xfs_iflush(ip, 0);
+error = xfs_iflush(ip, SYNC_TRYLOCK);
 }

 out_unlock:
@@ -1539,10 +1539,14 @@ xfs_fs_fill_super(
 if (error)
 goto out_free_sb;

-error = xfs_mountfs(mp);
-if (error)
-goto out_filestream_unmount;
-
+/*
+ * we must configure the block size in the superblock before we run the
+ * full mount process as the mount process can lookup and cache inodes.
+ * For the same reason we must also initialise the syncd and register
+ * the inode cache shrinker so that inodes can be reclaimed during
+ * operations like a quotacheck that iterate all inodes in the
+ * filesystem.
+ */
 sb->s_magic = XFS_SB_MAGIC;
 sb->s_blocksize = mp->m_sb.sb_blocksize;
 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1550,6 +1554,16 @@ xfs_fs_fill_super(
 sb->s_time_gran = 1;
 set_posix_acl_flag(sb);

+error = xfs_syncd_init(mp);
+if (error)
+goto out_filestream_unmount;
+
+xfs_inode_shrinker_register(mp);
+
+error = xfs_mountfs(mp);
+if (error)
+goto out_syncd_stop;
+
 root = igrab(VFS_I(mp->m_rootip));
 if (!root) {
 error = ENOENT;
@@ -1565,14 +1579,11 @@ xfs_fs_fill_super(
 goto fail_vnrele;
 }

-error = xfs_syncd_init(mp);
-if (error)
-goto fail_vnrele;
-
-xfs_inode_shrinker_register(mp);
-
 return 0;

+out_syncd_stop:
+xfs_inode_shrinker_unregister(mp);
+xfs_syncd_stop(mp);
 out_filestream_unmount:
 xfs_filestream_unmount(mp);
 out_free_sb:
@@ -1596,6 +1607,9 @@ xfs_fs_fill_super(
 }

 fail_unmount:
+xfs_inode_shrinker_unregister(mp);
+xfs_syncd_stop(mp);
+
 /*
 * Blow away any referenced inode in the filestreams cache.
 * This can and will cause log traffic as inodes go inactive
@@ -761,8 +761,10 @@ xfs_reclaim_inode(
 struct xfs_perag *pag,
 int sync_mode)
 {
-int error = 0;
+int error;

+restart:
+error = 0;
 xfs_ilock(ip, XFS_ILOCK_EXCL);
 if (!xfs_iflock_nowait(ip)) {
 if (!(sync_mode & SYNC_WAIT))
@@ -788,9 +790,31 @@ xfs_reclaim_inode(
 if (xfs_inode_clean(ip))
 goto reclaim;

-/* Now we have an inode that needs flushing */
-error = xfs_iflush(ip, sync_mode);
+/*
+ * Now we have an inode that needs flushing.
+ *
+ * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+ * reclaim as we can deadlock with inode cluster removal.
+ * xfs_ifree_cluster() can lock the inode buffer before it locks the
+ * ip->i_lock, and we are doing the exact opposite here. As a result,
+ * doing a blocking xfs_itobp() to get the cluster buffer will result
+ * in an ABBA deadlock with xfs_ifree_cluster().
+ *
+ * As xfs_ifree_cluser() must gather all inodes that are active in the
+ * cache to mark them stale, if we hit this case we don't actually want
+ * to do IO here - we want the inode marked stale so we can simply
+ * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+ * just unlock the inode, back off and try again. Hopefully the next
+ * pass through will see the stale flag set on the inode.
+ */
+error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
 if (sync_mode & SYNC_WAIT) {
+if (error == EAGAIN) {
+xfs_iunlock(ip, XFS_ILOCK_EXCL);
+/* backoff longer than in xfs_ifree_cluster */
+delay(2);
+goto restart;
+}
 xfs_iflock(ip);
 goto reclaim;
 }
@@ -2835,7 +2835,7 @@ xfs_iflush(
 * Get the buffer containing the on-disk inode.
 */
 error = xfs_itobp(mp, NULL, ip, &dip, &bp,
-(flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
+(flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
 if (error || !bp) {
 xfs_ifunlock(ip);
 return error;
@@ -760,11 +760,11 @@ xfs_inode_item_push(
 * Push the inode to it's backing buffer. This will not remove the
 * inode from the AIL - a further push will be required to trigger a
 * buffer push. However, this allows all the dirty inodes to be pushed
- * to the buffer before it is pushed to disk. THe buffer IO completion
- * will pull th einode from the AIL, mark it clean and unlock the flush
+ * to the buffer before it is pushed to disk. The buffer IO completion
+ * will pull the inode from the AIL, mark it clean and unlock the flush
 * lock.
 */
-(void) xfs_iflush(ip, 0);
+(void) xfs_iflush(ip, SYNC_TRYLOCK);
 xfs_iunlock(ip, XFS_ILOCK_SHARED);
 }

@@ -383,7 +383,8 @@ xfs_trans_read_buf(
 bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
 if (bp == NULL) {
 *bpp = NULL;
-return 0;
+return (flags & XBF_TRYLOCK) ?
+0 : XFS_ERROR(ENOMEM);
 }
 if (XFS_BUF_GETERROR(bp) != 0) {
 XFS_BUF_SUPER_STALE(bp);
@@ -2831,6 +2831,7 @@ xfs_change_file_space(
 ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+if (attr_flags & XFS_ATTR_SYNC)
 xfs_trans_set_sync(tp);

 error = xfs_trans_commit(tp, 0);
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
 #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
+#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */

 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_release(struct xfs_inode *ip);