xfs: don't flush inodes from background inode reclaim
We already flush dirty inodes throug the AIL regularly, there is no reason to have second thread compete with it and disturb the I/O pattern. We still do write inodes when doing a synchronous reclaim from the shrinker or during unmount for now. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
committed by
Ben Myers
parent
211e4d434b
commit
8a48088f64
@@ -631,11 +631,8 @@ xfs_reclaim_inode_grab(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Inodes in different states need to be treated differently, and the return
|
* Inodes in different states need to be treated differently. The following
|
||||||
* value of xfs_iflush is not sufficient to get this right. The following table
|
* table lists the inode states and the reclaim actions necessary:
|
||||||
* lists the inode states and the reclaim actions necessary for non-blocking
|
|
||||||
* reclaim:
|
|
||||||
*
|
|
||||||
*
|
*
|
||||||
* inode state iflush ret required action
|
* inode state iflush ret required action
|
||||||
* --------------- ---------- ---------------
|
* --------------- ---------- ---------------
|
||||||
@@ -645,9 +642,8 @@ xfs_reclaim_inode_grab(
|
|||||||
* stale, unpinned 0 reclaim
|
* stale, unpinned 0 reclaim
|
||||||
* clean, pinned(*) 0 requeue
|
* clean, pinned(*) 0 requeue
|
||||||
* stale, pinned EAGAIN requeue
|
* stale, pinned EAGAIN requeue
|
||||||
* dirty, delwri ok 0 requeue
|
* dirty, async - requeue
|
||||||
* dirty, delwri blocked EAGAIN requeue
|
* dirty, sync 0 reclaim
|
||||||
* dirty, sync flush 0 reclaim
|
|
||||||
*
|
*
|
||||||
* (*) dgc: I don't think the clean, pinned state is possible but it gets
|
* (*) dgc: I don't think the clean, pinned state is possible but it gets
|
||||||
* handled anyway given the order of checks implemented.
|
* handled anyway given the order of checks implemented.
|
||||||
@@ -658,26 +654,23 @@ xfs_reclaim_inode_grab(
|
|||||||
*
|
*
|
||||||
* Also, because we get the flush lock first, we know that any inode that has
|
* Also, because we get the flush lock first, we know that any inode that has
|
||||||
* been flushed delwri has had the flush completed by the time we check that
|
* been flushed delwri has had the flush completed by the time we check that
|
||||||
* the inode is clean. The clean inode check needs to be done before flushing
|
* the inode is clean.
|
||||||
* the inode delwri otherwise we would loop forever requeuing clean inodes as
|
|
||||||
* we cannot tell apart a successful delwri flush and a clean inode from the
|
|
||||||
* return value of xfs_iflush().
|
|
||||||
*
|
*
|
||||||
* Note that because the inode is flushed delayed write by background
|
* Note that because the inode is flushed delayed write by AIL pushing, the
|
||||||
* writeback, the flush lock may already be held here and waiting on it can
|
* flush lock may already be held here and waiting on it can result in very
|
||||||
* result in very long latencies. Hence for sync reclaims, where we wait on the
|
* long latencies. Hence for sync reclaims, where we wait on the flush lock,
|
||||||
* flush lock, the caller should push out delayed write inodes first before
|
* the caller should push the AIL first before trying to reclaim inodes to
|
||||||
* trying to reclaim them to minimise the amount of time spent waiting. For
|
* minimise the amount of time spent waiting. For background relaim, we only
|
||||||
* background relaim, we just requeue the inode for the next pass.
|
* bother to reclaim clean inodes anyway.
|
||||||
*
|
*
|
||||||
* Hence the order of actions after gaining the locks should be:
|
* Hence the order of actions after gaining the locks should be:
|
||||||
* bad => reclaim
|
* bad => reclaim
|
||||||
* shutdown => unpin and reclaim
|
* shutdown => unpin and reclaim
|
||||||
* pinned, delwri => requeue
|
* pinned, async => requeue
|
||||||
* pinned, sync => unpin
|
* pinned, sync => unpin
|
||||||
* stale => reclaim
|
* stale => reclaim
|
||||||
* clean => reclaim
|
* clean => reclaim
|
||||||
* dirty, delwri => flush and requeue
|
* dirty, async => requeue
|
||||||
* dirty, sync => flush, wait and reclaim
|
* dirty, sync => flush, wait and reclaim
|
||||||
*/
|
*/
|
||||||
STATIC int
|
STATIC int
|
||||||
@@ -716,10 +709,8 @@ restart:
|
|||||||
goto reclaim;
|
goto reclaim;
|
||||||
}
|
}
|
||||||
if (xfs_ipincount(ip)) {
|
if (xfs_ipincount(ip)) {
|
||||||
if (!(sync_mode & SYNC_WAIT)) {
|
if (!(sync_mode & SYNC_WAIT))
|
||||||
xfs_ifunlock(ip);
|
goto out_ifunlock;
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
xfs_iunpin_wait(ip);
|
xfs_iunpin_wait(ip);
|
||||||
}
|
}
|
||||||
if (xfs_iflags_test(ip, XFS_ISTALE))
|
if (xfs_iflags_test(ip, XFS_ISTALE))
|
||||||
@@ -727,6 +718,13 @@ restart:
|
|||||||
if (xfs_inode_clean(ip))
|
if (xfs_inode_clean(ip))
|
||||||
goto reclaim;
|
goto reclaim;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Never flush out dirty data during non-blocking reclaim, as it would
|
||||||
|
* just contend with AIL pushing trying to do the same job.
|
||||||
|
*/
|
||||||
|
if (!(sync_mode & SYNC_WAIT))
|
||||||
|
goto out_ifunlock;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we have an inode that needs flushing.
|
* Now we have an inode that needs flushing.
|
||||||
*
|
*
|
||||||
@@ -745,42 +743,13 @@ restart:
|
|||||||
* pass through will see the stale flag set on the inode.
|
* pass through will see the stale flag set on the inode.
|
||||||
*/
|
*/
|
||||||
error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
|
error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
|
||||||
if (sync_mode & SYNC_WAIT) {
|
if (error == EAGAIN) {
|
||||||
if (error == EAGAIN) {
|
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
/* backoff longer than in xfs_ifree_cluster */
|
||||||
/* backoff longer than in xfs_ifree_cluster */
|
delay(2);
|
||||||
delay(2);
|
goto restart;
|
||||||
goto restart;
|
|
||||||
}
|
|
||||||
xfs_iflock(ip);
|
|
||||||
goto reclaim;
|
|
||||||
}
|
}
|
||||||
|
xfs_iflock(ip);
|
||||||
/*
|
|
||||||
* When we have to flush an inode but don't have SYNC_WAIT set, we
|
|
||||||
* flush the inode out using a delwri buffer and wait for the next
|
|
||||||
* call into reclaim to find it in a clean state instead of waiting for
|
|
||||||
* it now. We also don't return errors here - if the error is transient
|
|
||||||
* then the next reclaim pass will flush the inode, and if the error
|
|
||||||
* is permanent then the next sync reclaim will reclaim the inode and
|
|
||||||
* pass on the error.
|
|
||||||
*/
|
|
||||||
if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
|
|
||||||
xfs_warn(ip->i_mount,
|
|
||||||
"inode 0x%llx background reclaim flush failed with %d",
|
|
||||||
(long long)ip->i_ino, error);
|
|
||||||
}
|
|
||||||
out:
|
|
||||||
xfs_iflags_clear(ip, XFS_IRECLAIM);
|
|
||||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
|
||||||
/*
|
|
||||||
* We could return EAGAIN here to make reclaim rescan the inode tree in
|
|
||||||
* a short while. However, this just burns CPU time scanning the tree
|
|
||||||
* waiting for IO to complete and xfssyncd never goes back to the idle
|
|
||||||
* state. Instead, return 0 to let the next scheduled background reclaim
|
|
||||||
* attempt to reclaim the inode again.
|
|
||||||
*/
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
reclaim:
|
reclaim:
|
||||||
xfs_ifunlock(ip);
|
xfs_ifunlock(ip);
|
||||||
@@ -814,8 +783,21 @@ reclaim:
|
|||||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||||
|
|
||||||
xfs_inode_free(ip);
|
xfs_inode_free(ip);
|
||||||
|
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
|
out_ifunlock:
|
||||||
|
xfs_ifunlock(ip);
|
||||||
|
out:
|
||||||
|
xfs_iflags_clear(ip, XFS_IRECLAIM);
|
||||||
|
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||||
|
/*
|
||||||
|
* We could return EAGAIN here to make reclaim rescan the inode tree in
|
||||||
|
* a short while. However, this just burns CPU time scanning the tree
|
||||||
|
* waiting for IO to complete and xfssyncd never goes back to the idle
|
||||||
|
* state. Instead, return 0 to let the next scheduled background reclaim
|
||||||
|
* attempt to reclaim the inode again.
|
||||||
|
*/
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user