Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: use proper interfaces for on-stack plugging
  xfs: fix xfs_debug warnings
  xfs: fix variable set but not used warnings
  xfs: convert log tail checking to a warning
  xfs: catch bad block numbers freeing extents.
  xfs: push the AIL from memory reclaim and periodic sync
  xfs: clean up code layout in xfs_trans_ail.c
  xfs: convert the xfsaild threads to a workqueue
  xfs: introduce background inode reclaim work
  xfs: convert ENOSPC inode flushing to use new syncd workqueue
  xfs: introduce a xfssyncd workqueue
  xfs: fix extent format buffer allocation size
  xfs: fix unreferenced var error in xfs_buf.c

Also, applied patch from Tony Luck that fixes ia64 (xfs_destroy_workqueues() should not be tagged with __exit) in the branch before merging.
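The common thread in this series is the switch from the dedicated xfssyncd kthread to kernel workqueues. As orientation for the diff below, here is a minimal, self-contained sketch of the self-rearming delayed-work pattern the new code relies on. The demo_* names, the workqueue name, and the 30-second interval are illustrative placeholders, not the actual XFS symbols:

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

/* Hypothetical mount-like object that owns a periodic work item. */
struct demo_mount {
        struct delayed_work     sync_work;
};

static struct workqueue_struct *demo_wq;

/* Do one periodic pass, then re-queue ourselves: no dedicated thread needed. */
static void demo_sync_worker(struct work_struct *work)
{
        struct demo_mount *dm = container_of(to_delayed_work(work),
                                             struct demo_mount, sync_work);

        /* ... the periodic sync work on 'dm' would go here ... */

        queue_delayed_work(demo_wq, &dm->sync_work, msecs_to_jiffies(30000));
}

static int demo_start(struct demo_mount *dm)
{
        demo_wq = alloc_workqueue("demo_syncd", 0, 0);
        if (!demo_wq)
                return -ENOMEM;

        INIT_DELAYED_WORK(&dm->sync_work, demo_sync_worker);
        queue_delayed_work(demo_wq, &dm->sync_work, msecs_to_jiffies(30000));
        return 0;
}

static void demo_stop(struct demo_mount *dm)
{
        /* Stops a pending timer and waits for a running worker to finish. */
        cancel_delayed_work_sync(&dm->sync_work);
        destroy_workqueue(demo_wq);
}

The teardown is where the pattern pays off, as the xfs_syncd_stop() hunk below shows: cancel_delayed_work_sync() replaces the kthread_stop() handshake and the hand-rolled work list.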
@@ -22,6 +22,7 @@
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -39,6 +40,8 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
+struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
+
 /*
  * The inode lookup is done in batches to keep the amount of lock traffic and
  * radix tree lookups to a minimum. The batch size is a trade off between
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
         xfs_unmountfs_writesb(mp);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-        struct xfs_mount *mp,
-        void *data,
-        void (*syncer)(struct xfs_mount *, void *),
-        struct completion *completion)
+static void
+xfs_syncd_queue_sync(
+        struct xfs_mount *mp)
 {
-        struct xfs_sync_work *work;
-
-        work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-        INIT_LIST_HEAD(&work->w_list);
-        work->w_syncer = syncer;
-        work->w_data = data;
-        work->w_mount = mp;
-        work->w_completion = completion;
-        spin_lock(&mp->m_sync_lock);
-        list_add_tail(&work->w_list, &mp->m_sync_list);
-        spin_unlock(&mp->m_sync_lock);
-        wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-        struct xfs_mount *mp,
-        void *arg)
-{
-        struct inode *inode = arg;
-        xfs_sync_data(mp, SYNC_TRYLOCK);
-        xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-        iput(inode);
-}
-
-void
-xfs_flush_inodes(
-        xfs_inode_t *ip)
-{
-        struct inode *inode = VFS_I(ip);
-        DECLARE_COMPLETION_ONSTACK(completion);
-
-        igrab(inode);
-        xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-        wait_for_completion(&completion);
-        xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
+        queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+                                msecs_to_jiffies(xfs_syncd_centisecs * 10));
 }
 
 /*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
  */
 STATIC void
 xfs_sync_worker(
-        struct xfs_mount *mp,
-        void *unused)
+        struct work_struct *work)
 {
+        struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                        struct xfs_mount, m_sync_work);
         int error;
 
         if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
                         error = xfs_fs_log_dummy(mp);
                 else
                         xfs_log_force(mp, 0);
-                xfs_reclaim_inodes(mp, 0);
                 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+                /* start pushing all the metadata that is currently dirty */
+                xfs_ail_push_all(mp->m_ail);
         }
-        mp->m_sync_seq++;
-        wake_up(&mp->m_wait_single_sync_task);
+
+        /* queue us up again */
+        xfs_syncd_queue_sync(mp);
 }
 
-STATIC int
-xfssyncd(
-        void *arg)
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+        struct xfs_mount *mp)
 {
-        struct xfs_mount *mp = arg;
-        long timeleft;
-        xfs_sync_work_t *work, *n;
-        LIST_HEAD (tmp);
-
-        set_freezable();
-        timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-        for (;;) {
-                if (list_empty(&mp->m_sync_list))
-                        timeleft = schedule_timeout_interruptible(timeleft);
-                /* swsusp */
-                try_to_freeze();
-                if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-                        break;
+        /*
+         * We can have inodes enter reclaim after we've shut down the syncd
+         * workqueue during unmount, so don't allow reclaim work to be queued
+         * during unmount.
+         */
+        if (!(mp->m_super->s_flags & MS_ACTIVE))
+                return;
 
-                spin_lock(&mp->m_sync_lock);
-                /*
-                 * We can get woken by laptop mode, to do a sync -
-                 * that's the (only!) case where the list would be
-                 * empty with time remaining.
-                 */
-                if (!timeleft || list_empty(&mp->m_sync_list)) {
-                        if (!timeleft)
-                                timeleft = xfs_syncd_centisecs *
-                                                        msecs_to_jiffies(10);
-                        INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-                        list_add_tail(&mp->m_sync_work.w_list,
-                                        &mp->m_sync_list);
-                }
-                list_splice_init(&mp->m_sync_list, &tmp);
-                spin_unlock(&mp->m_sync_lock);
-
-                list_for_each_entry_safe(work, n, &tmp, w_list) {
-                        (*work->w_syncer)(mp, work->w_data);
-                        list_del(&work->w_list);
-                        if (work == &mp->m_sync_work)
-                                continue;
-                        if (work->w_completion)
-                                complete(work->w_completion);
-                        kmem_free(work);
-                }
+        rcu_read_lock();
+        if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+                queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+                        msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
         }
+        rcu_read_unlock();
+}
 
-        return 0;
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+        struct work_struct *work)
+{
+        struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                        struct xfs_mount, m_reclaim_work);
+
+        xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+        xfs_syncd_queue_reclaim(mp);
 }
 
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+        struct xfs_inode *ip)
+{
+        struct xfs_mount *mp = ip->i_mount;
+
+        queue_work(xfs_syncd_wq, &mp->m_flush_work);
+        flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+        struct work_struct *work)
+{
+        struct xfs_mount *mp = container_of(work,
+                                        struct xfs_mount, m_flush_work);
+
+        xfs_sync_data(mp, SYNC_TRYLOCK);
+        xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+}
+
 int
 xfs_syncd_init(
         struct xfs_mount *mp)
 {
-        mp->m_sync_work.w_syncer = xfs_sync_worker;
-        mp->m_sync_work.w_mount = mp;
-        mp->m_sync_work.w_completion = NULL;
-        mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-        if (IS_ERR(mp->m_sync_task))
-                return -PTR_ERR(mp->m_sync_task);
+        INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+        INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+        INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+        xfs_syncd_queue_sync(mp);
+        xfs_syncd_queue_reclaim(mp);
+
         return 0;
 }
@@ -582,7 +569,9 @@ void
 xfs_syncd_stop(
         struct xfs_mount *mp)
 {
-        kthread_stop(mp->m_sync_task);
+        cancel_delayed_work_sync(&mp->m_sync_work);
+        cancel_delayed_work_sync(&mp->m_reclaim_work);
+        cancel_work_sync(&mp->m_flush_work);
 }
 
 void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
                                 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
                                 XFS_ICI_RECLAIM_TAG);
                 spin_unlock(&ip->i_mount->m_perag_lock);
+
+                /* schedule periodic background inode reclaim */
+                xfs_syncd_queue_reclaim(ip->i_mount);
+
                 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
                                                         -1, _RET_IP_);
         }
@@ -1017,7 +1010,13 @@ xfs_reclaim_inodes(
 }
 
 /*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doiing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
  */
 static int
 xfs_reclaim_inode_shrink(
@@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink(
 
         mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
         if (nr_to_scan) {
+                /* kick background reclaimer and push the AIL */
+                xfs_syncd_queue_reclaim(mp);
+                xfs_ail_push_all(mp->m_ail);
+
                 if (!(gfp_mask & __GFP_FS))
                         return -1;
 
-                xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+                xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+                                        &nr_to_scan);
                 /* terminate if we don't exhaust the scan */
                 if (nr_to_scan > 0)
                         return -1;
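The ENOSPC path in the diff uses a complementary workqueue pattern: every thread that hits ENOSPC kicks the same non-reentrant work item and waits for it, so concurrent events collapse into a single flush pass. A rough sketch of that idea, with hypothetical demo_* names (the diff's flush_work_sync() was later folded into plain flush_work() in mainline):

#include <linux/workqueue.h>

struct demo_mount {
        struct work_struct      flush_work;
};

static struct workqueue_struct *demo_wq;

static void demo_flush_worker(struct work_struct *work)
{
        /* the single, serialized flush pass would run here */
}

/*
 * Callers racing on ENOSPC all end up waiting on one work item:
 * queue_work() is a no-op if the item is already pending, and
 * flush_work() blocks until the current execution completes.
 */
static void demo_flush(struct demo_mount *dm)
{
        queue_work(demo_wq, &dm->flush_work);
        flush_work(&dm->flush_work);
}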