writeback: split inode_wb_list_lock into bdi_writeback.list_lock
Split the global inode_wb_list_lock into a per-bdi_writeback list_lock, as it's currently the most contended lock in the system for metadata heavy workloads. It won't help for single-filesystem workloads for which we'll need the I/O-less balance_dirty_pages, but at least we can dedicate a cpu to spinning on each bdi now for larger systems.

Based on earlier patches from Nick Piggin and Dave Chinner.

It reduces lock contentions to 1/4 in this test case:

10 HDD JBOD, 100 dd on each disk, XFS, 6GB ram

lock_stat version 0.3
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
class name    con-bounces    contentions   waittime-min   waittime-max   waittime-total    acq-bounces   acquisitions   holdtime-min   holdtime-max   holdtime-total
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
vanilla 2.6.39-rc3:
inode_wb_list_lock:    42590    44433    0.12    147.74    144127.35    252274    886792    0.08    121.34    917211.23
------------------
inode_wb_list_lock        2    [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85
inode_wb_list_lock       34    [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49
inode_wb_list_lock    12893    [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0
inode_wb_list_lock    10702    [<ffffffff8115afef>] writeback_single_inode+0x16d/0x20a
------------------
inode_wb_list_lock        2    [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85
inode_wb_list_lock       19    [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49
inode_wb_list_lock     5550    [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0
inode_wb_list_lock     8511    [<ffffffff8115b4ad>] writeback_sb_inodes+0x10f/0x157

2.6.39-rc3 + patch:
&(&wb->list_lock)->rlock:    11383    11657    0.14    151.69    40429.51    90825    527918    0.11    145.90    556843.37
------------------------
&(&wb->list_lock)->rlock       10    [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86
&(&wb->list_lock)->rlock     1493    [<ffffffff8115b1ed>] writeback_inodes_wb+0x3d/0x150
&(&wb->list_lock)->rlock     3652    [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f
&(&wb->list_lock)->rlock     1412    [<ffffffff8115a38e>] writeback_single_inode+0x17f/0x223
------------------------
&(&wb->list_lock)->rlock        3    [<ffffffff8110b5af>] bdi_lock_two+0x46/0x4b
&(&wb->list_lock)->rlock        6    [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86
&(&wb->list_lock)->rlock     2061    [<ffffffff8115af97>] __mark_inode_dirty+0x173/0x1cf
&(&wb->list_lock)->rlock     2629    [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f

hughd@google.com: fix recursive lock when bdi_lock_two() is called with new the same as old
akpm@linux-foundation.org: cleanup bdev_inode_switch_bdi() comment

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
This commit is contained in:
committed by
Wu Fengguang
parent
424b351fe1
commit
f758eeabeb
@@ -44,24 +44,28 @@ inline struct block_device *I_BDEV(struct inode *inode)
|
||||
{
|
||||
return &BDEV_I(inode)->bdev;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(I_BDEV);
|
||||
|
||||
/*
|
||||
* move the inode from it's current bdi to the a new bdi. if the inode is dirty
|
||||
* we need to move it onto the dirty list of @dst so that the inode is always
|
||||
* on the right list.
|
||||
* Move the inode from its current bdi to a new bdi. If the inode is dirty we
|
||||
* need to move it onto the dirty list of @dst so that the inode is always on
|
||||
* the right list.
|
||||
*/
|
||||
static void bdev_inode_switch_bdi(struct inode *inode,
|
||||
struct backing_dev_info *dst)
|
||||
{
|
||||
spin_lock(&inode_wb_list_lock);
|
||||
struct backing_dev_info *old = inode->i_data.backing_dev_info;
|
||||
|
||||
if (unlikely(dst == old)) /* deadlock avoidance */
|
||||
return;
|
||||
bdi_lock_two(&old->wb, &dst->wb);
|
||||
spin_lock(&inode->i_lock);
|
||||
inode->i_data.backing_dev_info = dst;
|
||||
if (inode->i_state & I_DIRTY)
|
||||
list_move(&inode->i_wb_list, &dst->wb.b_dirty);
|
||||
spin_unlock(&inode->i_lock);
|
||||
spin_unlock(&inode_wb_list_lock);
|
||||
spin_unlock(&old->wb.list_lock);
|
||||
spin_unlock(&dst->wb.list_lock);
|
||||
}
|
||||
|
||||
static sector_t max_block(struct block_device *bdev)
|
||||
|
Reference in New Issue
Block a user