writeback: split inode_wb_list_lock into bdi_writeback.list_lock
Split the global inode_wb_list_lock into a per-bdi_writeback list_lock, as it's currently the most contended lock in the system for metadata heavy workloads. It won't help for single-filesystem workloads for which we'll need the I/O-less balance_dirty_pages, but at least we can dedicate a cpu to spinning on each bdi now for larger systems. Based on earlier patches from Nick Piggin and Dave Chinner. It reduces lock contentions to 1/4 in this test case: 10 HDD JBOD, 100 dd on each disk, XFS, 6GB ram lock_stat version 0.3 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- vanilla 2.6.39-rc3: inode_wb_list_lock: 42590 44433 0.12 147.74 144127.35 252274 886792 0.08 121.34 917211.23 ------------------ inode_wb_list_lock 2 [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85 inode_wb_list_lock 34 [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49 inode_wb_list_lock 12893 [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0 inode_wb_list_lock 10702 [<ffffffff8115afef>] writeback_single_inode+0x16d/0x20a ------------------ inode_wb_list_lock 2 [<ffffffff81165da5>] bdev_inode_switch_bdi+0x29/0x85 inode_wb_list_lock 19 [<ffffffff8115bd0b>] inode_wb_list_del+0x22/0x49 inode_wb_list_lock 5550 [<ffffffff8115bb53>] __mark_inode_dirty+0x170/0x1d0 inode_wb_list_lock 8511 [<ffffffff8115b4ad>] writeback_sb_inodes+0x10f/0x157 2.6.39-rc3 + patch: &(&wb->list_lock)->rlock: 11383 11657 0.14 151.69 40429.51 90825 527918 0.11 145.90 556843.37 ------------------------ &(&wb->list_lock)->rlock 10 [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86 &(&wb->list_lock)->rlock 1493 [<ffffffff8115b1ed>] writeback_inodes_wb+0x3d/0x150 &(&wb->list_lock)->rlock 3652 [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f &(&wb->list_lock)->rlock 1412 [<ffffffff8115a38e>] writeback_single_inode+0x17f/0x223 ------------------------ &(&wb->list_lock)->rlock 3 [<ffffffff8110b5af>] bdi_lock_two+0x46/0x4b &(&wb->list_lock)->rlock 6 [<ffffffff8115b189>] inode_wb_list_del+0x5f/0x86 &(&wb->list_lock)->rlock 2061 [<ffffffff8115af97>] __mark_inode_dirty+0x173/0x1cf &(&wb->list_lock)->rlock 2629 [<ffffffff8115a8e9>] writeback_sb_inodes+0x123/0x16f hughd@google.com: fix recursive lock when bdi_lock_two() is called with new the same as old akpm@linux-foundation.org: cleanup bdev_inode_switch_bdi() comment Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
This commit is contained in:
committed by
Wu Fengguang
parent
424b351fe1
commit
f758eeabeb
@@ -45,6 +45,17 @@ static struct timer_list sync_supers_timer;
|
||||
static int bdi_sync_supers(void *);
|
||||
static void sync_supers_timer_fn(unsigned long);
|
||||
|
||||
void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
|
||||
{
|
||||
if (wb1 < wb2) {
|
||||
spin_lock(&wb1->list_lock);
|
||||
spin_lock_nested(&wb2->list_lock, 1);
|
||||
} else {
|
||||
spin_lock(&wb2->list_lock);
|
||||
spin_lock_nested(&wb1->list_lock, 1);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/seq_file.h>
|
||||
@@ -67,14 +78,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
|
||||
struct inode *inode;
|
||||
|
||||
nr_dirty = nr_io = nr_more_io = 0;
|
||||
spin_lock(&inode_wb_list_lock);
|
||||
spin_lock(&wb->list_lock);
|
||||
list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
|
||||
nr_dirty++;
|
||||
list_for_each_entry(inode, &wb->b_io, i_wb_list)
|
||||
nr_io++;
|
||||
list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
|
||||
nr_more_io++;
|
||||
spin_unlock(&inode_wb_list_lock);
|
||||
spin_unlock(&wb->list_lock);
|
||||
|
||||
global_dirty_limits(&background_thresh, &dirty_thresh);
|
||||
bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
|
||||
@@ -628,6 +639,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
|
||||
INIT_LIST_HEAD(&wb->b_dirty);
|
||||
INIT_LIST_HEAD(&wb->b_io);
|
||||
INIT_LIST_HEAD(&wb->b_more_io);
|
||||
spin_lock_init(&wb->list_lock);
|
||||
setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
|
||||
}
|
||||
|
||||
@@ -676,11 +688,12 @@ void bdi_destroy(struct backing_dev_info *bdi)
|
||||
if (bdi_has_dirty_io(bdi)) {
|
||||
struct bdi_writeback *dst = &default_backing_dev_info.wb;
|
||||
|
||||
spin_lock(&inode_wb_list_lock);
|
||||
bdi_lock_two(&bdi->wb, dst);
|
||||
list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
|
||||
list_splice(&bdi->wb.b_io, &dst->b_io);
|
||||
list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
|
||||
spin_unlock(&inode_wb_list_lock);
|
||||
spin_unlock(&bdi->wb.list_lock);
|
||||
spin_unlock(&dst->list_lock);
|
||||
}
|
||||
|
||||
bdi_unregister(bdi);
|
||||
|
Reference in New Issue
Block a user