fs: protect inode->i_state with inode->i_lock
Protect inode state transitions and validity checks with the inode->i_lock. This enables us to make inode state transitions independently of the inode_lock and is the first step to peeling away the inode_lock from the code.

This requires that __iget() is done atomically with i_state checks during list traversals so that we don't race with another thread marking the inode I_FREEING between the state check and grabbing the reference.

Also remove the unlock_new_inode() memory barrier optimisation required to avoid taking the inode_lock when clearing I_NEW. Simplify the code by simply taking the inode->i_lock around the state change and wakeup. Because the wakeup is no longer tricky, remove the wake_up_inode() function and open code the wakeup where necessary.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
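[Editor's note] A minimal sketch, not part of the patch, of the list-traversal pattern this change establishes (compare the wait_sb_inodes() hunk below): the i_state check and __iget() both happen under inode->i_lock, so another thread cannot mark the inode I_FREEING between the check and the reference grab. All identifiers are the kernel's own; the surrounding inode_lock handling and iput() bookkeeping are omitted.

        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                spin_lock(&inode->i_lock);
                if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
                        spin_unlock(&inode->i_lock);
                        continue;       /* being torn down or not yet set up */
                }
                __iget(inode);          /* take the reference while i_lock is still held */
                spin_unlock(&inode->i_lock);
                /* ... drop inode_lock, work on the pinned inode, iput() it later ... */
        }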
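[Editor's note] The unlock_new_inode()/wake_up_inode() simplification mentioned in the message lives in fs/inode.c and is not part of the hunks below; the resulting shape is roughly the following sketch. With the state change and wakeup done under i_lock, no explicit memory barrier is needed.

        spin_lock(&inode->i_lock);
        WARN_ON(!(inode->i_state & I_NEW));
        inode->i_state &= ~I_NEW;
        wake_up_bit(&inode->i_state, __I_NEW); /* wakeup open coded at the call site */
        spin_unlock(&inode->i_lock);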
@@ -306,10 +306,12 @@ static void inode_wait_for_writeback(struct inode *inode)
         wait_queue_head_t *wqh;
 
         wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-         while (inode->i_state & I_SYNC) {
+        while (inode->i_state & I_SYNC) {
+                spin_unlock(&inode->i_lock);
                 spin_unlock(&inode_lock);
                 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
                 spin_lock(&inode_lock);
+                spin_lock(&inode->i_lock);
         }
 }
 
@@ -333,6 +335,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
         unsigned dirty;
         int ret;
 
+        spin_lock(&inode->i_lock);
         if (!atomic_read(&inode->i_count))
                 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
         else
@@ -348,6 +351,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                  * completed a full scan of b_io.
                  */
                 if (wbc->sync_mode != WB_SYNC_ALL) {
+                        spin_unlock(&inode->i_lock);
                         requeue_io(inode);
                         return 0;
                 }
@@ -363,6 +367,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
         /* Set I_SYNC, reset I_DIRTY_PAGES */
         inode->i_state |= I_SYNC;
         inode->i_state &= ~I_DIRTY_PAGES;
+        spin_unlock(&inode->i_lock);
         spin_unlock(&inode_lock);
 
         ret = do_writepages(mapping, wbc);
@@ -384,8 +389,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
          * write_inode()
          */
         spin_lock(&inode_lock);
+        spin_lock(&inode->i_lock);
         dirty = inode->i_state & I_DIRTY;
         inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+        spin_unlock(&inode->i_lock);
         spin_unlock(&inode_lock);
         /* Don't write the inode if only I_DIRTY_PAGES was set */
         if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
@@ -395,6 +402,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
         }
 
         spin_lock(&inode_lock);
+        spin_lock(&inode->i_lock);
         inode->i_state &= ~I_SYNC;
         if (!(inode->i_state & I_FREEING)) {
                 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -436,6 +444,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                 }
         }
         inode_sync_complete(inode);
+        spin_unlock(&inode->i_lock);
         return ret;
 }
 
@@ -506,7 +515,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                  * kind does not need peridic writeout yet, and for the latter
                  * kind writeout is handled by the freer.
                  */
+                spin_lock(&inode->i_lock);
                 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+                        spin_unlock(&inode->i_lock);
                         requeue_io(inode);
                         continue;
                 }
@@ -515,10 +526,14 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                  * Was this inode dirtied after sync_sb_inodes was called?
                  * This keeps sync from extra jobs and livelock.
                  */
-                if (inode_dirtied_after(inode, wbc->wb_start))
+                if (inode_dirtied_after(inode, wbc->wb_start)) {
+                        spin_unlock(&inode->i_lock);
                         return 1;
+                }
 
                 __iget(inode);
+                spin_unlock(&inode->i_lock);
+
                 pages_skipped = wbc->pages_skipped;
                 writeback_single_inode(inode, wbc);
                 if (wbc->pages_skipped != pages_skipped) {
@@ -724,7 +739,9 @@ static long wb_writeback(struct bdi_writeback *wb,
                 if (!list_empty(&wb->b_more_io)) {
                         inode = wb_inode(wb->b_more_io.prev);
                         trace_wbc_writeback_wait(&wbc, wb->bdi);
+                        spin_lock(&inode->i_lock);
                         inode_wait_for_writeback(inode);
+                        spin_unlock(&inode->i_lock);
                 }
                 spin_unlock(&inode_lock);
         }
@@ -1017,6 +1034,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                 block_dump___mark_inode_dirty(inode);
 
         spin_lock(&inode_lock);
+        spin_lock(&inode->i_lock);
         if ((inode->i_state & flags) != flags) {
                 const int was_dirty = inode->i_state & I_DIRTY;
 
@@ -1028,7 +1046,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                  * superblock list, based upon its state.
                  */
                 if (inode->i_state & I_SYNC)
-                        goto out;
+                        goto out_unlock_inode;
 
                 /*
                  * Only add valid (hashed) inodes to the superblock's
@@ -1036,11 +1054,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                  */
                 if (!S_ISBLK(inode->i_mode)) {
                         if (inode_unhashed(inode))
-                                goto out;
+                                goto out_unlock_inode;
                 }
                 if (inode->i_state & I_FREEING)
-                        goto out;
+                        goto out_unlock_inode;
 
+                spin_unlock(&inode->i_lock);
                 /*
                  * If the inode was already on b_dirty/b_io/b_more_io, don't
                  * reposition it (that would break b_dirty time-ordering).
@@ -1065,7 +1084,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                         inode->dirtied_when = jiffies;
                         list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
                 }
+                goto out;
         }
+out_unlock_inode:
+        spin_unlock(&inode->i_lock);
 out:
         spin_unlock(&inode_lock);
 
@@ -1111,14 +1133,16 @@ static void wait_sb_inodes(struct super_block *sb)
          * we still have to wait for that writeout.
          */
         list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-                struct address_space *mapping;
+                struct address_space *mapping = inode->i_mapping;
 
-                if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
-                        continue;
-                mapping = inode->i_mapping;
-                if (mapping->nrpages == 0)
+                spin_lock(&inode->i_lock);
+                if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+                    (mapping->nrpages == 0)) {
+                        spin_unlock(&inode->i_lock);
                         continue;
+                }
                 __iget(inode);
+                spin_unlock(&inode->i_lock);
                 spin_unlock(&inode_lock);
                 /*
                  * We hold a reference to 'inode' so it couldn't have