buffer_head: fix private_list handling
There are two possible races in handling of private_list in buffer cache. 1) When fsync_buffers_list() processes a private_list, it clears b_assoc_mapping and moves buffer to its private list. Now drop_buffers() comes, sees a buffer is on list so it calls __remove_assoc_queue() which complains about b_assoc_mapping being cleared (as it cannot propagate possible IO error). This race has been actually observed in the wild. 2) When fsync_buffers_list() processes a private_list, mark_buffer_dirty_inode() can be called on bh which is already on the private list of fsync_buffers_list(). As buffer is on some list (note that the check is performed without private_lock), it is not readded to the mapping's private_list and after fsync_buffers_list() finishes, we have a dirty buffer which should be on private_list but it isn't. This race has not been reported, probably because most (but not all) callers of mark_buffer_dirty_inode() hold i_mutex and thus are serialized with fsync(). Fix these issues by not clearing b_assoc_map when fsync_buffers_list() moves buffer to a dedicated list and by reinserting buffer in private_list when it is found dirty after we have submitted buffer for IO. We also change the tests whether a buffer is on a private list from !list_empty(&bh->b_assoc_buffers) to bh->b_assoc_map so that they are single word reads and hence lockless checks are safe. Signed-off-by: Jan Kara <jack@suse.cz> Cc: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
23
fs/buffer.c
23
fs/buffer.c
@@ -678,7 +678,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
|
|||||||
} else {
|
} else {
|
||||||
BUG_ON(mapping->assoc_mapping != buffer_mapping);
|
BUG_ON(mapping->assoc_mapping != buffer_mapping);
|
||||||
}
|
}
|
||||||
if (list_empty(&bh->b_assoc_buffers)) {
|
if (!bh->b_assoc_map) {
|
||||||
spin_lock(&buffer_mapping->private_lock);
|
spin_lock(&buffer_mapping->private_lock);
|
||||||
list_move_tail(&bh->b_assoc_buffers,
|
list_move_tail(&bh->b_assoc_buffers,
|
||||||
&mapping->private_list);
|
&mapping->private_list);
|
||||||
@@ -794,6 +794,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
|
|||||||
{
|
{
|
||||||
struct buffer_head *bh;
|
struct buffer_head *bh;
|
||||||
struct list_head tmp;
|
struct list_head tmp;
|
||||||
|
struct address_space *mapping;
|
||||||
int err = 0, err2;
|
int err = 0, err2;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&tmp);
|
INIT_LIST_HEAD(&tmp);
|
||||||
@@ -801,9 +802,14 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
|
|||||||
spin_lock(lock);
|
spin_lock(lock);
|
||||||
while (!list_empty(list)) {
|
while (!list_empty(list)) {
|
||||||
bh = BH_ENTRY(list->next);
|
bh = BH_ENTRY(list->next);
|
||||||
|
mapping = bh->b_assoc_map;
|
||||||
__remove_assoc_queue(bh);
|
__remove_assoc_queue(bh);
|
||||||
|
/* Avoid race with mark_buffer_dirty_inode() which does
|
||||||
|
* a lockless check and we rely on seeing the dirty bit */
|
||||||
|
smp_mb();
|
||||||
if (buffer_dirty(bh) || buffer_locked(bh)) {
|
if (buffer_dirty(bh) || buffer_locked(bh)) {
|
||||||
list_add(&bh->b_assoc_buffers, &tmp);
|
list_add(&bh->b_assoc_buffers, &tmp);
|
||||||
|
bh->b_assoc_map = mapping;
|
||||||
if (buffer_dirty(bh)) {
|
if (buffer_dirty(bh)) {
|
||||||
get_bh(bh);
|
get_bh(bh);
|
||||||
spin_unlock(lock);
|
spin_unlock(lock);
|
||||||
@@ -822,8 +828,17 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
|
|||||||
|
|
||||||
while (!list_empty(&tmp)) {
|
while (!list_empty(&tmp)) {
|
||||||
bh = BH_ENTRY(tmp.prev);
|
bh = BH_ENTRY(tmp.prev);
|
||||||
list_del_init(&bh->b_assoc_buffers);
|
|
||||||
get_bh(bh);
|
get_bh(bh);
|
||||||
|
mapping = bh->b_assoc_map;
|
||||||
|
__remove_assoc_queue(bh);
|
||||||
|
/* Avoid race with mark_buffer_dirty_inode() which does
|
||||||
|
* a lockless check and we rely on seeing the dirty bit */
|
||||||
|
smp_mb();
|
||||||
|
if (buffer_dirty(bh)) {
|
||||||
|
list_add(&bh->b_assoc_buffers,
|
||||||
|
&bh->b_assoc_map->private_list);
|
||||||
|
bh->b_assoc_map = mapping;
|
||||||
|
}
|
||||||
spin_unlock(lock);
|
spin_unlock(lock);
|
||||||
wait_on_buffer(bh);
|
wait_on_buffer(bh);
|
||||||
if (!buffer_uptodate(bh))
|
if (!buffer_uptodate(bh))
|
||||||
@@ -1195,7 +1210,7 @@ void __brelse(struct buffer_head * buf)
|
|||||||
void __bforget(struct buffer_head *bh)
|
void __bforget(struct buffer_head *bh)
|
||||||
{
|
{
|
||||||
clear_buffer_dirty(bh);
|
clear_buffer_dirty(bh);
|
||||||
if (!list_empty(&bh->b_assoc_buffers)) {
|
if (bh->b_assoc_map) {
|
||||||
struct address_space *buffer_mapping = bh->b_page->mapping;
|
struct address_space *buffer_mapping = bh->b_page->mapping;
|
||||||
|
|
||||||
spin_lock(&buffer_mapping->private_lock);
|
spin_lock(&buffer_mapping->private_lock);
|
||||||
@@ -3022,7 +3037,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
|
|||||||
do {
|
do {
|
||||||
struct buffer_head *next = bh->b_this_page;
|
struct buffer_head *next = bh->b_this_page;
|
||||||
|
|
||||||
if (!list_empty(&bh->b_assoc_buffers))
|
if (bh->b_assoc_map)
|
||||||
__remove_assoc_queue(bh);
|
__remove_assoc_queue(bh);
|
||||||
bh = next;
|
bh = next;
|
||||||
} while (bh != head);
|
} while (bh != head);
|
||||||
|
Reference in New Issue
Block a user