ext4: Rewrite ext4_page_mkwrite() to use generic helpers
Rewrite ext4_page_mkwrite() to use the __block_page_mkwrite() helper. This removes the need to take i_alloc_sem to avoid races with truncate; taking it here appears to be the wrong locking order according to the lock ordering documented in mm/rmap.c. Calling ext4_da_write_begin(), as the old code did, also seems to be problematic: it can decide to flush delay-allocated blocks, which acquires the s_umount semaphore - again creating an unpleasant lock dependency, if not an outright deadlock.

Also add a check for a frozen filesystem so that we don't busy-loop in the page fault path while the filesystem is frozen.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
106
fs/ext4/inode.c
106
fs/ext4/inode.c
@@ -5843,80 +5843,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||||||
struct page *page = vmf->page;
|
struct page *page = vmf->page;
|
||||||
loff_t size;
|
loff_t size;
|
||||||
unsigned long len;
|
unsigned long len;
|
||||||
int ret = -EINVAL;
|
int ret;
|
||||||
void *fsdata;
|
|
||||||
struct file *file = vma->vm_file;
|
struct file *file = vma->vm_file;
|
||||||
struct inode *inode = file->f_path.dentry->d_inode;
|
struct inode *inode = file->f_path.dentry->d_inode;
|
||||||
struct address_space *mapping = inode->i_mapping;
|
struct address_space *mapping = inode->i_mapping;
|
||||||
|
handle_t *handle;
|
||||||
|
get_block_t *get_block;
|
||||||
|
int retries = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get i_alloc_sem to stop truncates messing with the inode. We cannot
|
* This check is racy but catches the common case. We rely on
|
||||||
* get i_mutex because we are already holding mmap_sem.
|
* __block_page_mkwrite() to do a reliable check.
|
||||||
*/
|
*/
|
||||||
down_read(&inode->i_alloc_sem);
|
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
|
||||||
size = i_size_read(inode);
|
/* Delalloc case is easy... */
|
||||||
if (page->mapping != mapping || size <= page_offset(page)
|
if (test_opt(inode->i_sb, DELALLOC) &&
|
||||||
|| !PageUptodate(page)) {
|
!ext4_should_journal_data(inode) &&
|
||||||
/* page got truncated from under us? */
|
!ext4_nonda_switch(inode->i_sb)) {
|
||||||
goto out_unlock;
|
do {
|
||||||
|
ret = __block_page_mkwrite(vma, vmf,
|
||||||
|
ext4_da_get_block_prep);
|
||||||
|
} while (ret == -ENOSPC &&
|
||||||
|
ext4_should_retry_alloc(inode->i_sb, &retries));
|
||||||
|
goto out_ret;
|
||||||
}
|
}
|
||||||
ret = 0;
|
|
||||||
|
|
||||||
lock_page(page);
|
lock_page(page);
|
||||||
wait_on_page_writeback(page);
|
size = i_size_read(inode);
|
||||||
if (PageMappedToDisk(page)) {
|
/* Page got truncated from under us? */
|
||||||
up_read(&inode->i_alloc_sem);
|
if (page->mapping != mapping || page_offset(page) > size) {
|
||||||
return VM_FAULT_LOCKED;
|
unlock_page(page);
|
||||||
|
ret = VM_FAULT_NOPAGE;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (page->index == size >> PAGE_CACHE_SHIFT)
|
if (page->index == size >> PAGE_CACHE_SHIFT)
|
||||||
len = size & ~PAGE_CACHE_MASK;
|
len = size & ~PAGE_CACHE_MASK;
|
||||||
else
|
else
|
||||||
len = PAGE_CACHE_SIZE;
|
len = PAGE_CACHE_SIZE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* return if we have all the buffers mapped. This avoid
|
* Return if we have all the buffers mapped. This avoids the need to do
|
||||||
* the need to call write_begin/write_end which does a
|
* journal_start/journal_stop which can block and take a long time
|
||||||
* journal_start/journal_stop which can block and take
|
|
||||||
* long time
|
|
||||||
*/
|
*/
|
||||||
if (page_has_buffers(page)) {
|
if (page_has_buffers(page)) {
|
||||||
if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
|
if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
|
||||||
ext4_bh_unmapped)) {
|
ext4_bh_unmapped)) {
|
||||||
up_read(&inode->i_alloc_sem);
|
/* Wait so that we don't change page under IO */
|
||||||
return VM_FAULT_LOCKED;
|
wait_on_page_writeback(page);
|
||||||
|
ret = VM_FAULT_LOCKED;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
/*
|
/* OK, we need to fill the hole... */
|
||||||
* OK, we need to fill the hole... Do write_begin write_end
|
if (ext4_should_dioread_nolock(inode))
|
||||||
* to do block allocation/reservation.We are not holding
|
get_block = ext4_get_block_write;
|
||||||
* inode.i__mutex here. That allow * parallel write_begin,
|
else
|
||||||
* write_end call. lock_page prevent this from happening
|
get_block = ext4_get_block;
|
||||||
* on the same page though
|
retry_alloc:
|
||||||
*/
|
handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
|
||||||
ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
|
if (IS_ERR(handle)) {
|
||||||
len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out_unlock;
|
|
||||||
ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
|
|
||||||
len, len, page, fsdata);
|
|
||||||
if (ret < 0)
|
|
||||||
goto out_unlock;
|
|
||||||
ret = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* write_begin/end might have created a dirty page and someone
|
|
||||||
* could wander in and start the IO. Make sure that hasn't
|
|
||||||
* happened.
|
|
||||||
*/
|
|
||||||
lock_page(page);
|
|
||||||
wait_on_page_writeback(page);
|
|
||||||
up_read(&inode->i_alloc_sem);
|
|
||||||
return VM_FAULT_LOCKED;
|
|
||||||
out_unlock:
|
|
||||||
if (ret)
|
|
||||||
ret = VM_FAULT_SIGBUS;
|
ret = VM_FAULT_SIGBUS;
|
||||||
up_read(&inode->i_alloc_sem);
|
goto out;
|
||||||
|
}
|
||||||
|
ret = __block_page_mkwrite(vma, vmf, get_block);
|
||||||
|
if (!ret && ext4_should_journal_data(inode)) {
|
||||||
|
if (walk_page_buffers(handle, page_buffers(page), 0,
|
||||||
|
PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
|
||||||
|
unlock_page(page);
|
||||||
|
ret = VM_FAULT_SIGBUS;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
|
||||||
|
}
|
||||||
|
ext4_journal_stop(handle);
|
||||||
|
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
|
||||||
|
goto retry_alloc;
|
||||||
|
out_ret:
|
||||||
|
ret = block_page_mkwrite_return(ret);
|
||||||
|
out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user