ocfs2: Zero the tail cluster when extending past i_size.
ocfs2's allocation unit is the cluster. This can be larger than a block or even a memory page. This means that a file may have many blocks in its last extent that are beyond the block containing i_size. There also may be more unwritten extents after that. When ocfs2 grows a file, it zeros the entire cluster in order to ensure future i_size growth will see cleared blocks. Unfortunately, block_write_full_page() drops the pages past i_size. This means that ocfs2 is actually leaking garbage data into the tail end of that last cluster. This is a bug. We adjust ocfs2_write_begin_nolock() and ocfs2_extend_file() to detect when a write or truncate is past i_size. They will use ocfs2_zero_extend() to ensure the data is properly zeroed. Older versions of ocfs2_zero_extend() simply zeroed every block between i_size and the zeroing position. This presumes three things: 1) There is allocation for all of these blocks. 2) The extents are not unwritten. 3) The extents are not refcounted. (1) and (2) hold true for non-sparse filesystems, which used to be the only users of ocfs2_zero_extend(). (3) is another bug. Since we're now using ocfs2_zero_extend() for sparse filesystems as well, we teach ocfs2_zero_extend() to check every extent between i_size and the zeroing position. If the extent is unwritten, it is ignored. If it is refcounted, it is CoWed. Then it is zeroed. Signed-off-by: Joel Becker <joel.becker@oracle.com> Cc: stable@kernel.org
This commit is contained in:
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
|
||||
dump_stack();
|
||||
goto bail;
|
||||
}
|
||||
|
||||
past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
|
||||
mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
|
||||
(unsigned long long)past_eof);
|
||||
|
||||
if (create && (iblock >= past_eof))
|
||||
set_buffer_new(bh_result);
|
||||
}
|
||||
|
||||
past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
|
||||
mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
|
||||
(unsigned long long)past_eof);
|
||||
if (create && (iblock >= past_eof))
|
||||
set_buffer_new(bh_result);
|
||||
|
||||
bail:
|
||||
if (err < 0)
|
||||
err = -EIO;
|
||||
@@ -1590,21 +1589,20 @@ out:
|
||||
* write path can treat it as a non-allocating write, which has no
|
||||
* special case code for sparse/nonsparse files.
|
||||
*/
|
||||
static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
|
||||
unsigned len,
|
||||
static int ocfs2_expand_nonsparse_inode(struct inode *inode,
|
||||
struct buffer_head *di_bh,
|
||||
loff_t pos, unsigned len,
|
||||
struct ocfs2_write_ctxt *wc)
|
||||
{
|
||||
int ret;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
loff_t newsize = pos + len;
|
||||
|
||||
if (ocfs2_sparse_alloc(osb))
|
||||
return 0;
|
||||
BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
|
||||
|
||||
if (newsize <= i_size_read(inode))
|
||||
return 0;
|
||||
|
||||
ret = ocfs2_extend_no_holes(inode, newsize, pos);
|
||||
ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
|
||||
if (ret)
|
||||
mlog_errno(ret);
|
||||
|
||||
@@ -1614,6 +1612,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Zero the tail of a file when a write begins past the current i_size.
 *
 * Must only be called on a filesystem with sparse allocation enabled;
 * the function BUG()s otherwise.
 *
 * If @pos lies beyond i_size_read(@inode), this calls
 * ocfs2_zero_extend(@inode, @di_bh, @pos) and returns its result.
 * If @pos is at or before i_size, nothing is done and 0 is returned.
 */
static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
|
||||
loff_t pos)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
|
||||
if (pos > i_size_read(inode))
|
||||
ret = ocfs2_zero_extend(inode, di_bh, pos);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ocfs2_write_begin_nolock(struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
@@ -1649,7 +1659,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
|
||||
}
|
||||
}
|
||||
|
||||
ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
|
||||
if (ocfs2_sparse_alloc(osb))
|
||||
ret = ocfs2_zero_tail(inode, di_bh, pos);
|
||||
else
|
||||
ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
|
||||
wc);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
|
Reference in New Issue
Block a user