Btrfs: Add run time btree defrag, and an ioctl to force btree defrag

This adds two types of btree defrag, a run time form that tries to
defrag recently allocated blocks in the btree when they are still in ram,
and an ioctl that forces defrag of all btree blocks.

File data blocks are not defragged yet, but this can make a huge difference
in sequential btree reads.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason
2007-08-07 16:15:09 -04:00
committed by David Woodhouse
parent 3c69faecb8
commit 6702ed490c
11 changed files with 566 additions and 102 deletions

View File

@@ -65,16 +65,71 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
memset(p, 0, sizeof(*p));
}
static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct buffer_head *buf, struct buffer_head
*parent, int parent_slot, struct buffer_head
**cow_ret, u64 search_start, u64 empty_size)
{
struct buffer_head *cow;
struct btrfs_node *cow_node;
int ret = 0;
int different_trans = 0;
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
WARN_ON(!buffer_uptodate(buf));
cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
if (IS_ERR(cow))
return PTR_ERR(cow);
cow_node = btrfs_buffer_node(cow);
if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
WARN_ON(1);
memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
btrfs_set_header_generation(&cow_node->header, trans->transid);
btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) >
trans->transid);
if (btrfs_header_generation(btrfs_buffer_header(buf)) !=
trans->transid) {
different_trans = 1;
ret = btrfs_inc_ref(trans, root, buf);
if (ret)
return ret;
} else {
WARN_ON(!root->ref_cows);
clean_tree_block(trans, root, buf);
}
if (buf == root->node) {
root->node = cow;
get_bh(cow);
if (buf != root->commit_root) {
btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
} else {
btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
bh_blocknr(cow));
btrfs_mark_buffer_dirty(parent);
WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) !=
trans->transid);
btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
btrfs_mark_buffer_dirty(cow);
*cow_ret = cow;
return 0;
}
int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct buffer_head *buf, struct buffer_head
*parent, int parent_slot, struct buffer_head
**cow_ret)
{
struct buffer_head *cow;
struct btrfs_node *cow_node;
int ret;
WARN_ON(!buffer_uptodate(buf));
u64 search_start;
if (trans->transaction != root->fs_info->running_transaction) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->running_transaction->transid);
@@ -90,38 +145,94 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*cow_ret = buf;
return 0;
}
cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
if (IS_ERR(cow))
return PTR_ERR(cow);
cow_node = btrfs_buffer_node(cow);
if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
WARN_ON(1);
memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
btrfs_set_header_generation(&cow_node->header, trans->transid);
btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
ret = btrfs_inc_ref(trans, root, buf);
if (ret)
return ret;
if (buf == root->node) {
root->node = cow;
get_bh(cow);
if (buf != root->commit_root) {
btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
} else {
btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
bh_blocknr(cow));
btrfs_mark_buffer_dirty(parent);
btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
btrfs_mark_buffer_dirty(cow);
*cow_ret = cow;
search_start = bh_blocknr(buf) & ~((u64)65535);
return __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0);
}
static int close_blocks(u64 blocknr, u64 other)
{
if (blocknr < other && other - blocknr < 8)
return 1;
if (blocknr > other && blocknr - other < 8)
return 1;
return 0;
}
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct buffer_head *parent,
int cache_only)
{
struct btrfs_node *parent_node;
struct buffer_head *cur_bh;
struct buffer_head *tmp_bh;
u64 blocknr;
u64 search_start = 0;
u64 other;
u32 parent_nritems;
int start_slot;
int end_slot;
int i;
int err = 0;
if (trans->transaction != root->fs_info->running_transaction) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->running_transaction->transid);
WARN_ON(1);
}
if (trans->transid != root->fs_info->generation) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->generation);
WARN_ON(1);
}
parent_node = btrfs_buffer_node(parent);
parent_nritems = btrfs_header_nritems(&parent_node->header);
start_slot = 0;
end_slot = parent_nritems;
if (parent_nritems == 1)
return 0;
for (i = start_slot; i < end_slot; i++) {
int close = 1;
blocknr = btrfs_node_blockptr(parent_node, i);
if (i > 0) {
other = btrfs_node_blockptr(parent_node, i - 1);
close = close_blocks(blocknr, other);
}
if (close && i < end_slot - 1) {
other = btrfs_node_blockptr(parent_node, i + 1);
close = close_blocks(blocknr, other);
}
if (close)
continue;
cur_bh = btrfs_find_tree_block(root, blocknr);
if (!cur_bh || !buffer_uptodate(cur_bh) ||
buffer_locked(cur_bh)) {
if (cache_only) {
brelse(cur_bh);
continue;
}
brelse(cur_bh);
cur_bh = read_tree_block(root, blocknr);
}
if (search_start == 0) {
search_start = bh_blocknr(cur_bh) & ~((u64)65535);
}
err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
&tmp_bh, search_start,
min(8, end_slot - i));
if (err)
break;
search_start = bh_blocknr(tmp_bh);
brelse(tmp_bh);
}
return err;
}
/*
* The leaf data grows from end-to-front in the node.
* this returns the address of the start of the last item,
@@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
parent_slot = path->slots[level + 1];
parent_key = &parent->ptrs[parent_slot].key;
BUG_ON(memcmp(parent_key, &leaf->items[0].key,
sizeof(struct btrfs_disk_key)));
BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
@@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
* readahead one full node of leaves
*/
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
int slot)
int level, int slot)
{
struct btrfs_node *node;
int i;
@@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
unsigned long gang[8];
struct buffer_head *bh;
if (!path->nodes[1])
if (level == 0)
return;
node = btrfs_buffer_node(path->nodes[1]);
if (!path->nodes[level])
return;
node = btrfs_buffer_node(path->nodes[level]);
search = btrfs_node_blockptr(node, slot);
bh = btrfs_find_tree_block(root, search);
if (bh) {
@@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
for (i = 0; i < ret; i++) {
blocknr = gang[i];
clear_radix_bit(&found, blocknr);
if (nread > 64)
if (nread > 32)
continue;
if (direction > 0 && cluster_start <= blocknr &&
cluster_start + 8 > blocknr) {
@@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
struct buffer_head *b;
struct buffer_head *cow_buf;
struct btrfs_node *c;
struct btrfs_root_item *root_item = &root->root_item;
u64 blocknr;
int slot;
int ret;
@@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
int should_reada = p->reada;
u8 lowest_level = 0;
if (btrfs_root_refs(root_item) == 0 && root->ref_cows) {
lowest_level = root_item->drop_level;
WARN_ON(ins_len || cow);
}
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len);
WARN_ON(p->nodes[0] != NULL);
WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
again:
@@ -798,8 +909,8 @@ again:
if (level == lowest_level)
break;
blocknr = btrfs_node_blockptr(c, slot);
if (level == 1 && should_reada)
reada_for_search(root, p, slot);
if (should_reada)
reada_for_search(root, p, level, slot);
b = read_tree_block(root, btrfs_node_blockptr(c, slot));
} else {
@@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
BUG_ON(path->nodes[level]);
BUG_ON(path->nodes[level-1] != root->node);
t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr);
t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0);
if (IS_ERR(t))
return PTR_ERR(t);
c = btrfs_buffer_node(t);
@@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
}
c_nritems = btrfs_header_nritems(&c->header);
split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr);
split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0);
if (IS_ERR(split_buffer))
return PTR_ERR(split_buffer);
@@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
nritems = btrfs_header_nritems(&l->header);
mid = (nritems + 1)/ 2;
right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
if (IS_ERR(right_buffer))
return PTR_ERR(right_buffer);
@@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
if (!double_split)
return ret;
right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
if (IS_ERR(right_buffer))
return PTR_ERR(right_buffer);
@@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
blocknr = btrfs_node_blockptr(c_node, slot);
if (next)
btrfs_block_release(root, next);
if (level == 1 && path->reada)
reada_for_search(root, path, slot);
if (path->reada)
reada_for_search(root, path, level, slot);
next = read_tree_block(root, blocknr);
break;
}
@@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
path->slots[level] = 0;
if (!level)
break;
if (level == 1 && path->reada)
reada_for_search(root, path, slot);
if (path->reada)
reada_for_search(root, path, level, slot);
next = read_tree_block(root,
btrfs_node_blockptr(btrfs_buffer_node(next), 0));
}