Btrfs: Add a write ahead tree log to optimize synchronous operations

File syncs and directory syncs are optimized by copying their
items into a special (copy-on-write) log tree.  There is one log tree per
subvolume and the btrfs super block points to a tree of log tree roots.

After a crash, items are copied out of the log tree and back into the
subvolume.  See tree-log.c for all the details.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason
2008-09-05 16:13:11 -04:00
parent a1b32a5932
commit e02119d5a7
17 changed files with 3408 additions and 205 deletions

View File

@@ -496,6 +496,23 @@ static int match_extent_ref(struct extent_buffer *leaf,
return ret == 0;
}
/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, struct btrfs_path *path,
u64 start, u64 len)
{
int ret;
struct btrfs_key key;
maybe_lock_mutex(root);
key.objectid = start;
key.offset = len;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
0, 0);
maybe_unlock_mutex(root);
return ret;
}
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 bytenr,
@@ -1409,7 +1426,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
}
static int update_pinned_extents(struct btrfs_root *root,
int btrfs_update_pinned_extents(struct btrfs_root *root,
u64 bytenr, u64 num, int pin)
{
u64 len;
@@ -1492,7 +1509,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
EXTENT_DIRTY);
if (ret)
break;
update_pinned_extents(root, start, end + 1 - start, 0);
btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
if (need_resched()) {
@@ -1538,14 +1555,11 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
GFP_NOFS);
eb = btrfs_find_tree_block(extent_root, ins.objectid,
eb = btrfs_find_create_tree_block(extent_root, ins.objectid,
ins.offset);
if (!btrfs_buffer_uptodate(eb, trans->transid)) {
mutex_unlock(&extent_root->fs_info->alloc_mutex);
if (!btrfs_buffer_uptodate(eb, trans->transid))
btrfs_read_buffer(eb, trans->transid);
mutex_lock(&extent_root->fs_info->alloc_mutex);
}
btrfs_tree_lock(eb);
level = btrfs_header_level(eb);
@@ -1585,13 +1599,20 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
struct extent_buffer *buf;
buf = btrfs_find_tree_block(root, bytenr, num_bytes);
if (buf) {
/* we can reuse a block if it hasn't been written
* and it is from this transaction. We can't
* reuse anything from the tree log root because
* it has tiny sub-transactions.
*/
if (btrfs_buffer_uptodate(buf, 0) &&
btrfs_try_tree_lock(buf)) {
u64 transid =
root->fs_info->running_transaction->transid;
u64 header_transid =
btrfs_header_generation(buf);
if (header_transid == transid &&
if (btrfs_header_owner(buf) !=
BTRFS_TREE_LOG_OBJECTID &&
header_transid == transid &&
!btrfs_header_flag(buf,
BTRFS_HEADER_FLAG_WRITTEN)) {
clean_tree_block(NULL, root, buf);
@@ -1603,7 +1624,7 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
}
free_extent_buffer(buf);
}
update_pinned_extents(root, bytenr, num_bytes, 1);
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
} else {
set_extent_bits(&root->fs_info->pending_del,
bytenr, bytenr + num_bytes - 1,
@@ -1801,7 +1822,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
GFP_NOFS);
if (!test_range_bit(&extent_root->fs_info->extent_ins,
start, end, EXTENT_LOCKED, 0)) {
update_pinned_extents(extent_root, start,
btrfs_update_pinned_extents(extent_root, start,
end + 1 - start, 1);
ret = __free_extent(trans, extent_root,
start, end + 1 - start,
@@ -1919,6 +1940,12 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans,
if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
last_ptr = &root->fs_info->last_data_alloc;
}
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
last_ptr = &root->fs_info->last_log_alloc;
if (!last_ptr == 0 && root->fs_info->last_alloc) {
*last_ptr = root->fs_info->last_alloc + empty_cluster;
}
}
if (last_ptr) {
if (*last_ptr)
@@ -2268,6 +2295,35 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
maybe_unlock_mutex(root);
return ret;
}
/*
* this is used by the tree logging recovery code. It records that
* an extent has been allocated and makes sure to clear the free
* space cache bits as well
*/
int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 ref_generation,
u64 owner, u64 owner_offset,
struct btrfs_key *ins)
{
int ret;
struct btrfs_block_group_cache *block_group;
maybe_lock_mutex(root);
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
cache_block_group(root, block_group);
clear_extent_dirty(&root->fs_info->free_space_cache,
ins->objectid, ins->objectid + ins->offset - 1,
GFP_NOFS);
ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
ref_generation, owner,
owner_offset, ins);
maybe_unlock_mutex(root);
return ret;
}
/*
* finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
@@ -2350,9 +2406,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return buf;
}
static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *leaf)
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf)
{
u64 leaf_owner;
u64 leaf_generation;
@@ -2402,9 +2457,9 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans,
return 0;
}
static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_leaf_ref *ref)
static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_leaf_ref *ref)
{
int i;
int ret;
@@ -2512,7 +2567,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
btrfs_header_nritems(cur))
break;
if (*level == 0) {
ret = drop_leaf_ref_no_cache(trans, root, cur);
ret = btrfs_drop_leaf_ref(trans, root, cur);
BUG_ON(ret);
break;
}
@@ -2552,7 +2607,7 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
btrfs_node_key_to_cpu(cur, &key, path->slots[*level]);
ref = btrfs_lookup_leaf_ref(root, bytenr);
if (ref) {
ret = drop_leaf_ref(trans, root, ref);
ret = cache_drop_leaf_ref(trans, root, ref);
BUG_ON(ret);
btrfs_remove_leaf_ref(root, ref);
btrfs_free_leaf_ref(root, ref);
@@ -3628,6 +3683,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
extent_root = root->fs_info->extent_root;
block_group_cache = &root->fs_info->block_group_cache;
root->fs_info->last_trans_new_blockgroup = trans->transid;
cache = kzalloc(sizeof(*cache), GFP_NOFS);
BUG_ON(!cache);
cache->key.objectid = chunk_offset;