From fb235dc06fac9eaa4408ade9c8b20d45d63c89b7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 15 Feb 2017 10:43:03 +0800 Subject: [PATCH] btrfs: qgroup: Move half of the qgroup accounting time out of commit trans Just as Filipe pointed out, the most time consuming parts of qgroup are btrfs_qgroup_account_extents() and btrfs_qgroup_prepare_account_extents(). Which both call btrfs_find_all_roots() to get old_roots and new_roots ulist. What makes things worse is, we're calling that expensive btrfs_find_all_roots() at transaction committing time with TRANS_STATE_COMMIT_DOING, which will blocks all incoming transaction. Such behavior is necessary for @new_roots search as current btrfs_find_all_roots() can't do it correctly so we do call it just before switch commit roots. However for @old_roots search, it's not necessary as such search is based on commit_root, so it will always be correct and we can move it out of transaction committing. This patch moves the @old_roots search part out of commit_transaction(), so in theory we can half the time qgroup time consumption at commit_transaction(). But please note that, this won't speedup qgroup overall, the total time consumption is still the same, just reduce the performance stall. Cc: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 20 ++++++++++++++++---- fs/btrfs/qgroup.c | 33 +++++++++++++++++++++++++++++---- fs/btrfs/qgroup.h | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 75 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index a1b9ef2dfc4a..6eb80952efb3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -550,13 +550,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *ref, struct btrfs_qgroup_extent_record *qrecord, u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, - int action, int is_data) + int action, int is_data, int *qrecord_inserted_ret) { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; int count_mod = 1; int must_insert_reserved = 0; + int qrecord_inserted = 0; /* If reserved is provided, it must be a data extent. */ BUG_ON(!is_data && reserved); @@ -623,6 +624,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, if(btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord)) kfree(qrecord); + else + qrecord_inserted = 1; } spin_lock_init(&head_ref->lock); @@ -650,6 +653,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, atomic_inc(&delayed_refs->num_entries); trans->delayed_ref_updates++; } + if (qrecord_inserted_ret) + *qrecord_inserted_ret = qrecord_inserted; return head_ref; } @@ -779,6 +784,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; + int qrecord_inserted; BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); @@ -806,12 +812,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * the spin lock */ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, - bytenr, num_bytes, 0, 0, action, 0); + bytenr, num_bytes, 0, 0, action, 0, + &qrecord_inserted); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, level, action); spin_unlock(&delayed_refs->lock); + if (qrecord_inserted) + return btrfs_qgroup_trace_extent_post(fs_info, record); return 0; free_head_ref: @@ -835,6 +844,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; + int qrecord_inserted; ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); if (!ref) @@ -868,13 +878,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, bytenr, num_bytes, ref_root, reserved, - action, 1); + action, 1, &qrecord_inserted); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, action); spin_unlock(&delayed_refs->lock); + if (qrecord_inserted) + return btrfs_qgroup_trace_extent_post(fs_info, record); return 0; } @@ -897,7 +909,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr, num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data); + extent_op->is_data, NULL); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a48bf546fc91..a5da750c1087 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1464,8 +1464,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, while (node) { record = rb_entry(node, struct btrfs_qgroup_extent_record, node); - ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, - &record->old_roots); + if (WARN_ON(!record->old_roots)) + ret = btrfs_find_all_roots(NULL, fs_info, + record->bytenr, 0, &record->old_roots); if (ret < 0) break; if (qgroup_to_skip) @@ -1504,6 +1505,28 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, return 0; } +int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup_extent_record *qrecord) +{ + struct ulist *old_root; + u64 bytenr = qrecord->bytenr; + int ret; + + ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); + if (ret < 0) + return ret; + + /* + * Here we don't need to get the lock of + * trans->transaction->delayed_refs, since inserted qrecord won't + * be deleted, only qrecord->node may be modified (new qrecord insert) + * + * So modifying qrecord->old_roots is safe here + */ + qrecord->old_roots = old_root; + return 0; +} + int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, gfp_t gfp_flag) @@ -1529,9 +1552,11 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, spin_lock(&delayed_refs->lock); ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); spin_unlock(&delayed_refs->lock); - if (ret > 0) + if (ret > 0) { kfree(record); - return 0; + return 0; + } + return btrfs_qgroup_trace_extent_post(fs_info, record); } int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index ee95f456a61f..26932a8a1993 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -94,9 +94,10 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); /* * Inform qgroup to trace one dirty extent, its info is recorded in @record. - * So qgroup can account it at commit trans time. + * So qgroup can account it at transaction committing time. * - * No lock version, caller must acquire delayed ref lock and allocate memory. + * No lock version, caller must acquire delayed ref lock and allocated memory, + * then call btrfs_qgroup_trace_extent_post() after exiting lock context. * * Return 0 for success insert * Return >0 for existing record, caller can free @record safely. @@ -107,12 +108,38 @@ int btrfs_qgroup_trace_extent_nolock( struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_qgroup_extent_record *record); +/* + * Post handler after qgroup_trace_extent_nolock(). + * + * NOTE: Current qgroup does the expensive backref walk at transaction + * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming + * new transaction. + * This is designed to allow btrfs_find_all_roots() to get correct new_roots + * result. + * + * However for old_roots there is no need to do backref walk at that time, + * since we search commit roots to walk backref and result will always be + * correct. + * + * Due to the nature of no lock version, we can't do backref there. + * So we must call btrfs_qgroup_trace_extent_post() after exiting + * spinlock context. + * + * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result + * using current root, then we can move all expensive backref walk out of + * transaction committing, but not now as qgroup accounting will be wrong again. + */ +int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, + struct btrfs_qgroup_extent_record *qrecord); + /* * Inform qgroup to trace one dirty extent, specified by @bytenr and * @num_bytes. * So qgroup can account it at commit trans time. * - * Better encapsulated version. + * Better encapsulated version, with memory allocation and backref walk for + * commit roots. + * So this can sleep. * * Return 0 if the operation is done. * Return <0 for error, like memory allocation failure or invalid parameter