blkcg: drop unnecessary RCU locking
Now that blkg additions / removals are always done under both q and blkcg locks, the only places RCU locking is necessary are blkg_lookup[_create]() for lookup w/o blkcg lock. This patch drops the unnecessary RCU locking, replacing it with plain blkcg locking as necessary.

* blkiocg_pre_destroy() already performs proper locking and doesn't need RCU. Dropped.

* blkio_read_blkg_stats() now uses blkcg->lock instead of the RCU read lock. This isn't a hot path.

* The now unnecessary synchronize_rcu() calls are removed from the queue exit paths. This makes q->nr_blkgs unnecessary. Dropped.

* RCU annotation on blkg->q removed.

-v2: Vivek pointed out that blkg_lookup_create() still needs to be called under rcu_read_lock(). Updated.

-v3: After the update, stats_lock locking in blkio_read_blkg_stats() shouldn't use the _irq variant, as it otherwise ends up enabling irqs while blkcg->lock is locked. Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
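As a reading aid (not part of the patch), the lookup-side rule that remains after this change can be sketched as below. The exact argument list of blkg_lookup_create() is abbreviated and should be treated as hypothetical; only the rcu_read_lock() bracketing is the point.

/*
 * Sketch only, not from the patch: a path that looks up a blkg
 * without holding blkcg->lock must still run under RCU, because
 * removal (done under both q and blkcg locks) may otherwise free
 * the group out from under the lookup.  Argument list abbreviated.
 */
rcu_read_lock();
blkg = blkg_lookup_create(blkcg, q /* , ... */);
/* ... dereference blkg only inside the RCU read-side section ... */
rcu_read_unlock();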
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -500,7 +500,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 		return NULL;
 
 	spin_lock_init(&blkg->stats_lock);
-	rcu_assign_pointer(blkg->q, q);
+	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
 	blkg->refcnt = 1;
@@ -611,7 +611,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 
 	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
 	list_add(&blkg->q_node, &q->blkg_list);
-	q->nr_blkgs++;
 
 	spin_unlock(&blkcg->lock);
 out:
@@ -648,9 +647,6 @@ static void blkg_destroy(struct blkio_group *blkg)
 	list_del_init(&blkg->q_node);
 	hlist_del_init_rcu(&blkg->blkcg_node);
 
-	WARN_ON_ONCE(q->nr_blkgs <= 0);
-	q->nr_blkgs--;
-
 	/*
 	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
@@ -1232,8 +1228,9 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 	struct hlist_node *n;
 	uint64_t cgroup_total = 0;
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
+	spin_lock_irq(&blkcg->lock);
+
+	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
 		const char *dname = blkg_dev_name(blkg);
 		int plid = BLKIOFILE_POLICY(cft->private);
 
@@ -1243,15 +1240,16 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 			cgroup_total += blkio_get_stat_cpu(blkg, plid,
 							cb, dname, type);
 		} else {
-			spin_lock_irq(&blkg->stats_lock);
+			spin_lock(&blkg->stats_lock);
 			cgroup_total += blkio_get_stat(blkg, plid,
 							cb, dname, type);
-			spin_unlock_irq(&blkg->stats_lock);
+			spin_unlock(&blkg->stats_lock);
 		}
 	}
 	if (show_total)
 		cb->fill(cb, "Total", cgroup_total);
-	rcu_read_unlock();
+
+	spin_unlock_irq(&blkcg->lock);
 	return 0;
 }
 
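The -v3 note in the commit message refers to the nesting that results from these hunks: blkcg->lock is now the outer lock and is taken with the _irq variant, so the inner per-group stats_lock must use the plain spin_lock()/spin_unlock(); an _irq unlock on the inner lock would re-enable interrupts while blkcg->lock is still held. A minimal sketch of the resulting pattern, mirroring the function body above:

spin_lock_irq(&blkcg->lock);			/* outer lock, irqs off */
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
	spin_lock(&blkg->stats_lock);		/* plain variant: irqs already disabled */
	/* ... read per-group stats ... */
	spin_unlock(&blkg->stats_lock);		/* _irq here would turn irqs back on too early */
}
spin_unlock_irq(&blkcg->lock);			/* irqs re-enabled only after the outer unlock */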
@@ -1583,28 +1581,24 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
 {
 	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
 
-	rcu_read_lock();
 	spin_lock_irq(&blkcg->lock);
 
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
-		struct request_queue *q = rcu_dereference(blkg->q);
+		struct request_queue *q = blkg->q;
 
 		if (spin_trylock(q->queue_lock)) {
 			blkg_destroy(blkg);
 			spin_unlock(q->queue_lock);
 		} else {
 			spin_unlock_irq(&blkcg->lock);
-			rcu_read_unlock();
 			cpu_relax();
-			rcu_read_lock();
 			spin_lock(&blkcg->lock);
 		}
 	}
 
 	spin_unlock_irq(&blkcg->lock);
-	rcu_read_unlock();
 	return 0;
 }
 
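Since removal now runs with both blkcg->lock and queue_lock held, the rcu_read_lock()/rcu_read_unlock() bracketing around this loop adds nothing and is dropped. What keeps the function safe is the trylock-and-back-off dance, needed because blkcg->lock is acquired first here while the creation path takes queue_lock first. A generic sketch of that pattern, with hypothetical lock and helper names (lock_a, lock_b, have_work(), do_work()):

spin_lock_irq(&lock_a);
while (have_work()) {
	if (spin_trylock(&lock_b)) {
		do_work();			/* both locks held, safe to unlink/destroy */
		spin_unlock(&lock_b);
	} else {
		/* reverse lock order would deadlock; drop lock_a and retry */
		spin_unlock_irq(&lock_a);
		cpu_relax();
		spin_lock_irq(&lock_a);
	}
}
spin_unlock_irq(&lock_a);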
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -176,8 +176,8 @@ struct blkg_policy_data {
 };
 
 struct blkio_group {
-	/* Pointer to the associated request_queue, RCU protected */
-	struct request_queue __rcu *q;
+	/* Pointer to the associated request_queue */
+	struct request_queue *q;
 	struct list_head q_node;
 	struct hlist_node blkcg_node;
 	struct blkio_cgroup *blkcg;
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1046,39 +1046,8 @@ int blk_throtl_init(struct request_queue *q)
 
 void blk_throtl_exit(struct request_queue *q)
 {
-	struct throtl_data *td = q->td;
-	bool wait;
-
-	BUG_ON(!td);
-
-	throtl_shutdown_wq(q);
-
-	/* If there are other groups */
-	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs;
-	spin_unlock_irq(q->queue_lock);
-
-	/*
-	 * Wait for tg_to_blkg(tg)->q accessors to exit their grace periods.
-	 * Do this wait only if there are other undestroyed groups out
-	 * there (other than root group). This can happen if cgroup deletion
-	 * path claimed the responsibility of cleaning up a group before
-	 * queue cleanup code get to the group.
-	 *
-	 * Do not call synchronize_rcu() unconditionally as there are drivers
-	 * which create/delete request queue hundreds of times during scan/boot
-	 * and synchronize_rcu() can take significant time and slow down boot.
-	 */
-	if (wait)
-		synchronize_rcu();
-
-	/*
-	 * Just being safe to make sure after previous flush if some body did
-	 * update limits through cgroup and another work got queued, cancel
-	 * it.
-	 */
-	throtl_shutdown_wq(q);
-
+	BUG_ON(!q->td);
+	throtl_shutdown_wq(q);
 	kfree(q->td);
 }
 
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3449,7 +3449,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
-	bool wait = false;
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -3462,31 +3461,8 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	spin_unlock_irq(q->queue_lock);
 
-#ifdef CONFIG_BLK_CGROUP
-	/*
-	 * If there are groups which we could not unlink from blkcg list,
-	 * wait for a rcu period for them to be freed.
-	 */
-	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs;
-	spin_unlock_irq(q->queue_lock);
-#endif
 	cfq_shutdown_timer_wq(cfqd);
 
-	/*
-	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
-	 * Do this wait only if there are other unlinked groups out
-	 * there. This can happen if cgroup deletion path claimed the
-	 * responsibility of cleaning up a group before queue cleanup code
-	 * get to the group.
-	 *
-	 * Do not call synchronize_rcu() unconditionally as there are drivers
-	 * which create/delete request queue hundreds of times during scan/boot
-	 * and synchronize_rcu() can take significant time and slow down boot.
-	 */
-	if (wait)
-		synchronize_rcu();
-
 #ifndef CONFIG_CFQ_GROUP_IOSCHED
 	kfree(cfqd->root_group);
 #endif
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -365,7 +365,6 @@ struct request_queue {
 #ifdef CONFIG_BLK_CGROUP
 	/* XXX: array size hardcoded to avoid include dependency (temporary) */
 	struct list_head blkg_list;
-	int nr_blkgs;
 #endif
 
 	struct queue_limits limits;