blkio: Introduce per cfq group weights and vdisktime calculations
o Bring in the per-cfq-group weight and the way vdisktime is calculated for the group. Also bring in the functionality for updating the min_vdisktime of the group service tree. Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
This commit is contained in:
@@ -23,7 +23,6 @@ config IOSCHED_DEADLINE
|
|||||||
|
|
||||||
config IOSCHED_CFQ
|
config IOSCHED_CFQ
|
||||||
tristate "CFQ I/O scheduler"
|
tristate "CFQ I/O scheduler"
|
||||||
select BLK_CGROUP
|
|
||||||
default y
|
default y
|
||||||
---help---
|
---help---
|
||||||
The CFQ I/O scheduler tries to distribute bandwidth equally
|
The CFQ I/O scheduler tries to distribute bandwidth equally
|
||||||
@@ -33,6 +32,14 @@ config IOSCHED_CFQ
|
|||||||
|
|
||||||
This is the default I/O scheduler.
|
This is the default I/O scheduler.
|
||||||
|
|
||||||
|
config CFQ_GROUP_IOSCHED
|
||||||
|
bool "CFQ Group Scheduling support"
|
||||||
|
depends on IOSCHED_CFQ && CGROUPS
|
||||||
|
select BLK_CGROUP
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
Enable group IO scheduling in CFQ.
|
||||||
|
|
||||||
choice
|
choice
|
||||||
prompt "Default I/O scheduler"
|
prompt "Default I/O scheduler"
|
||||||
default DEFAULT_CFQ
|
default DEFAULT_CFQ
|
||||||
|
@@ -13,6 +13,7 @@
|
|||||||
#include <linux/rbtree.h>
|
#include <linux/rbtree.h>
|
||||||
#include <linux/ioprio.h>
|
#include <linux/ioprio.h>
|
||||||
#include <linux/blktrace_api.h>
|
#include <linux/blktrace_api.h>
|
||||||
|
#include "blk-cgroup.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* tunables
|
* tunables
|
||||||
@@ -49,6 +50,7 @@ static const int cfq_hist_divisor = 4;
|
|||||||
|
|
||||||
#define CFQ_SLICE_SCALE (5)
|
#define CFQ_SLICE_SCALE (5)
|
||||||
#define CFQ_HW_QUEUE_MIN (5)
|
#define CFQ_HW_QUEUE_MIN (5)
|
||||||
|
#define CFQ_SERVICE_SHIFT 12
|
||||||
|
|
||||||
#define RQ_CIC(rq) \
|
#define RQ_CIC(rq) \
|
||||||
((struct cfq_io_context *) (rq)->elevator_private)
|
((struct cfq_io_context *) (rq)->elevator_private)
|
||||||
@@ -79,6 +81,7 @@ struct cfq_rb_root {
|
|||||||
struct rb_node *left;
|
struct rb_node *left;
|
||||||
unsigned count;
|
unsigned count;
|
||||||
u64 min_vdisktime;
|
u64 min_vdisktime;
|
||||||
|
struct rb_node *active;
|
||||||
};
|
};
|
||||||
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
|
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
|
||||||
|
|
||||||
@@ -163,6 +166,7 @@ struct cfq_group {
|
|||||||
|
|
||||||
/* group service_tree key */
|
/* group service_tree key */
|
||||||
u64 vdisktime;
|
u64 vdisktime;
|
||||||
|
unsigned int weight;
|
||||||
bool on_st;
|
bool on_st;
|
||||||
|
|
||||||
/* number of cfqq currently on this group */
|
/* number of cfqq currently on this group */
|
||||||
@@ -434,6 +438,51 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
|
|||||||
return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
|
return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
|
||||||
|
{
|
||||||
|
u64 d = delta << CFQ_SERVICE_SHIFT;
|
||||||
|
|
||||||
|
d = d * BLKIO_WEIGHT_DEFAULT;
|
||||||
|
do_div(d, cfqg->weight);
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return whichever of the two values is ahead of the other.
 *
 * The comparison is made on the signed difference rather than on the raw
 * unsigned values; vdisktime is returned only when that difference is
 * strictly positive, otherwise min_vdisktime is returned unchanged.
 */
static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
{
	return ((s64)(vdisktime - min_vdisktime) > 0) ? vdisktime
						      : min_vdisktime;
}
|
||||||
|
|
||||||
|
/*
 * Return whichever of the two values is behind the other.
 *
 * The comparison is made on the signed difference rather than on the raw
 * unsigned values; vdisktime is returned only when that difference is
 * strictly negative, otherwise min_vdisktime is returned unchanged.
 */
static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
{
	return ((s64)(vdisktime - min_vdisktime) < 0) ? vdisktime
						      : min_vdisktime;
}
|
||||||
|
|
||||||
|
static void update_min_vdisktime(struct cfq_rb_root *st)
|
||||||
|
{
|
||||||
|
u64 vdisktime = st->min_vdisktime;
|
||||||
|
struct cfq_group *cfqg;
|
||||||
|
|
||||||
|
if (st->active) {
|
||||||
|
cfqg = rb_entry_cfqg(st->active);
|
||||||
|
vdisktime = cfqg->vdisktime;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (st->left) {
|
||||||
|
cfqg = rb_entry_cfqg(st->left);
|
||||||
|
vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
|
||||||
|
}
|
||||||
|
|
||||||
|
st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* get averaged number of queues of RT/BE priority.
|
* get averaged number of queues of RT/BE priority.
|
||||||
* average is updated, with a formula that gives more weight to higher numbers,
|
* average is updated, with a formula that gives more weight to higher numbers,
|
||||||
@@ -734,8 +783,12 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
|
|||||||
{
|
{
|
||||||
struct cfq_rb_root *st = &cfqd->grp_service_tree;
|
struct cfq_rb_root *st = &cfqd->grp_service_tree;
|
||||||
|
|
||||||
|
if (st->active == &cfqg->rb_node)
|
||||||
|
st->active = NULL;
|
||||||
|
|
||||||
BUG_ON(cfqg->nr_cfqq < 1);
|
BUG_ON(cfqg->nr_cfqq < 1);
|
||||||
cfqg->nr_cfqq--;
|
cfqg->nr_cfqq--;
|
||||||
|
|
||||||
/* If there are other cfq queues under this group, don't delete it */
|
/* If there are other cfq queues under this group, don't delete it */
|
||||||
if (cfqg->nr_cfqq)
|
if (cfqg->nr_cfqq)
|
||||||
return;
|
return;
|
||||||
@@ -1654,10 +1707,14 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
|
|||||||
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
|
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
|
||||||
{
|
{
|
||||||
struct cfq_rb_root *st = &cfqd->grp_service_tree;
|
struct cfq_rb_root *st = &cfqd->grp_service_tree;
|
||||||
|
struct cfq_group *cfqg;
|
||||||
|
|
||||||
if (RB_EMPTY_ROOT(&st->rb))
|
if (RB_EMPTY_ROOT(&st->rb))
|
||||||
return NULL;
|
return NULL;
|
||||||
return cfq_rb_first_group(st);
|
cfqg = cfq_rb_first_group(st);
|
||||||
|
st->active = &cfqg->rb_node;
|
||||||
|
update_min_vdisktime(st);
|
||||||
|
return cfqg;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cfq_choose_cfqg(struct cfq_data *cfqd)
|
static void cfq_choose_cfqg(struct cfq_data *cfqd)
|
||||||
@@ -3150,6 +3207,9 @@ static void *cfq_init_queue(struct request_queue *q)
|
|||||||
*st = CFQ_RB_ROOT;
|
*st = CFQ_RB_ROOT;
|
||||||
RB_CLEAR_NODE(&cfqg->rb_node);
|
RB_CLEAR_NODE(&cfqg->rb_node);
|
||||||
|
|
||||||
|
/* Give preference to root group over other groups */
|
||||||
|
cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Not strictly needed (since RB_ROOT just clears the node and we
|
* Not strictly needed (since RB_ROOT just clears the node and we
|
||||||
* zeroed cfqd on alloc), but better be safe in case someone decides
|
* zeroed cfqd on alloc), but better be safe in case someone decides
|
||||||
|
Reference in New Issue
Block a user