dm cache policy mq: implement writeback_work() and mq_{set,clear}_dirty()
There are now two multiqueues for in-cache blocks: a clean one and a
dirty one. writeback_work comes from the dirty one; demotions come
from the clean one.

There are two benefits:

- Performance improvement, since demoting a clean block is a noop.
- The cache cleans itself when io load is light.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
commit 01911c19be
parent ffcbcb6720
committed by Mike Snitzer
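Before the diff, here is a minimal userspace model of the idea (an illustrative sketch with simplified stand-in types, not the kernel code below). Demotion victims are popped from the clean queue, so eviction never waits on a copy-back; writeback work drains the dirty queue and requeues each block as clean:

#include <stdbool.h>

struct entry {
        bool dirty;
        struct entry *next;
};

struct policy {
        struct entry *cache_clean;      /* demotion victims: no copy-back needed */
        struct entry *cache_dirty;      /* source of writeback work */
};

static struct entry *pop(struct entry **q)
{
        struct entry *e = *q;

        if (e)
                *q = e->next;
        return e;
}

static void push(struct policy *p, struct entry *e)
{
        /* queue selection mirrors the dirty bit, as in the real push() below */
        struct entry **q = e->dirty ? &p->cache_dirty : &p->cache_clean;

        e->next = *q;
        *q = e;
}

/* One unit of background cleaning: pop a dirty block, requeue it clean. */
static struct entry *writeback_work(struct policy *p)
{
        struct entry *e = pop(&p->cache_dirty);

        if (e) {
                e->dirty = false;       /* caller copies the block back first */
                push(p, e);
        }
        return e;
}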
--- a/Documentation/device-mapper/cache-policies.txt
+++ b/Documentation/device-mapper/cache-policies.txt
@@ -30,8 +30,10 @@ multiqueue
 
 This policy is the default.
 
-The multiqueue policy has two sets of 16 queues: one set for entries
-waiting for the cache and another one for those in the cache.
+The multiqueue policy has three sets of 16 queues: one set for entries
+waiting for the cache and another two for those in the cache (a set for
+clean entries and a set for dirty entries).
 
 Cache entries in the queues are aged based on logical time. Entry into
 the cache is based on variable thresholds and queue selection is based
 on hit count on entry. The policy aims to take different cache miss
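For intuition about "queue selection is based on hit count on entry": each entry's level within its set of 16 queues comes from a log2 bucketing of its hit count. A standalone sketch follows (the real policy's queue_level() uses ilog2() to much the same effect; this version is illustrative, not the kernel source):

#include <stdio.h>

#define NR_QUEUE_LEVELS 16u

/* Map a hit count to one of the 16 levels: level ~= floor(log2(count)). */
static unsigned queue_level(unsigned hit_count)
{
        unsigned level = 0;

        while (hit_count >>= 1)
                level++;

        return level < NR_QUEUE_LEVELS - 1 ? level : NR_QUEUE_LEVELS - 1;
}

int main(void)
{
        /* 1 hit -> level 0, 8 hits -> level 3, huge counts clamp to 15 */
        printf("%u %u %u\n", queue_level(1), queue_level(8), queue_level(1u << 20));
        return 0;
}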
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -224,6 +224,7 @@ struct entry {
          * FIXME: pack these better
          */
         bool in_cache:1;
+        bool dirty:1;
         unsigned hit_count;
         unsigned generation;
         unsigned tick;
@@ -238,13 +239,15 @@ struct mq_policy {
         struct io_tracker tracker;
 
         /*
-         * We maintain two queues of entries. The cache proper contains
-         * the currently active mappings. Whereas the pre_cache tracks
-         * blocks that are being hit frequently and potential candidates
-         * for promotion to the cache.
+         * We maintain three queues of entries. The cache proper,
+         * consisting of a clean and dirty queue, contains the currently
+         * active mappings. Whereas the pre_cache tracks blocks that
+         * are being hit frequently and potential candidates for promotion
+         * to the cache.
          */
         struct queue pre_cache;
-        struct queue cache;
+        struct queue cache_clean;
+        struct queue cache_dirty;
 
         /*
          * Keeps track of time, incremented by the core. We use this to
@@ -324,7 +327,8 @@ static void free_entries(struct mq_policy *mq)
         struct entry *e, *tmp;
 
         concat_queue(&mq->free, &mq->pre_cache);
-        concat_queue(&mq->free, &mq->cache);
+        concat_queue(&mq->free, &mq->cache_clean);
+        concat_queue(&mq->free, &mq->cache_dirty);
 
         list_for_each_entry_safe(e, tmp, &mq->free, list)
                 kmem_cache_free(mq_entry_cache, e);
@@ -508,7 +512,8 @@ static void push(struct mq_policy *mq, struct entry *e)
 
         if (e->in_cache) {
                 alloc_cblock(mq, e->cblock);
-                queue_push(&mq->cache, queue_level(e), &e->list);
+                queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
+                           queue_level(e), &e->list);
         } else
                 queue_push(&mq->pre_cache, queue_level(e), &e->list);
 }
@@ -558,7 +563,8 @@ static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
  * of the entries.
  *
  * At the moment the threshold is taken by averaging the hit counts of some
- * of the entries in the cache (the first 20 entries of the first level).
+ * of the entries in the cache (the first 20 entries across all levels in
+ * ascending order, giving preference to the clean entries at each level).
  *
  * We can be much cleverer than this though. For example, each promotion
 * could bump up the threshold helping to prevent churn. Much more to do
@@ -580,7 +586,16 @@ static void check_generation(struct mq_policy *mq)
                 mq->generation++;
 
                 for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
-                        head = mq->cache.qs + level;
+                        head = mq->cache_clean.qs + level;
+                        list_for_each_entry(e, head, list) {
+                                nr++;
+                                total += e->hit_count;
+
+                                if (++count >= MAX_TO_AVERAGE)
+                                        break;
+                        }
+
+                        head = mq->cache_dirty.qs + level;
                         list_for_each_entry(e, head, list) {
                                 nr++;
                                 total += e->hit_count;
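The sampled totals above feed the promotion threshold that a pre_cache entry must reach before promotion. A standalone sketch of the averaging step (the rounding detail and the helper name are assumptions; in the policy the result lands in mq->promote_threshold):

/* Turn the sampled (total, nr) hit counts into a threshold, rounding
 * the average up so a borderline entry is not promoted too eagerly. */
static unsigned promote_threshold(unsigned total, unsigned nr)
{
        return nr ? (total + nr - 1) / nr : 1;
}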
@@ -633,19 +648,28 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
  * - set the hit count to a hard coded value other than 1, eg, is it better
  *   if it goes in at level 2?
  */
-static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
+static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t *cblock)
 {
-        dm_cblock_t result;
-        struct entry *demoted = pop(mq, &mq->cache);
+        struct entry *demoted = pop(mq, &mq->cache_clean);
 
-        BUG_ON(!demoted);
-        result = demoted->cblock;
+        if (!demoted)
+                /*
+                 * We could get a block from mq->cache_dirty, but that
+                 * would add extra latency to the triggering bio as it
+                 * waits for the writeback. Better to not promote this
+                 * time and hope there's a clean block next time this block
+                 * is hit.
+                 */
+                return -ENOSPC;
+
+        *cblock = demoted->cblock;
         *oblock = demoted->oblock;
         demoted->in_cache = false;
+        demoted->dirty = false;
         demoted->hit_count = 1;
         push(mq, demoted);
 
-        return result;
+        return 0;
 }
 
 /*
@@ -705,11 +729,16 @@ static int cache_entry_found(struct mq_policy *mq,
 static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
                               struct policy_result *result)
 {
+        int r;
         dm_cblock_t cblock;
 
         if (find_free_cblock(mq, &cblock) == -ENOSPC) {
                 result->op = POLICY_REPLACE;
-                cblock = demote_cblock(mq, &result->old_oblock);
+                r = demote_cblock(mq, &result->old_oblock, &cblock);
+                if (r) {
+                        result->op = POLICY_MISS;
+                        return 0;
+                }
         } else
                 result->op = POLICY_NEW;
 
@@ -717,6 +746,7 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
 
         del(mq, e);
         e->in_cache = true;
+        e->dirty = false;
         push(mq, e);
 
         return 0;
@@ -760,6 +790,7 @@ static void insert_in_pre_cache(struct mq_policy *mq,
         }
 
         e->in_cache = false;
+        e->dirty = false;
         e->oblock = oblock;
         e->hit_count = 1;
         e->generation = mq->generation;
@@ -787,6 +818,7 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
         e->oblock = oblock;
         e->cblock = cblock;
         e->in_cache = true;
+        e->dirty = false;
         e->hit_count = 1;
         e->generation = mq->generation;
         push(mq, e);
@@ -917,6 +949,40 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t
         return r;
 }
 
+/*
+ * FIXME: __mq_set_clear_dirty can block due to mutex.
+ * Ideally a policy should not block in functions called
+ * from the map() function. Explore using RCU.
+ */
+static void __mq_set_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool set)
+{
+        struct mq_policy *mq = to_mq_policy(p);
+        struct entry *e;
+
+        mutex_lock(&mq->lock);
+        e = hash_lookup(mq, oblock);
+        if (!e)
+                DMWARN("__mq_set_clear_dirty called for a block that isn't in the cache");
+        else {
+                BUG_ON(!e->in_cache);
+
+                del(mq, e);
+                e->dirty = set;
+                push(mq, e);
+        }
+        mutex_unlock(&mq->lock);
+}
+
+static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+        __mq_set_clear_dirty(p, oblock, true);
+}
+
+static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+{
+        __mq_set_clear_dirty(p, oblock, false);
+}
+
 static int mq_load_mapping(struct dm_cache_policy *p,
                            dm_oblock_t oblock, dm_cblock_t cblock,
                            uint32_t hint, bool hint_valid)
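For context, the new set_dirty/clear_dirty hooks are driven by the cache core through the policy_set_dirty()/policy_clear_dirty() wrappers in dm-cache-policy.h. A simplified sketch of that caller side; struct cache, dirty_bitset and these helper names are assumptions about the core, not part of this patch:

/* Core marks a cached block dirty when it takes a write... */
static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
        if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
                policy_set_dirty(cache->policy, oblock);
}

/* ...and clean again once a writeback copy has completed. */
static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
        if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset))
                policy_clear_dirty(cache->policy, oblock);
}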
@@ -931,6 +997,7 @@ static int mq_load_mapping(struct dm_cache_policy *p,
         e->cblock = cblock;
         e->oblock = oblock;
         e->in_cache = true;
+        e->dirty = false; /* this gets corrected in a minute */
         e->hit_count = hint_valid ? hint : 1;
         e->generation = mq->generation;
         push(mq, e);
@@ -949,7 +1016,14 @@ static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
         mutex_lock(&mq->lock);
 
         for (level = 0; level < NR_QUEUE_LEVELS; level++)
-                list_for_each_entry(e, &mq->cache.qs[level], list) {
+                list_for_each_entry(e, &mq->cache_clean.qs[level], list) {
+                        r = fn(context, e->cblock, e->oblock, e->hit_count);
+                        if (r)
+                                goto out;
+                }
+
+        for (level = 0; level < NR_QUEUE_LEVELS; level++)
+                list_for_each_entry(e, &mq->cache_dirty.qs[level], list) {
                         r = fn(context, e->cblock, e->oblock, e->hit_count);
                         if (r)
                                 goto out;
@@ -974,11 +1048,41 @@ static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
 
         del(mq, e);
         e->in_cache = false;
+        e->dirty = false;
         push(mq, e);
 
         mutex_unlock(&mq->lock);
 }
 
+static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
+                               dm_cblock_t *cblock)
+{
+        struct entry *e = pop(mq, &mq->cache_dirty);
+
+        if (!e)
+                return -ENODATA;
+
+        *oblock = e->oblock;
+        *cblock = e->cblock;
+        e->dirty = false;
+        push(mq, e);
+
+        return 0;
+}
+
+static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
+                             dm_cblock_t *cblock)
+{
+        int r;
+        struct mq_policy *mq = to_mq_policy(p);
+
+        mutex_lock(&mq->lock);
+        r = __mq_writeback_work(mq, oblock, cblock);
+        mutex_unlock(&mq->lock);
+
+        return r;
+}
+
 static void force_mapping(struct mq_policy *mq,
                           dm_oblock_t current_oblock, dm_oblock_t new_oblock)
 {
@@ -988,6 +1092,7 @@ static void force_mapping(struct mq_policy *mq,
 
         del(mq, e);
         e->oblock = new_oblock;
+        e->dirty = true;
         push(mq, e);
 }
 
@@ -1063,10 +1168,12 @@ static void init_policy_functions(struct mq_policy *mq)
         mq->policy.destroy = mq_destroy;
         mq->policy.map = mq_map;
         mq->policy.lookup = mq_lookup;
+        mq->policy.set_dirty = mq_set_dirty;
+        mq->policy.clear_dirty = mq_clear_dirty;
         mq->policy.load_mapping = mq_load_mapping;
         mq->policy.walk_mappings = mq_walk_mappings;
         mq->policy.remove_mapping = mq_remove_mapping;
-        mq->policy.writeback_work = NULL;
+        mq->policy.writeback_work = mq_writeback_work;
         mq->policy.force_mapping = mq_force_mapping;
         mq->policy.residency = mq_residency;
         mq->policy.tick = mq_tick;
@@ -1099,7 +1206,9 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
         mq->find_free_last_word = 0;
 
         queue_init(&mq->pre_cache);
-        queue_init(&mq->cache);
+        queue_init(&mq->cache_clean);
+        queue_init(&mq->cache_dirty);
+
         mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);
 
         mq->nr_entries = 2 * from_cblock(cache_size);
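How the new hook gets used: when io load is light the core can repeatedly ask the policy for writeback work until it returns -ENODATA, which is what lets the cache clean itself. A simplified caller sketch; policy_writeback_work() is the real wrapper around the hook, while spare_migration_bandwidth() and writeback() are assumed core-side helpers:

static void writeback_some_dirty_blocks(struct cache *cache)
{
        dm_oblock_t oblock;
        dm_cblock_t cblock;

        /* stop when io load picks up or the policy has no dirty blocks */
        while (spare_migration_bandwidth(cache) &&
               !policy_writeback_work(cache->policy, &oblock, &cblock))
                writeback(cache, oblock, cblock); /* copy cblock back to oblock */
}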