Merge tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:

 - Two NVMe pull requests:
     - ana log parse fix from Anton
     - nvme quirks support for Apple devices from Ben
     - fix missing bio completion tracing for multipath stack devices
       from Hannes and Mikhail
     - IP TOS settings for nvme rdma and tcp transports from Israel
     - rq_dma_dir cleanups from Israel
     - tracing for Get LBA Status command from Minwoo
     - Some nvme-tcp cleanups from Minwoo, Potnuri and Myself
     - Some consolidation between the fabrics transports for handling
       the CAP register
     - reset race with ns scanning fix for fabrics (move fabrics
       commands to a dedicated request queue with a different lifetime
       from the admin request queue)
     - controller reset and namespace scan races fixes
     - nvme discovery log change uevent support
     - naming improvements from Keith
     - multiple discovery controllers reject fix from James
     - some regular cleanups from various people

 - Series fixing (and re-fixing) null_blk debug printing and
   nr_devices checks (André)

 - A few pull requests from Song, with fixes from Andy, Guoqing,
   Guilherme, Neil, Nigel, and Yufen.

 - REQ_OP_ZONE_RESET_ALL support (Chaitanya)

 - Bio merge handling unification (Christoph)

 - Pick default elevator correctly for devices with special needs
   (Damien)

 - Block stats fixes (Hou)

 - Timeout and support devices nbd fixes (Mike)

 - Series fixing races around elevator switching and device
   add/remove (Ming)

 - sed-opal cleanups (Revanth)

 - Per device weight support for BFQ (Fam)

 - Support for blk-iocost, a new model that can properly account cost
   of IO workloads (Tejun)

 - blk-cgroup writeback fixes (Tejun)

 - paride queue init fixes (zhengbin)

 - blk_set_runtime_active() cleanup (Stanley)

 - Block segment mapping optimizations (Bart)

 - lightnvm fixes (Hans/Minwoo/YueHaibing)

 - Various little fixes and cleanups

* tag 'for-5.4/block-2019-09-16' of git://git.kernel.dk/linux-block: (186 commits)
  null_blk: format pr_* logs with pr_fmt
  null_blk: match the type of parameter nr_devices
  null_blk: do not fail the module load with zero devices
  block: also check RQF_STATS in blk_mq_need_time_stamp()
  block: make rq sector size accessible for block stats
  bfq: Fix bfq linkage error
  raid5: use bio_end_sector in r5_next_bio
  raid5: remove STRIPE_OPS_REQ_PENDING
  md: add feature flag MD_FEATURE_RAID0_LAYOUT
  md/raid0: avoid RAID0 data corruption due to layout confusion.
  raid5: don't set STRIPE_HANDLE to stripe which is in batch list
  raid5: don't increment read_errors on EILSEQ return
  nvmet: fix a wrong error status returned in error log page
  nvme: send discovery log page change events to userspace
  nvme: add uevent variables for controller devices
  nvme: enable aen regardless of the presence of I/O queues
  nvme-fabrics: allow discovery subsystems accept a kato
  nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery()
  nvme: Remove redundant assignment of cq vector
  nvme: Assign subsys instance from first ctrl
  ...
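As a rough illustration of the driver-facing side of two items above (REQ_OP_ZONE_RESET_ALL support and the elevator-feature selection for devices with special needs): a zoned block driver would typically advertise the new reset-all capability and constrain elevator selection while setting up its request queue. This is a minimal sketch, not code taken from this merge; the flag and feature names (QUEUE_FLAG_ZONE_RESETALL, ELEVATOR_F_ZBD_SEQ_WRITE) are assumptions based on this series.

    #include <linux/blkdev.h>

    /*
     * Sketch only: how a zoned block driver might hook into two of the
     * new facilities in this pull request. The flag and feature names
     * are assumed from this series, not verified against the final tree.
     */
    static void example_setup_zoned_queue(struct request_queue *q)
    {
    	/* Advertise support for resetting all zones in one command. */
    	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);

    	/*
    	 * Restrict elevator selection to schedulers that preserve the
    	 * ordering of sequential zone writes (e.g. mq-deadline).
    	 */
    	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
    }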
@@ -26,6 +26,9 @@ menuconfig BLOCK
|
||||
|
||||
if BLOCK
|
||||
|
||||
config BLK_RQ_ALLOC_TIME
|
||||
bool
|
||||
|
||||
config BLK_SCSI_REQUEST
|
||||
bool
|
||||
|
||||
@@ -132,6 +135,16 @@ config BLK_CGROUP_IOLATENCY
|
||||
|
||||
Note, this is an experimental interface and could be changed someday.
|
||||
|
||||
config BLK_CGROUP_IOCOST
|
||||
bool "Enable support for cost model based cgroup IO controller"
|
||||
depends on BLK_CGROUP=y
|
||||
select BLK_RQ_ALLOC_TIME
|
||||
---help---
|
||||
Enabling this option enables the .weight interface for cost
|
||||
model based proportional IO control. The IO controller
|
||||
distributes IO capacity between different groups based on
|
||||
their share of the overall weight distribution.
|
||||
|
||||
config BLK_WBT_MQ
|
||||
bool "Multiqueue writeback throttling"
|
||||
default y
|
||||
|
@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
|
||||
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
|
||||
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
|
||||
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
|
||||
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
|
||||
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
|
||||
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
|
||||
|
@@ -501,11 +501,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd)
|
||||
kfree(cpd_to_bfqgd(cpd));
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
|
||||
static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
|
||||
struct blkcg *blkcg)
|
||||
{
|
||||
struct bfq_group *bfqg;
|
||||
|
||||
bfqg = kzalloc_node(sizeof(*bfqg), gfp, node);
|
||||
bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
|
||||
if (!bfqg)
|
||||
return NULL;
|
||||
|
||||
@@ -904,7 +905,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
|
||||
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
|
||||
}
|
||||
|
||||
static int bfq_io_show_weight(struct seq_file *sf, void *v)
|
||||
static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
|
||||
struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
|
||||
@@ -918,6 +919,60 @@ static int bfq_io_show_weight(struct seq_file *sf, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bfqg_prfill_weight_device(struct seq_file *sf,
|
||||
struct blkg_policy_data *pd, int off)
|
||||
{
|
||||
struct bfq_group *bfqg = pd_to_bfqg(pd);
|
||||
|
||||
if (!bfqg->entity.dev_weight)
|
||||
return 0;
|
||||
return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
|
||||
}
|
||||
|
||||
static int bfq_io_show_weight(struct seq_file *sf, void *v)
|
||||
{
|
||||
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
|
||||
struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
|
||||
|
||||
seq_printf(sf, "default %u\n", bfqgd->weight);
|
||||
blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
|
||||
&blkcg_policy_bfq, 0, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
|
||||
{
|
||||
weight = dev_weight ?: weight;
|
||||
|
||||
bfqg->entity.dev_weight = dev_weight;
|
||||
/*
|
||||
* Setting the prio_changed flag of the entity
|
||||
* to 1 with new_weight == weight would re-set
|
||||
* the value of the weight to its ioprio mapping.
|
||||
* Set the flag only if necessary.
|
||||
*/
|
||||
if ((unsigned short)weight != bfqg->entity.new_weight) {
|
||||
bfqg->entity.new_weight = (unsigned short)weight;
|
||||
/*
|
||||
* Make sure that the above new value has been
|
||||
* stored in bfqg->entity.new_weight before
|
||||
* setting the prio_changed flag. In fact,
|
||||
* this flag may be read asynchronously (in
|
||||
* critical sections protected by a different
|
||||
* lock than that held here), and finding this
|
||||
* flag set may cause the execution of the code
|
||||
* for updating parameters whose value may
|
||||
* depend also on bfqg->entity.new_weight (in
|
||||
* __bfq_entity_update_weight_prio).
|
||||
* This barrier makes sure that the new value
|
||||
* of bfqg->entity.new_weight is correctly
|
||||
* seen in that code.
|
||||
*/
|
||||
smp_wmb();
|
||||
bfqg->entity.prio_changed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
|
||||
struct cftype *cftype,
|
||||
u64 val)
|
||||
@@ -936,53 +991,70 @@ static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
|
||||
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
|
||||
struct bfq_group *bfqg = blkg_to_bfqg(blkg);
|
||||
|
||||
if (!bfqg)
|
||||
continue;
|
||||
/*
|
||||
* Setting the prio_changed flag of the entity
|
||||
* to 1 with new_weight == weight would re-set
|
||||
* the value of the weight to its ioprio mapping.
|
||||
* Set the flag only if necessary.
|
||||
*/
|
||||
if ((unsigned short)val != bfqg->entity.new_weight) {
|
||||
bfqg->entity.new_weight = (unsigned short)val;
|
||||
/*
|
||||
* Make sure that the above new value has been
|
||||
* stored in bfqg->entity.new_weight before
|
||||
* setting the prio_changed flag. In fact,
|
||||
* this flag may be read asynchronously (in
|
||||
* critical sections protected by a different
|
||||
* lock than that held here), and finding this
|
||||
* flag set may cause the execution of the code
|
||||
* for updating parameters whose value may
|
||||
* depend also on bfqg->entity.new_weight (in
|
||||
* __bfq_entity_update_weight_prio).
|
||||
* This barrier makes sure that the new value
|
||||
* of bfqg->entity.new_weight is correctly
|
||||
* seen in that code.
|
||||
*/
|
||||
smp_wmb();
|
||||
bfqg->entity.prio_changed = 1;
|
||||
}
|
||||
if (bfqg)
|
||||
bfq_group_set_weight(bfqg, val, 0);
|
||||
}
|
||||
spin_unlock_irq(&blkcg->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
|
||||
char *buf, size_t nbytes,
|
||||
loff_t off)
|
||||
{
|
||||
int ret;
|
||||
struct blkg_conf_ctx ctx;
|
||||
struct blkcg *blkcg = css_to_blkcg(of_css(of));
|
||||
struct bfq_group *bfqg;
|
||||
u64 v;
|
||||
|
||||
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (sscanf(ctx.body, "%llu", &v) == 1) {
|
||||
/* require "default" on dfl */
|
||||
ret = -ERANGE;
|
||||
if (!v)
|
||||
goto out;
|
||||
} else if (!strcmp(strim(ctx.body), "default")) {
|
||||
v = 0;
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bfqg = blkg_to_bfqg(ctx.blkg);
|
||||
|
||||
ret = -ERANGE;
|
||||
if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
|
||||
bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
|
||||
ret = 0;
|
||||
}
|
||||
out:
|
||||
blkg_conf_finish(&ctx);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
|
||||
char *buf, size_t nbytes,
|
||||
loff_t off)
|
||||
{
|
||||
u64 weight;
|
||||
/* First unsigned long found in the file is used */
|
||||
int ret = kstrtoull(strim(buf), 0, &weight);
|
||||
char *endp;
|
||||
int ret;
|
||||
u64 v;
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
buf = strim(buf);
|
||||
|
||||
ret = bfq_io_set_weight_legacy(of_css(of), NULL, weight);
|
||||
return ret ?: nbytes;
|
||||
/* "WEIGHT" or "default WEIGHT" sets the default weight */
|
||||
v = simple_strtoull(buf, &endp, 0);
|
||||
if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
|
||||
ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
return bfq_io_set_device_weight(of, buf, nbytes, off);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BFQ_CGROUP_DEBUG
|
||||
@@ -1141,9 +1213,15 @@ struct cftype bfq_blkcg_legacy_files[] = {
|
||||
{
|
||||
.name = "bfq.weight",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = bfq_io_show_weight,
|
||||
.seq_show = bfq_io_show_weight_legacy,
|
||||
.write_u64 = bfq_io_set_weight_legacy,
|
||||
},
|
||||
{
|
||||
.name = "bfq.weight_device",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.seq_show = bfq_io_show_weight,
|
||||
.write = bfq_io_set_weight,
|
||||
},
|
||||
|
||||
/* statistics, covers only the tasks in the bfqg */
|
||||
{
|
||||
|
@@ -168,6 +168,9 @@ struct bfq_entity {
|
||||
/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
|
||||
int budget;
|
||||
|
||||
/* device weight, if non-zero, it overrides the default weight of
|
||||
* bfq_group_data */
|
||||
int dev_weight;
|
||||
/* weight of the queue */
|
||||
int weight;
|
||||
/* next weight if a change is in progress */
|
||||
|
@@ -744,6 +744,8 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Matches the smp_wmb() in bfq_group_set_weight. */
|
||||
smp_rmb();
|
||||
old_st->wsum -= entity->weight;
|
||||
|
||||
if (entity->new_weight != entity->orig_weight) {
|
||||
|
block/bio.c: 60 changed lines
@@ -646,25 +646,20 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the @page can be added to the current segment(@bv), and make
|
||||
* sure to call it only if page_is_mergeable(@bv, @page) is true
|
||||
*/
|
||||
static bool can_add_page_to_seg(struct request_queue *q,
|
||||
struct bio_vec *bv, struct page *page, unsigned len,
|
||||
unsigned offset)
|
||||
static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned len, unsigned offset,
|
||||
bool *same_page)
|
||||
{
|
||||
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
unsigned long mask = queue_segment_boundary(q);
|
||||
phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
|
||||
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
|
||||
|
||||
if ((addr1 | mask) != (addr2 | mask))
|
||||
return false;
|
||||
|
||||
if (bv->bv_len + len > queue_max_segment_size(q))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
return __bio_try_merge_page(bio, page, len, offset, same_page);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -674,7 +669,7 @@ static bool can_add_page_to_seg(struct request_queue *q,
|
||||
* @page: page to add
|
||||
* @len: vec entry length
|
||||
* @offset: vec entry offset
|
||||
* @put_same_page: put the page if it is same with last added page
|
||||
* @same_page: return if the merge happen inside the same page
|
||||
*
|
||||
* Attempt to add a page to the bio_vec maplist. This can fail for a
|
||||
* number of reasons, such as the bio being full or target block device
|
||||
@@ -685,10 +680,9 @@ static bool can_add_page_to_seg(struct request_queue *q,
|
||||
*/
|
||||
static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset,
|
||||
bool put_same_page)
|
||||
bool *same_page)
|
||||
{
|
||||
struct bio_vec *bvec;
|
||||
bool same_page = false;
|
||||
|
||||
/*
|
||||
* cloned bio must not modify vec list
|
||||
@@ -700,28 +694,16 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
return 0;
|
||||
|
||||
if (bio->bi_vcnt > 0) {
|
||||
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
|
||||
if (page == bvec->bv_page &&
|
||||
offset == bvec->bv_offset + bvec->bv_len) {
|
||||
if (put_same_page)
|
||||
put_page(page);
|
||||
bvec->bv_len += len;
|
||||
goto done;
|
||||
}
|
||||
if (bio_try_merge_pc_page(q, bio, page, len, offset, same_page))
|
||||
return len;
|
||||
|
||||
/*
|
||||
* If the queue doesn't support SG gaps and adding this
|
||||
* offset would create a gap, disallow it.
|
||||
* If the queue doesn't support SG gaps and adding this segment
|
||||
* would create a gap, disallow it.
|
||||
*/
|
||||
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
|
||||
if (bvec_gap_to_prev(q, bvec, offset))
|
||||
return 0;
|
||||
|
||||
if (page_is_mergeable(bvec, page, len, offset, &same_page) &&
|
||||
can_add_page_to_seg(q, bvec, page, len, offset)) {
|
||||
bvec->bv_len += len;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
if (bio_full(bio, len))
|
||||
@@ -735,7 +717,6 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
bvec->bv_len = len;
|
||||
bvec->bv_offset = offset;
|
||||
bio->bi_vcnt++;
|
||||
done:
|
||||
bio->bi_iter.bi_size += len;
|
||||
return len;
|
||||
}
|
||||
@@ -743,7 +724,8 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
|
||||
struct page *page, unsigned int len, unsigned int offset)
|
||||
{
|
||||
return __bio_add_pc_page(q, bio, page, len, offset, false);
|
||||
bool same_page = false;
|
||||
return __bio_add_pc_page(q, bio, page, len, offset, &same_page);
|
||||
}
|
||||
EXPORT_SYMBOL(bio_add_pc_page);
|
||||
|
||||
@@ -806,6 +788,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
|
||||
|
||||
bio->bi_iter.bi_size += len;
|
||||
bio->bi_vcnt++;
|
||||
|
||||
if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
|
||||
bio_set_flag(bio, BIO_WORKINGSET);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__bio_add_page);
|
||||
|
||||
@@ -1384,13 +1369,17 @@ struct bio *bio_map_user_iov(struct request_queue *q,
|
||||
for (j = 0; j < npages; j++) {
|
||||
struct page *page = pages[j];
|
||||
unsigned int n = PAGE_SIZE - offs;
|
||||
bool same_page = false;
|
||||
|
||||
if (n > bytes)
|
||||
n = bytes;
|
||||
|
||||
if (!__bio_add_pc_page(q, bio, page, n, offs,
|
||||
true))
|
||||
&same_page)) {
|
||||
if (same_page)
|
||||
put_page(page);
|
||||
break;
|
||||
}
|
||||
|
||||
added += n;
|
||||
bytes -= n;
|
||||
@@ -1521,7 +1510,6 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
bio->bi_end_io = bio_map_kern_endio;
|
||||
return bio;
|
||||
}
|
||||
EXPORT_SYMBOL(bio_map_kern);
|
||||
|
||||
static void bio_copy_kern_endio(struct bio *bio)
|
||||
{
|
||||
@@ -1842,8 +1830,8 @@ EXPORT_SYMBOL(bio_endio);
|
||||
* @bio, and updates @bio to represent the remaining sectors.
|
||||
*
|
||||
* Unless this is a discard request the newly allocated bio will point
|
||||
* to @bio's bi_io_vec; it is the caller's responsibility to ensure that
|
||||
* @bio is not freed before the split.
|
||||
* to @bio's bi_io_vec. It is the caller's responsibility to ensure that
|
||||
* neither @bio nor @bs are freed before the split bio.
|
||||
*/
|
||||
struct bio *bio_split(struct bio *bio, int sectors,
|
||||
gfp_t gfp, struct bio_set *bs)
|
||||
|
@@ -175,7 +175,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
continue;
|
||||
|
||||
/* alloc per-policy data and attach it to blkg */
|
||||
pd = pol->pd_alloc_fn(gfp_mask, q->node);
|
||||
pd = pol->pd_alloc_fn(gfp_mask, q, blkcg);
|
||||
if (!pd)
|
||||
goto err_free;
|
||||
|
||||
@@ -753,6 +753,44 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
|
||||
return __blkg_lookup(blkcg, q, true /* update_hint */);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_conf_prep - parse and prepare for per-blkg config update
|
||||
* @inputp: input string pointer
|
||||
*
|
||||
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update
|
||||
* from @input and get and return the matching gendisk. *@inputp is
|
||||
* updated to point past the device node prefix. Returns an ERR_PTR()
|
||||
* value on error.
|
||||
*
|
||||
* Use this function iff blkg_conf_prep() can't be used for some reason.
|
||||
*/
|
||||
struct gendisk *blkcg_conf_get_disk(char **inputp)
|
||||
{
|
||||
char *input = *inputp;
|
||||
unsigned int major, minor;
|
||||
struct gendisk *disk;
|
||||
int key_len, part;
|
||||
|
||||
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
input += key_len;
|
||||
if (!isspace(*input))
|
||||
return ERR_PTR(-EINVAL);
|
||||
input = skip_spaces(input);
|
||||
|
||||
disk = get_gendisk(MKDEV(major, minor), &part);
|
||||
if (!disk)
|
||||
return ERR_PTR(-ENODEV);
|
||||
if (part) {
|
||||
put_disk_and_module(disk);
|
||||
return ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
*inputp = input;
|
||||
return disk;
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_conf_prep - parse and prepare for per-blkg config update
|
||||
* @blkcg: target block cgroup
|
||||
@@ -772,25 +810,11 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
struct blkcg_gq *blkg;
|
||||
unsigned int major, minor;
|
||||
int key_len, part, ret;
|
||||
char *body;
|
||||
int ret;
|
||||
|
||||
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
|
||||
return -EINVAL;
|
||||
|
||||
body = input + key_len;
|
||||
if (!isspace(*body))
|
||||
return -EINVAL;
|
||||
body = skip_spaces(body);
|
||||
|
||||
disk = get_gendisk(MKDEV(major, minor), &part);
|
||||
if (!disk)
|
||||
return -ENODEV;
|
||||
if (part) {
|
||||
ret = -ENODEV;
|
||||
goto fail;
|
||||
}
|
||||
disk = blkcg_conf_get_disk(&input);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
|
||||
q = disk->queue;
|
||||
|
||||
@@ -856,7 +880,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
|
||||
success:
|
||||
ctx->disk = disk;
|
||||
ctx->blkg = blkg;
|
||||
ctx->body = body;
|
||||
ctx->body = input;
|
||||
return 0;
|
||||
|
||||
fail_unlock:
|
||||
@@ -876,6 +900,7 @@ fail:
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_prep);
|
||||
|
||||
/**
|
||||
* blkg_conf_finish - finish up per-blkg config update
|
||||
@@ -891,6 +916,7 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx)
|
||||
rcu_read_unlock();
|
||||
put_disk_and_module(ctx->disk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkg_conf_finish);
|
||||
|
||||
static int blkcg_print_stat(struct seq_file *sf, void *v)
|
||||
{
|
||||
@@ -1346,7 +1372,7 @@ int blkcg_activate_policy(struct request_queue *q,
|
||||
blk_mq_freeze_queue(q);
|
||||
pd_prealloc:
|
||||
if (!pd_prealloc) {
|
||||
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
|
||||
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
|
||||
if (!pd_prealloc) {
|
||||
ret = -ENOMEM;
|
||||
goto out_bypass_end;
|
||||
@@ -1362,7 +1388,7 @@ pd_prealloc:
|
||||
if (blkg->pd[pol->plid])
|
||||
continue;
|
||||
|
||||
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q->node);
|
||||
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
|
||||
if (!pd)
|
||||
swap(pd, pd_prealloc);
|
||||
if (!pd) {
|
||||
@@ -1475,7 +1501,8 @@ int blkcg_policy_register(struct blkcg_policy *pol)
|
||||
blkcg->cpd[pol->plid] = cpd;
|
||||
cpd->blkcg = blkcg;
|
||||
cpd->plid = pol->plid;
|
||||
pol->cpd_init_fn(cpd);
|
||||
if (pol->cpd_init_fn)
|
||||
pol->cpd_init_fn(cpd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -36,6 +36,7 @@
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/psi.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/block.h>
|
||||
@@ -129,6 +130,7 @@ static const char *const blk_op_name[] = {
|
||||
REQ_OP_NAME(DISCARD),
|
||||
REQ_OP_NAME(SECURE_ERASE),
|
||||
REQ_OP_NAME(ZONE_RESET),
|
||||
REQ_OP_NAME(ZONE_RESET_ALL),
|
||||
REQ_OP_NAME(WRITE_SAME),
|
||||
REQ_OP_NAME(WRITE_ZEROES),
|
||||
REQ_OP_NAME(SCSI_IN),
|
||||
@@ -344,7 +346,8 @@ void blk_cleanup_queue(struct request_queue *q)
|
||||
|
||||
/*
|
||||
* Drain all requests queued before DYING marking. Set DEAD flag to
|
||||
* prevent that q->request_fn() gets invoked after draining finished.
|
||||
* prevent that blk_mq_run_hw_queues() accesses the hardware queues
|
||||
* after draining finished.
|
||||
*/
|
||||
blk_freeze_queue(q);
|
||||
|
||||
@@ -479,7 +482,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
INIT_LIST_HEAD(&q->queue_head);
|
||||
q->last_merge = NULL;
|
||||
|
||||
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
|
||||
@@ -518,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
|
||||
mutex_init(&q->blk_trace_mutex);
|
||||
#endif
|
||||
mutex_init(&q->sysfs_lock);
|
||||
mutex_init(&q->sysfs_dir_lock);
|
||||
spin_lock_init(&q->queue_lock);
|
||||
|
||||
init_waitqueue_head(&q->mq_freeze_wq);
|
||||
@@ -601,6 +604,7 @@ bool bio_attempt_back_merge(struct request *req, struct bio *bio,
|
||||
return false;
|
||||
|
||||
trace_block_bio_backmerge(req->q, req, bio);
|
||||
rq_qos_merge(req->q, req, bio);
|
||||
|
||||
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
|
||||
blk_rq_set_mixed_merge(req);
|
||||
@@ -622,6 +626,7 @@ bool bio_attempt_front_merge(struct request *req, struct bio *bio,
|
||||
return false;
|
||||
|
||||
trace_block_bio_frontmerge(req->q, req, bio);
|
||||
rq_qos_merge(req->q, req, bio);
|
||||
|
||||
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
|
||||
blk_rq_set_mixed_merge(req);
|
||||
@@ -647,6 +652,8 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
|
||||
blk_rq_get_max_sectors(req, blk_rq_pos(req)))
|
||||
goto no_merge;
|
||||
|
||||
rq_qos_merge(q, req, bio);
|
||||
|
||||
req->biotail->bi_next = bio;
|
||||
req->biotail = bio;
|
||||
req->__data_len += bio->bi_iter.bi_size;
|
||||
@@ -931,6 +938,10 @@ generic_make_request_checks(struct bio *bio)
|
||||
if (!blk_queue_is_zoned(q))
|
||||
goto not_supported;
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
|
||||
goto not_supported;
|
||||
break;
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
if (!q->limits.max_write_zeroes_sectors)
|
||||
goto not_supported;
|
||||
@@ -1128,6 +1139,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
|
||||
*/
|
||||
blk_qc_t submit_bio(struct bio *bio)
|
||||
{
|
||||
bool workingset_read = false;
|
||||
unsigned long pflags;
|
||||
blk_qc_t ret;
|
||||
|
||||
if (blkcg_punt_bio_submit(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
@@ -1146,6 +1161,8 @@ blk_qc_t submit_bio(struct bio *bio)
|
||||
if (op_is_write(bio_op(bio))) {
|
||||
count_vm_events(PGPGOUT, count);
|
||||
} else {
|
||||
if (bio_flagged(bio, BIO_WORKINGSET))
|
||||
workingset_read = true;
|
||||
task_io_account_read(bio->bi_iter.bi_size);
|
||||
count_vm_events(PGPGIN, count);
|
||||
}
|
||||
@@ -1160,7 +1177,21 @@ blk_qc_t submit_bio(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
return generic_make_request(bio);
|
||||
/*
|
||||
* If we're reading data that is part of the userspace
|
||||
* workingset, count submission time as memory stall. When the
|
||||
* device is congested, or the submitting cgroup IO-throttled,
|
||||
* submission can be a significant part of overall IO time.
|
||||
*/
|
||||
if (workingset_read)
|
||||
psi_memstall_enter(&pflags);
|
||||
|
||||
ret = generic_make_request(bio);
|
||||
|
||||
if (workingset_read)
|
||||
psi_memstall_leave(&pflags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(submit_bio);
|
||||
|
||||
|
block/blk-iocost.c: new file, 2457 lines (diff too large, not shown)
@@ -725,7 +725,7 @@ int blk_iolatency_init(struct request_queue *q)
|
||||
return -ENOMEM;
|
||||
|
||||
rqos = &blkiolat->rqos;
|
||||
rqos->id = RQ_QOS_CGROUP;
|
||||
rqos->id = RQ_QOS_LATENCY;
|
||||
rqos->ops = &blkcg_iolatency_ops;
|
||||
rqos->q = q;
|
||||
|
||||
@@ -934,11 +934,13 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
|
||||
}
|
||||
|
||||
|
||||
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
|
||||
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
|
||||
struct request_queue *q,
|
||||
struct blkcg *blkcg)
|
||||
{
|
||||
struct iolatency_grp *iolat;
|
||||
|
||||
iolat = kzalloc_node(sizeof(*iolat), gfp, node);
|
||||
iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
|
||||
if (!iolat)
|
||||
return NULL;
|
||||
iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
|
||||
|
@@ -132,19 +132,32 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
|
||||
return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the maximum number of sectors from the start of a bio that may be
|
||||
* submitted as a single request to a block device. If enough sectors remain,
|
||||
* align the end to the physical block size. Otherwise align the end to the
|
||||
* logical block size. This approach minimizes the number of non-aligned
|
||||
* requests that are submitted to a block device if the start of a bio is not
|
||||
* aligned to a physical block boundary.
|
||||
*/
|
||||
static inline unsigned get_max_io_size(struct request_queue *q,
|
||||
struct bio *bio)
|
||||
{
|
||||
unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
|
||||
unsigned mask = queue_logical_block_size(q) - 1;
|
||||
unsigned max_sectors = sectors;
|
||||
unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
|
||||
unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
|
||||
unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1);
|
||||
|
||||
/* aligned to logical block size */
|
||||
sectors &= ~(mask >> 9);
|
||||
max_sectors += start_offset;
|
||||
max_sectors &= ~(pbs - 1);
|
||||
if (max_sectors > start_offset)
|
||||
return max_sectors - start_offset;
|
||||
|
||||
return sectors;
|
||||
return sectors & (lbs - 1);
|
||||
}
|
||||
|
||||
static unsigned get_max_segment_size(struct request_queue *q,
|
||||
static unsigned get_max_segment_size(const struct request_queue *q,
|
||||
unsigned offset)
|
||||
{
|
||||
unsigned long mask = queue_segment_boundary(q);
|
||||
@@ -157,26 +170,41 @@ static unsigned get_max_segment_size(struct request_queue *q,
|
||||
queue_max_segment_size(q));
|
||||
}
|
||||
|
||||
/*
|
||||
* Split the bvec @bv into segments, and update all kinds of
|
||||
* variables.
|
||||
/**
|
||||
* bvec_split_segs - verify whether or not a bvec should be split in the middle
|
||||
* @q: [in] request queue associated with the bio associated with @bv
|
||||
* @bv: [in] bvec to examine
|
||||
* @nsegs: [in,out] Number of segments in the bio being built. Incremented
|
||||
* by the number of segments from @bv that may be appended to that
|
||||
* bio without exceeding @max_segs
|
||||
* @sectors: [in,out] Number of sectors in the bio being built. Incremented
|
||||
* by the number of sectors from @bv that may be appended to that
|
||||
* bio without exceeding @max_sectors
|
||||
* @max_segs: [in] upper bound for *@nsegs
|
||||
* @max_sectors: [in] upper bound for *@sectors
|
||||
*
|
||||
* When splitting a bio, it can happen that a bvec is encountered that is too
|
||||
* big to fit in a single segment and hence that it has to be split in the
|
||||
* middle. This function verifies whether or not that should happen. The value
|
||||
* %true is returned if and only if appending the entire @bv to a bio with
|
||||
* *@nsegs segments and *@sectors sectors would make that bio unacceptable for
|
||||
* the block driver.
|
||||
*/
|
||||
static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
|
||||
unsigned *nsegs, unsigned *sectors, unsigned max_segs)
|
||||
static bool bvec_split_segs(const struct request_queue *q,
|
||||
const struct bio_vec *bv, unsigned *nsegs,
|
||||
unsigned *sectors, unsigned max_segs,
|
||||
unsigned max_sectors)
|
||||
{
|
||||
unsigned len = bv->bv_len;
|
||||
unsigned max_len = (min(max_sectors, UINT_MAX >> 9) - *sectors) << 9;
|
||||
unsigned len = min(bv->bv_len, max_len);
|
||||
unsigned total_len = 0;
|
||||
unsigned new_nsegs = 0, seg_size = 0;
|
||||
unsigned seg_size = 0;
|
||||
|
||||
/*
|
||||
* Multi-page bvec may be too big to hold in one segment, so the
|
||||
* current bvec has to be splitted as multiple segments.
|
||||
*/
|
||||
while (len && new_nsegs + *nsegs < max_segs) {
|
||||
while (len && *nsegs < max_segs) {
|
||||
seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
|
||||
seg_size = min(seg_size, len);
|
||||
|
||||
new_nsegs++;
|
||||
(*nsegs)++;
|
||||
total_len += seg_size;
|
||||
len -= seg_size;
|
||||
|
||||
@@ -184,16 +212,31 @@ static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
|
||||
break;
|
||||
}
|
||||
|
||||
if (new_nsegs) {
|
||||
*nsegs += new_nsegs;
|
||||
if (sectors)
|
||||
*sectors += total_len >> 9;
|
||||
}
|
||||
*sectors += total_len >> 9;
|
||||
|
||||
/* split in the middle of the bvec if len != 0 */
|
||||
return !!len;
|
||||
/* tell the caller to split the bvec if it is too big to fit */
|
||||
return len > 0 || bv->bv_len > max_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_bio_segment_split - split a bio in two bios
|
||||
* @q: [in] request queue pointer
|
||||
* @bio: [in] bio to be split
|
||||
* @bs: [in] bio set to allocate the clone from
|
||||
* @segs: [out] number of segments in the bio with the first half of the sectors
|
||||
*
|
||||
* Clone @bio, update the bi_iter of the clone to represent the first sectors
|
||||
* of @bio and update @bio->bi_iter to represent the remaining sectors. The
|
||||
* following is guaranteed for the cloned bio:
|
||||
* - That it has at most get_max_io_size(@q, @bio) sectors.
|
||||
* - That it has at most queue_max_segments(@q) segments.
|
||||
*
|
||||
* Except for discard requests the cloned bio will point at the bi_io_vec of
|
||||
* the original bio. It is the responsibility of the caller to ensure that the
|
||||
* original bio is not freed before the cloned bio. The caller is also
|
||||
* responsible for ensuring that @bs is only destroyed after processing of the
|
||||
* split bio has finished.
|
||||
*/
|
||||
static struct bio *blk_bio_segment_split(struct request_queue *q,
|
||||
struct bio *bio,
|
||||
struct bio_set *bs,
|
||||
@@ -213,34 +256,18 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
|
||||
if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
|
||||
goto split;
|
||||
|
||||
if (sectors + (bv.bv_len >> 9) > max_sectors) {
|
||||
/*
|
||||
* Consider this a new segment if we're splitting in
|
||||
* the middle of this vector.
|
||||
*/
|
||||
if (nsegs < max_segs &&
|
||||
sectors < max_sectors) {
|
||||
/* split in the middle of bvec */
|
||||
bv.bv_len = (max_sectors - sectors) << 9;
|
||||
bvec_split_segs(q, &bv, &nsegs,
|
||||
§ors, max_segs);
|
||||
}
|
||||
if (nsegs < max_segs &&
|
||||
sectors + (bv.bv_len >> 9) <= max_sectors &&
|
||||
bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
|
||||
nsegs++;
|
||||
sectors += bv.bv_len >> 9;
|
||||
} else if (bvec_split_segs(q, &bv, &nsegs, §ors, max_segs,
|
||||
max_sectors)) {
|
||||
goto split;
|
||||
}
|
||||
|
||||
if (nsegs == max_segs)
|
||||
goto split;
|
||||
|
||||
bvprv = bv;
|
||||
bvprvp = &bvprv;
|
||||
|
||||
if (bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
|
||||
nsegs++;
|
||||
sectors += bv.bv_len >> 9;
|
||||
} else if (bvec_split_segs(q, &bv, &nsegs, §ors,
|
||||
max_segs)) {
|
||||
goto split;
|
||||
}
|
||||
}
|
||||
|
||||
*segs = nsegs;
|
||||
@@ -250,6 +277,19 @@ split:
|
||||
return bio_split(bio, sectors, GFP_NOIO, bs);
|
||||
}
|
||||
|
||||
/**
|
||||
* __blk_queue_split - split a bio and submit the second half
|
||||
* @q: [in] request queue pointer
|
||||
* @bio: [in, out] bio to be split
|
||||
* @nr_segs: [out] number of segments in the first bio
|
||||
*
|
||||
* Split a bio into two bios, chain the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. If the second bio is still too
|
||||
* big it will be split by a recursive call to this function. Since this
|
||||
* function may allocate a new bio from @q->bio_split, it is the responsibility
|
||||
* of the caller to ensure that @q is only released after processing of the
|
||||
* split bio has finished.
|
||||
*/
|
||||
void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
unsigned int *nr_segs)
|
||||
{
|
||||
@@ -294,6 +334,17 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_queue_split - split a bio and submit the second half
|
||||
* @q: [in] request queue pointer
|
||||
* @bio: [in, out] bio to be split
|
||||
*
|
||||
* Split a bio into two bios, chains the two bios, submit the second half and
|
||||
* store a pointer to the first half in *@bio. Since this function may allocate
|
||||
* a new bio from @q->bio_split, it is the responsibility of the caller to
|
||||
* ensure that @q is only released after processing of the split bio has
|
||||
* finished.
|
||||
*/
|
||||
void blk_queue_split(struct request_queue *q, struct bio **bio)
|
||||
{
|
||||
unsigned int nr_segs;
|
||||
@@ -305,6 +356,7 @@ EXPORT_SYMBOL(blk_queue_split);
|
||||
unsigned int blk_recalc_rq_segments(struct request *rq)
|
||||
{
|
||||
unsigned int nr_phys_segs = 0;
|
||||
unsigned int nr_sectors = 0;
|
||||
struct req_iterator iter;
|
||||
struct bio_vec bv;
|
||||
|
||||
@@ -321,7 +373,8 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
|
||||
}
|
||||
|
||||
rq_for_each_bvec(bv, rq, iter)
|
||||
bvec_split_segs(rq->q, &bv, &nr_phys_segs, NULL, UINT_MAX);
|
||||
bvec_split_segs(rq->q, &bv, &nr_phys_segs, &nr_sectors,
|
||||
UINT_MAX, UINT_MAX);
|
||||
return nr_phys_segs;
|
||||
}
|
||||
|
||||
|
@@ -15,10 +15,10 @@
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
|
||||
static int cpu_to_queue_index(struct blk_mq_queue_map *qmap,
|
||||
unsigned int nr_queues, const int cpu)
|
||||
static int queue_index(struct blk_mq_queue_map *qmap,
|
||||
unsigned int nr_queues, const int q)
|
||||
{
|
||||
return qmap->queue_offset + (cpu % nr_queues);
|
||||
return qmap->queue_offset + (q % nr_queues);
|
||||
}
|
||||
|
||||
static int get_first_sibling(unsigned int cpu)
|
||||
@@ -36,21 +36,36 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
|
||||
{
|
||||
unsigned int *map = qmap->mq_map;
|
||||
unsigned int nr_queues = qmap->nr_queues;
|
||||
unsigned int cpu, first_sibling;
|
||||
unsigned int cpu, first_sibling, q = 0;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
map[cpu] = -1;
|
||||
|
||||
/*
|
||||
* Spread queues among present CPUs first for minimizing
|
||||
* count of dead queues which are mapped by all un-present CPUs
|
||||
*/
|
||||
for_each_present_cpu(cpu) {
|
||||
if (q >= nr_queues)
|
||||
break;
|
||||
map[cpu] = queue_index(qmap, nr_queues, q++);
|
||||
}
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (map[cpu] != -1)
|
||||
continue;
|
||||
/*
|
||||
* First do sequential mapping between CPUs and queues.
|
||||
* In case we still have CPUs to map, and we have some number of
|
||||
* threads per cores then map sibling threads to the same queue
|
||||
* for performance optimizations.
|
||||
*/
|
||||
if (cpu < nr_queues) {
|
||||
map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
|
||||
if (q < nr_queues) {
|
||||
map[cpu] = queue_index(qmap, nr_queues, q++);
|
||||
} else {
|
||||
first_sibling = get_first_sibling(cpu);
|
||||
if (first_sibling == cpu)
|
||||
map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
|
||||
map[cpu] = queue_index(qmap, nr_queues, q++);
|
||||
else
|
||||
map[cpu] = map[first_sibling];
|
||||
}
|
||||
|
@@ -270,7 +270,7 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->sysfs_dir_lock);
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i)
|
||||
blk_mq_unregister_hctx(hctx);
|
||||
@@ -320,7 +320,7 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
|
||||
int ret, i;
|
||||
|
||||
WARN_ON_ONCE(!q->kobj.parent);
|
||||
lockdep_assert_held(&q->sysfs_lock);
|
||||
lockdep_assert_held(&q->sysfs_dir_lock);
|
||||
|
||||
ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
|
||||
if (ret < 0)
|
||||
@@ -349,23 +349,12 @@ unreg:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int blk_mq_register_dev(struct device *dev, struct request_queue *q)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
ret = __blk_mq_register_dev(dev, q);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void blk_mq_sysfs_unregister(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i;
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->sysfs_dir_lock);
|
||||
if (!q->mq_sysfs_init_done)
|
||||
goto unlock;
|
||||
|
||||
@@ -373,7 +362,7 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
|
||||
blk_mq_unregister_hctx(hctx);
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
}
|
||||
|
||||
int blk_mq_sysfs_register(struct request_queue *q)
|
||||
@@ -381,7 +370,7 @@ int blk_mq_sysfs_register(struct request_queue *q)
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
int i, ret = 0;
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->sysfs_dir_lock);
|
||||
if (!q->mq_sysfs_init_done)
|
||||
goto unlock;
|
||||
|
||||
@@ -392,7 +381,7 @@ int blk_mq_sysfs_register(struct request_queue *q)
|
||||
}
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@@ -10,6 +10,7 @@
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/delay.h>
|
||||
#include "blk.h"
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-tag.h"
|
||||
@@ -354,6 +355,37 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
|
||||
|
||||
static bool blk_mq_tagset_count_completed_rqs(struct request *rq,
|
||||
void *data, bool reserved)
|
||||
{
|
||||
unsigned *count = data;
|
||||
|
||||
if (blk_mq_request_completed(rq))
|
||||
(*count)++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* blk_mq_tagset_wait_completed_request - wait until all completed req's
|
||||
* complete function is run
|
||||
* @tagset: Tag set to drain completed request
|
||||
*
|
||||
* Note: This function has to be run after all IO queues are shutdown
|
||||
*/
|
||||
void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
|
||||
{
|
||||
while (true) {
|
||||
unsigned count = 0;
|
||||
|
||||
blk_mq_tagset_busy_iter(tagset,
|
||||
blk_mq_tagset_count_completed_rqs, &count);
|
||||
if (!count)
|
||||
break;
|
||||
msleep(5);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
|
||||
|
||||
/**
|
||||
* blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
|
||||
* @q: Request queue to examine.
|
||||
|
@@ -44,12 +44,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
|
||||
|
||||
static int blk_mq_poll_stats_bkt(const struct request *rq)
|
||||
{
|
||||
int ddir, bytes, bucket;
|
||||
int ddir, sectors, bucket;
|
||||
|
||||
ddir = rq_data_dir(rq);
|
||||
bytes = blk_rq_bytes(rq);
|
||||
sectors = blk_rq_stats_sectors(rq);
|
||||
|
||||
bucket = ddir + 2*(ilog2(bytes) - 9);
|
||||
bucket = ddir + 2 * ilog2(sectors);
|
||||
|
||||
if (bucket < 0)
|
||||
return -1;
|
||||
@@ -282,16 +282,16 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
|
||||
EXPORT_SYMBOL(blk_mq_can_queue);
|
||||
|
||||
/*
|
||||
* Only need start/end time stamping if we have stats enabled, or using
|
||||
* an IO scheduler.
|
||||
* Only need start/end time stamping if we have iostat or
|
||||
* blk stats enabled, or using an IO scheduler.
|
||||
*/
|
||||
static inline bool blk_mq_need_time_stamp(struct request *rq)
|
||||
{
|
||||
return (rq->rq_flags & RQF_IO_STAT) || rq->q->elevator;
|
||||
return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
|
||||
}
|
||||
|
||||
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
unsigned int tag, unsigned int op)
|
||||
unsigned int tag, unsigned int op, u64 alloc_time_ns)
|
||||
{
|
||||
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
|
||||
struct request *rq = tags->static_rqs[tag];
|
||||
@@ -325,11 +325,15 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
|
||||
RB_CLEAR_NODE(&rq->rb_node);
|
||||
rq->rq_disk = NULL;
|
||||
rq->part = NULL;
|
||||
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
|
||||
rq->alloc_time_ns = alloc_time_ns;
|
||||
#endif
|
||||
if (blk_mq_need_time_stamp(rq))
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
else
|
||||
rq->start_time_ns = 0;
|
||||
rq->io_start_time_ns = 0;
|
||||
rq->stats_sectors = 0;
|
||||
rq->nr_phys_segments = 0;
|
||||
#if defined(CONFIG_BLK_DEV_INTEGRITY)
|
||||
rq->nr_integrity_segments = 0;
|
||||
@@ -356,8 +360,14 @@ static struct request *blk_mq_get_request(struct request_queue *q,
|
||||
struct request *rq;
|
||||
unsigned int tag;
|
||||
bool clear_ctx_on_error = false;
|
||||
u64 alloc_time_ns = 0;
|
||||
|
||||
blk_queue_enter_live(q);
|
||||
|
||||
/* alloc_time includes depth and tag waits */
|
||||
if (blk_queue_rq_alloc_time(q))
|
||||
alloc_time_ns = ktime_get_ns();
|
||||
|
||||
data->q = q;
|
||||
if (likely(!data->ctx)) {
|
||||
data->ctx = blk_mq_get_ctx(q);
|
||||
@@ -393,7 +403,7 @@ static struct request *blk_mq_get_request(struct request_queue *q,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags);
|
||||
rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags, alloc_time_ns);
|
||||
if (!op_is_flush(data->cmd_flags)) {
|
||||
rq->elv.icq = NULL;
|
||||
if (e && e->type->ops.prepare_request) {
|
||||
@@ -652,19 +662,18 @@ bool blk_mq_complete_request(struct request *rq)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_complete_request);
|
||||
|
||||
void blk_mq_complete_request_sync(struct request *rq)
|
||||
{
|
||||
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
|
||||
rq->q->mq_ops->complete(rq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync);
|
||||
|
||||
int blk_mq_request_started(struct request *rq)
|
||||
{
|
||||
return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_request_started);
|
||||
|
||||
int blk_mq_request_completed(struct request *rq)
|
||||
{
|
||||
return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_request_completed);
|
||||
|
||||
void blk_mq_start_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
@@ -673,9 +682,7 @@ void blk_mq_start_request(struct request *rq)
|
||||
|
||||
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
|
||||
rq->io_start_time_ns = ktime_get_ns();
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
rq->throtl_size = blk_rq_sectors(rq);
|
||||
#endif
|
||||
rq->stats_sectors = blk_rq_sectors(rq);
|
||||
rq->rq_flags |= RQF_STATS;
|
||||
rq_qos_issue(q, rq);
|
||||
}
|
||||
@@ -2453,11 +2460,6 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
||||
struct blk_mq_ctx *ctx;
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
|
||||
/*
|
||||
* Avoid others reading imcomplete hctx->cpumask through sysfs
|
||||
*/
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
cpumask_clear(hctx->cpumask);
|
||||
hctx->nr_ctx = 0;
|
||||
@@ -2518,8 +2520,6 @@ static void blk_mq_map_swqueue(struct request_queue *q)
|
||||
HCTX_TYPE_DEFAULT, i);
|
||||
}
|
||||
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
queue_for_each_hw_ctx(q, hctx, i) {
|
||||
/*
|
||||
* If no software queues are mapped to this hardware queue,
|
||||
@@ -2688,7 +2688,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
|
||||
if (!uninit_q)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
q = blk_mq_init_allocated_queue(set, uninit_q);
|
||||
/*
|
||||
* Initialize the queue without an elevator. device_add_disk() will do
|
||||
* the initialization.
|
||||
*/
|
||||
q = blk_mq_init_allocated_queue(set, uninit_q, false);
|
||||
if (IS_ERR(q))
|
||||
blk_cleanup_queue(uninit_q);
|
||||
|
||||
@@ -2839,7 +2843,8 @@ static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
|
||||
}
|
||||
|
||||
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
struct request_queue *q)
|
||||
struct request_queue *q,
|
||||
bool elevator_init)
|
||||
{
|
||||
/* mark the queue as mq asap */
|
||||
q->mq_ops = set->ops;
|
||||
@@ -2901,18 +2906,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
blk_mq_add_queue_tag_set(set, q);
|
||||
blk_mq_map_swqueue(q);
|
||||
|
||||
if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
|
||||
int ret;
|
||||
|
||||
ret = elevator_init_mq(q);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
if (elevator_init)
|
||||
elevator_init_mq(q);
|
||||
|
||||
return q;
|
||||
|
||||
err_hctxs:
|
||||
kfree(q->queue_hw_ctx);
|
||||
q->nr_hw_queues = 0;
|
||||
err_sys_init:
|
||||
blk_mq_sysfs_deinit(q);
|
||||
err_poll:
|
||||
|
@@ -207,10 +207,12 @@ EXPORT_SYMBOL(blk_post_runtime_resume);
|
||||
*/
|
||||
void blk_set_runtime_active(struct request_queue *q)
|
||||
{
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
pm_request_autosuspend(q->dev);
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
if (q->dev) {
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
q->rpm_status = RPM_ACTIVE;
|
||||
pm_runtime_mark_last_busy(q->dev);
|
||||
pm_request_autosuspend(q->dev);
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_runtime_active);
|
||||
|
@@ -83,6 +83,15 @@ void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio)
|
||||
} while (rqos);
|
||||
}
|
||||
|
||||
void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio)
|
||||
{
|
||||
do {
|
||||
if (rqos->ops->merge)
|
||||
rqos->ops->merge(rqos, rq, bio);
|
||||
rqos = rqos->next;
|
||||
} while (rqos);
|
||||
}
|
||||
|
||||
void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
{
|
||||
do {
|
||||
@@ -92,6 +101,15 @@ void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio)
|
||||
} while (rqos);
|
||||
}
|
||||
|
||||
void __rq_qos_queue_depth_changed(struct rq_qos *rqos)
|
||||
{
|
||||
do {
|
||||
if (rqos->ops->queue_depth_changed)
|
||||
rqos->ops->queue_depth_changed(rqos);
|
||||
rqos = rqos->next;
|
||||
} while (rqos);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true, if we can't increase the depth further by scaling
|
||||
*/
|
||||
|
@@ -14,7 +14,8 @@ struct blk_mq_debugfs_attr;
|
||||
|
||||
enum rq_qos_id {
|
||||
RQ_QOS_WBT,
|
||||
RQ_QOS_CGROUP,
|
||||
RQ_QOS_LATENCY,
|
||||
RQ_QOS_COST,
|
||||
};
|
||||
|
||||
struct rq_wait {
|
||||
@@ -35,11 +36,13 @@ struct rq_qos {
|
||||
struct rq_qos_ops {
|
||||
void (*throttle)(struct rq_qos *, struct bio *);
|
||||
void (*track)(struct rq_qos *, struct request *, struct bio *);
|
||||
void (*merge)(struct rq_qos *, struct request *, struct bio *);
|
||||
void (*issue)(struct rq_qos *, struct request *);
|
||||
void (*requeue)(struct rq_qos *, struct request *);
|
||||
void (*done)(struct rq_qos *, struct request *);
|
||||
void (*done_bio)(struct rq_qos *, struct bio *);
|
||||
void (*cleanup)(struct rq_qos *, struct bio *);
|
||||
void (*queue_depth_changed)(struct rq_qos *);
|
||||
void (*exit)(struct rq_qos *);
|
||||
const struct blk_mq_debugfs_attr *debugfs_attrs;
|
||||
};
|
||||
@@ -72,7 +75,7 @@ static inline struct rq_qos *wbt_rq_qos(struct request_queue *q)
|
||||
|
||||
static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
|
||||
{
|
||||
return rq_qos_id(q, RQ_QOS_CGROUP);
|
||||
return rq_qos_id(q, RQ_QOS_LATENCY);
|
||||
}
|
||||
|
||||
static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
|
||||
@@ -80,8 +83,10 @@ static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
|
||||
switch (id) {
|
||||
case RQ_QOS_WBT:
|
||||
return "wbt";
|
||||
case RQ_QOS_CGROUP:
|
||||
return "cgroup";
|
||||
case RQ_QOS_LATENCY:
|
||||
return "latency";
|
||||
case RQ_QOS_COST:
|
||||
return "cost";
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
@@ -135,7 +140,9 @@ void __rq_qos_issue(struct rq_qos *rqos, struct request *rq);
|
||||
void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq);
|
||||
void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio);
|
||||
void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio);
|
||||
void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio);
|
||||
void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio);
|
||||
void __rq_qos_queue_depth_changed(struct rq_qos *rqos);
|
||||
|
||||
static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
@@ -185,6 +192,19 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
|
||||
__rq_qos_track(q->rq_qos, rq, bio);
|
||||
}
|
||||
|
||||
static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
if (q->rq_qos)
|
||||
__rq_qos_merge(q->rq_qos, rq, bio);
|
||||
}
|
||||
|
||||
static inline void rq_qos_queue_depth_changed(struct request_queue *q)
|
||||
{
|
||||
if (q->rq_qos)
|
||||
__rq_qos_queue_depth_changed(q->rq_qos);
|
||||
}
|
||||
|
||||
void rq_qos_exit(struct request_queue *);
|
||||
|
||||
#endif
|
||||
|
@@ -805,7 +805,7 @@ EXPORT_SYMBOL(blk_queue_update_dma_alignment);
|
||||
void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
|
||||
{
|
||||
q->queue_depth = depth;
|
||||
wbt_set_queue_depth(q, depth);
|
||||
rq_qos_queue_depth_changed(q);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_queue_depth);
|
||||
|
||||
@@ -832,6 +832,22 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
|
||||
|
||||
/**
|
||||
* blk_queue_required_elevator_features - Set a queue required elevator features
|
||||
* @q: the request queue for the target device
|
||||
* @features: Required elevator features OR'ed together
|
||||
*
|
||||
* Tell the block layer that for the device controlled through @q, only the
|
||||
* only elevators that can be used are those that implement at least the set of
|
||||
* features specified by @features.
|
||||
*/
|
||||
void blk_queue_required_elevator_features(struct request_queue *q,
|
||||
unsigned int features)
|
||||
{
|
||||
q->required_elevator_features = features;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features);
|
||||
|
||||
static int __init blk_settings_init(void)
|
||||
{
|
||||
blk_max_low_pfn = max_low_pfn - 1;
|
||||
|
@@ -941,14 +941,14 @@ int blk_register_queue(struct gendisk *disk)
|
||||
int ret;
|
||||
struct device *dev = disk_to_dev(disk);
|
||||
struct request_queue *q = disk->queue;
|
||||
bool has_elevator = false;
|
||||
|
||||
if (WARN_ON(!q))
|
||||
return -ENXIO;
|
||||
|
||||
WARN_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags),
|
||||
WARN_ONCE(blk_queue_registered(q),
|
||||
"%s is registering an already registered queue\n",
|
||||
kobject_name(&dev->kobj));
|
||||
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
|
||||
|
||||
/*
|
||||
* SCSI probing may synchronously create and destroy a lot of
|
||||
@@ -968,8 +968,7 @@ int blk_register_queue(struct gendisk *disk)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Prevent changes through sysfs until registration is completed. */
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
mutex_lock(&q->sysfs_dir_lock);
|
||||
|
||||
ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
|
||||
if (ret < 0) {
|
||||
@@ -990,26 +989,36 @@ int blk_register_queue(struct gendisk *disk)
|
||||
blk_mq_debugfs_register(q);
|
||||
}
|
||||
|
||||
kobject_uevent(&q->kobj, KOBJ_ADD);
|
||||
|
||||
wbt_enable_default(q);
|
||||
|
||||
blk_throtl_register_queue(q);
|
||||
|
||||
/*
|
||||
* The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator
|
||||
* switch won't happen at all.
|
||||
*/
|
||||
if (q->elevator) {
|
||||
ret = elv_register_queue(q);
|
||||
ret = elv_register_queue(q, false);
|
||||
if (ret) {
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
kobject_uevent(&q->kobj, KOBJ_REMOVE);
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
kobject_del(&q->kobj);
|
||||
blk_trace_remove_sysfs(dev);
|
||||
kobject_put(&dev->kobj);
|
||||
return ret;
|
||||
}
|
||||
has_elevator = true;
|
||||
}
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
|
||||
wbt_enable_default(q);
|
||||
blk_throtl_register_queue(q);
|
||||
|
||||
/* Now everything is ready and send out KOBJ_ADD uevent */
|
||||
kobject_uevent(&q->kobj, KOBJ_ADD);
|
||||
if (has_elevator)
|
||||
kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
|
||||
ret = 0;
|
||||
unlock:
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
mutex_unlock(&q->sysfs_dir_lock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_register_queue);
|
||||
@@ -1029,7 +1038,7 @@ void blk_unregister_queue(struct gendisk *disk)
                return;

        /* Return early if disk->queue was never registered. */
        if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
        if (!blk_queue_registered(q))
                return;

        /*
@@ -1038,25 +1047,28 @@ void blk_unregister_queue(struct gendisk *disk)
         * concurrent elv_iosched_store() calls.
         */
        mutex_lock(&q->sysfs_lock);

        blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
        mutex_unlock(&q->sysfs_lock);

        mutex_lock(&q->sysfs_dir_lock);
        /*
         * Remove the sysfs attributes before unregistering the queue data
         * structures that can be modified through sysfs.
         */
        if (queue_is_mq(q))
                blk_mq_unregister_dev(disk_to_dev(disk), q);
        mutex_unlock(&q->sysfs_lock);

        kobject_uevent(&q->kobj, KOBJ_REMOVE);
        kobject_del(&q->kobj);
        blk_trace_remove_sysfs(disk_to_dev(disk));

        mutex_lock(&q->sysfs_lock);
        /*
         * q->kobj has been removed, so it is safe to check if elevator
         * exists without holding q->sysfs_lock.
         */
        if (q->elevator)
                elv_unregister_queue(q);
        mutex_unlock(&q->sysfs_lock);
        mutex_unlock(&q->sysfs_dir_lock);

        kobject_put(&disk_to_dev(disk)->kobj);
}
@@ -478,12 +478,14 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq)
        timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
}

static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
                                                struct request_queue *q,
                                                struct blkcg *blkcg)
{
        struct throtl_grp *tg;
        int rw;

        tg = kzalloc_node(sizeof(*tg), gfp, node);
        tg = kzalloc_node(sizeof(*tg), gfp, q->node);
        if (!tg)
                return NULL;

@@ -2246,7 +2248,8 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
        struct request_queue *q = rq->q;
        struct throtl_data *td = q->td;

        throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
        throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
                             time_ns >> 10);
}

void blk_throtl_bio_endio(struct bio *bio)
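The throttling latency bucket now reads the request size through blk_rq_stats_sectors() instead of the throtl-private rq->throtl_size copy. A hedged sketch of the accessor this relies on (the field name and the exact point where it is recorded are assumptions; the idea is that the size is sampled when accounting for the request starts, so it is still valid at completion time even after partial completions have shrunk blk_rq_sectors()):

/* Assumed shape of the new accessor, not copied from the tree. */
static inline unsigned int example_rq_stats_sectors(const struct request *rq)
{
        return rq->stats_sectors;       /* assumed field, recorded when I/O accounting starts */
}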
@@ -629,15 +629,6 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
        }
}

void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
{
        struct rq_qos *rqos = wbt_rq_qos(q);
        if (rqos) {
                RQWB(rqos)->rq_depth.queue_depth = depth;
                __wbt_update_limits(RQWB(rqos));
        }
}

void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
{
        struct rq_qos *rqos = wbt_rq_qos(q);
@@ -656,7 +647,7 @@ void wbt_enable_default(struct request_queue *q)
                return;

        /* Queue not registered? Maybe shutting down... */
        if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
        if (!blk_queue_registered(q))
                return;

        if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
@@ -689,6 +680,12 @@ static int wbt_data_dir(const struct request *rq)
        return -1;
}

static void wbt_queue_depth_changed(struct rq_qos *rqos)
{
        RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
        __wbt_update_limits(RQWB(rqos));
}

static void wbt_exit(struct rq_qos *rqos)
{
        struct rq_wb *rwb = RQWB(rqos);
@@ -811,6 +808,7 @@ static struct rq_qos_ops wbt_rqos_ops = {
        .requeue = wbt_requeue,
        .done = wbt_done,
        .cleanup = wbt_cleanup,
        .queue_depth_changed = wbt_queue_depth_changed,
        .exit = wbt_exit,
#ifdef CONFIG_BLK_DEBUG_FS
        .debugfs_attrs = wbt_debugfs_attrs,
@@ -853,7 +851,7 @@ int wbt_init(struct request_queue *q)

        rwb->min_lat_nsec = wbt_default_latency_nsec(q);

        wbt_set_queue_depth(q, blk_queue_depth(q));
        wbt_queue_depth_changed(&rwb->rqos);
        wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));

        return 0;
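wbt now learns about queue-depth changes through the new .queue_depth_changed rq_qos operation instead of the removed wbt_set_queue_depth() export. A hedged sketch of how the core side is assumed to fan such a change out to every registered rq_qos policy (helper name and exact call site are assumptions; the rq_qos chain fields are the existing ones):

/* Sketch only: walk the queue's rq_qos chain and notify each policy that
 * implements the callback, e.g. when a driver updates the queue depth.
 */
static void example_rq_qos_queue_depth_changed(struct request_queue *q)
{
        struct rq_qos *rqos;

        for (rqos = q->rq_qos; rqos; rqos = rqos->next)
                if (rqos->ops->queue_depth_changed)
                        rqos->ops->queue_depth_changed(rqos);
}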
@@ -95,7 +95,6 @@ void wbt_enable_default(struct request_queue *);
u64 wbt_get_min_lat(struct request_queue *q);
void wbt_set_min_lat(struct request_queue *q, u64 val);

void wbt_set_queue_depth(struct request_queue *, unsigned int);
void wbt_set_write_cache(struct request_queue *, bool);

u64 wbt_default_latency_nsec(struct request_queue *);
@@ -118,9 +117,6 @@ static inline void wbt_disable_default(struct request_queue *q)
static inline void wbt_enable_default(struct request_queue *q)
{
}
static inline void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
{
}
static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
{
}
@@ -202,6 +202,42 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);

/*
 * Special case of zone reset operation to reset all zones in one command,
 * useful for applications like mkfs.
 */
static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask)
{
        struct bio *bio = bio_alloc(gfp_mask, 0);
        int ret;

        /* This operation spans all zones of the device, so no sector is needed. */
        bio_set_dev(bio, bdev);
        bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0);

        ret = submit_bio_wait(bio);
        bio_put(bio);

        return ret;
}

static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
                                                sector_t nr_sectors)
{
        if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
                return false;

        if (nr_sectors != part_nr_sects_read(bdev->bd_part))
                return false;
        /*
         * REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
         * the entire disk, that is, if the block device's start offset is 0
         * and its capacity is the same as the entire disk.
         */
        return get_start_sect(bdev) == 0 &&
               part_nr_sects_read(bdev->bd_part) == get_capacity(bdev->bd_disk);
}

/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev: Target block device
@@ -235,6 +271,9 @@ int blkdev_reset_zones(struct block_device *bdev,
                /* Out of range */
                return -EINVAL;

        if (blkdev_allow_reset_all_zones(bdev, nr_sectors))
                return __blkdev_reset_all_zones(bdev, gfp_mask);

        /* Check alignment (handle eventual smaller last zone) */
        zone_sectors = blk_queue_zone_sectors(q);
        if (sector & (zone_sectors - 1))
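For illustration, a hedged userspace sketch of what ends up on this fast path. The device path and the error handling are assumptions, but BLKGETSIZE64 and BLKRESETZONE are existing ioctls, and BLKRESETZONE resolves to blkdev_reset_zones() above; when the requested range covers the whole disk and the queue advertises reset-all support, the kernel folds it into a single REQ_OP_ZONE_RESET_ALL bio instead of one reset per zone:

/* Sketch: reset every zone of a whole-disk zoned block device. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>
#include <linux/fs.h>

int example_reset_all_zones(const char *dev_path)
{
        unsigned long long bytes;
        struct blk_zone_range range;
        int ret, fd = open(dev_path, O_RDWR);

        if (fd < 0)
                return -1;
        ret = ioctl(fd, BLKGETSIZE64, &bytes);          /* device size in bytes */
        if (!ret) {
                range.sector = 0;                       /* start of the disk */
                range.nr_sectors = bytes >> 9;          /* whole capacity, in 512B sectors */
                ret = ioctl(fd, BLKRESETZONE, &range);  /* reaches blkdev_reset_zones() */
        }
        close(fd);
        return ret;
}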
@@ -184,11 +184,11 @@ void blk_account_io_done(struct request *req, u64 now);

void blk_insert_flush(struct request *rq);

int elevator_init_mq(struct request_queue *q);
void elevator_init_mq(struct request_queue *q);
int elevator_switch_mq(struct request_queue *q,
                       struct elevator_type *new_e);
void __elevator_exit(struct request_queue *, struct elevator_queue *);
int elv_register_queue(struct request_queue *q);
int elv_register_queue(struct request_queue *q, bool uevent);
void elv_unregister_queue(struct request_queue *q);

static inline void elevator_exit(struct request_queue *q,
block/elevator.c (217 lines changed)
@@ -83,8 +83,26 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static bool elevator_match(const struct elevator_type *e, const char *name)
static inline bool elv_support_features(unsigned int elv_features,
                                        unsigned int required_features)
{
        return (required_features & elv_features) == required_features;
}

/**
 * elevator_match - Test an elevator name and features
 * @e: Scheduler to test
 * @name: Elevator name to test
 * @required_features: Features that the elevator must provide
 *
 * Return true if the elevator @e name matches @name and if @e provides all
 * the features specified by @required_features.
 */
static bool elevator_match(const struct elevator_type *e, const char *name,
                           unsigned int required_features)
{
        if (!elv_support_features(e->elevator_features, required_features))
                return false;
        if (!strcmp(e->elevator_name, name))
                return true;
        if (e->elevator_alias && !strcmp(e->elevator_alias, name))
@@ -93,15 +111,21 @@ static bool elevator_match(const struct elevator_type *e, const char *name)
        return false;
}

/*
 * Return scheduler with name 'name'
/**
 * elevator_find - Find an elevator
 * @name: Name of the elevator to find
 * @required_features: Features that the elevator must provide
 *
 * Return the first registered scheduler with name @name and supporting the
 * features @required_features, or NULL if none is found.
 */
static struct elevator_type *elevator_find(const char *name)
static struct elevator_type *elevator_find(const char *name,
                                           unsigned int required_features)
{
        struct elevator_type *e;

        list_for_each_entry(e, &elv_list, list) {
                if (elevator_match(e, name))
                if (elevator_match(e, name, required_features))
                        return e;
        }

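A small, hedged illustration of the feature test introduced above (standalone code, the constant value is assumed and stands in for ELEVATOR_F_ZBD_SEQ_WRITE): an elevator qualifies only when every bit the queue requires is present in its advertised mask, so a scheduler that advertises nothing is filtered out as soon as a queue requires zoned sequential-write support.

#include <stdbool.h>

#define EXAMPLE_F_ZBD_SEQ_WRITE (1U << 0)   /* placeholder for the real flag */

static bool example_supports(unsigned int elv_features, unsigned int required)
{
        return (required & elv_features) == required;
}

/* example_supports(EXAMPLE_F_ZBD_SEQ_WRITE, EXAMPLE_F_ZBD_SEQ_WRITE) -> true
 * example_supports(0, EXAMPLE_F_ZBD_SEQ_WRITE)                       -> false
 * example_supports(anything, 0)                                      -> true (no requirement)
 */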
@@ -120,12 +144,12 @@ static struct elevator_type *elevator_get(struct request_queue *q,

        spin_lock(&elv_list_lock);

        e = elevator_find(name);
        e = elevator_find(name, q->required_elevator_features);
        if (!e && try_loading) {
                spin_unlock(&elv_list_lock);
                request_module("%s-iosched", name);
                spin_lock(&elv_list_lock);
                e = elevator_find(name);
                e = elevator_find(name, q->required_elevator_features);
        }

        if (e && !try_module_get(e->elevator_owner))
@@ -135,20 +159,6 @@ static struct elevator_type *elevator_get(struct request_queue *q,
        return e;
}

static char chosen_elevator[ELV_NAME_MAX];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
@@ -470,13 +480,16 @@ static struct kobj_type elv_ktype = {
        .release = elevator_release,
};

int elv_register_queue(struct request_queue *q)
/*
 * elv_register_queue is called from either blk_register_queue or
 * elevator_switch, and elevator switching is prevented from happening in
 * either path, so it is safe to not hold q->sysfs_lock.
 */
int elv_register_queue(struct request_queue *q, bool uevent)
{
        struct elevator_queue *e = q->elevator;
        int error;

        lockdep_assert_held(&q->sysfs_lock);

        error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
        if (!error) {
                struct elv_fs_entry *attr = e->type->elevator_attrs;
@@ -487,24 +500,34 @@ int elv_register_queue(struct request_queue *q)
                        attr++;
                }
        }
        kobject_uevent(&e->kobj, KOBJ_ADD);
        if (uevent)
                kobject_uevent(&e->kobj, KOBJ_ADD);

        mutex_lock(&q->sysfs_lock);
        e->registered = 1;
        mutex_unlock(&q->sysfs_lock);
        }
        return error;
}

/*
 * elv_unregister_queue is called from either blk_unregister_queue or
 * elevator_switch, and elevator switching is prevented from happening in
 * either path, so it is safe to not hold q->sysfs_lock.
 */
void elv_unregister_queue(struct request_queue *q)
{
        lockdep_assert_held(&q->sysfs_lock);

        if (q) {
                struct elevator_queue *e = q->elevator;

                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);

                mutex_lock(&q->sysfs_lock);
                e->registered = 0;
                /* Re-enable throttling in case elevator disabled it */
                wbt_enable_default(q);
                mutex_unlock(&q->sysfs_lock);
        }
}
@@ -526,7 +549,7 @@ int elv_register(struct elevator_type *e)

        /* register, don't allow duplicate names */
        spin_lock(&elv_list_lock);
        if (elevator_find(e->elevator_name)) {
        if (elevator_find(e->elevator_name, 0)) {
                spin_unlock(&elv_list_lock);
                kmem_cache_destroy(e->icq_cache);
                return -EBUSY;
@@ -567,10 +590,32 @@ int elevator_switch_mq(struct request_queue *q,
        lockdep_assert_held(&q->sysfs_lock);

        if (q->elevator) {
                if (q->elevator->registered)
                if (q->elevator->registered) {
                        mutex_unlock(&q->sysfs_lock);

                        /*
                         * A concurrent elevator switch can't happen because
                         * sysfs writes are always exclusive on the same file.
                         *
                         * Also the elevator queue won't be freed after
                         * sysfs_lock is released because kobject_del() in
                         * blk_unregister_queue() waits for completion of
                         * .store & .show on its attributes.
                         */
                        elv_unregister_queue(q);

                        mutex_lock(&q->sysfs_lock);
                }
                ioc_clear_queue(q);
                elevator_exit(q, q->elevator);

                /*
                 * sysfs_lock may be dropped, so re-check if the queue is
                 * unregistered. If it is, don't switch to the new elevator
                 * any more.
                 */
                if (!blk_queue_registered(q))
                        return 0;
        }
        ret = blk_mq_init_sched(q, new_e);
@@ -578,7 +623,11 @@ int elevator_switch_mq(struct request_queue *q,
                goto out;

        if (new_e) {
                ret = elv_register_queue(q);
                mutex_unlock(&q->sysfs_lock);

                ret = elv_register_queue(q, true);

                mutex_lock(&q->sysfs_lock);
                if (ret) {
                        elevator_exit(q, q->elevator);
                        goto out;
@@ -594,37 +643,89 @@ out:
        return ret;
}

static inline bool elv_support_iosched(struct request_queue *q)
{
        if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
                return false;
        return true;
}

/*
 * For blk-mq devices, we default to using mq-deadline, if available, for single
 * queue devices. If deadline isn't available OR we have multiple queues,
 * default to "none".
 * For single queue devices, default to using mq-deadline. If we have multiple
 * queues or mq-deadline is not available, default to "none".
 */
int elevator_init_mq(struct request_queue *q)
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
        if (q->nr_hw_queues != 1)
                return NULL;

        return elevator_get(q, "mq-deadline", false);
}

/*
 * Get the first elevator providing the features required by the request queue.
 * Default to "none" if no matching elevator is found.
 */
static struct elevator_type *elevator_get_by_features(struct request_queue *q)
{
        struct elevator_type *e, *found = NULL;

        spin_lock(&elv_list_lock);

        list_for_each_entry(e, &elv_list, list) {
                if (elv_support_features(e->elevator_features,
                                         q->required_elevator_features)) {
                        found = e;
                        break;
                }
        }

        if (found && !try_module_get(found->elevator_owner))
                found = NULL;

        spin_unlock(&elv_list_lock);
        return found;
}

/*
 * For a device queue that has no required features, use the default elevator
 * settings. Otherwise, use the first available elevator matching the required
 * features. If no suitable elevator is found or if the chosen elevator
 * initialization fails, fall back to the "none" elevator (no elevator).
 */
void elevator_init_mq(struct request_queue *q)
{
        struct elevator_type *e;
        int err = 0;
        int err;

        if (q->nr_hw_queues != 1)
                return 0;
        if (!elv_support_iosched(q))
                return;

        WARN_ON_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags));

        /*
         * q->sysfs_lock must be held to provide mutual exclusion between
         * elevator_switch() and here.
         */
        mutex_lock(&q->sysfs_lock);
        if (unlikely(q->elevator))
                goto out_unlock;
                return;

        e = elevator_get(q, "mq-deadline", false);
        if (!q->required_elevator_features)
                e = elevator_get_default(q);
        else
                e = elevator_get_by_features(q);
        if (!e)
                goto out_unlock;
                return;

        blk_mq_freeze_queue(q);
        blk_mq_quiesce_queue(q);

        err = blk_mq_init_sched(q, e);
        if (err)

        blk_mq_unquiesce_queue(q);
        blk_mq_unfreeze_queue(q);

        if (err) {
                pr_warn("\"%s\" elevator initialization failed, "
                        "falling back to \"none\"\n", e->elevator_name);
                elevator_put(e);
out_unlock:
        mutex_unlock(&q->sysfs_lock);
        return err;
        }
}

@@ -660,7 +761,7 @@ static int __elevator_change(struct request_queue *q, const char *name)
        struct elevator_type *e;

        /* Make sure queue is not in the middle of being removed */
        if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
        if (!blk_queue_registered(q))
                return -ENOENT;

        /*
@@ -677,7 +778,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
        if (!e)
                return -EINVAL;

        if (q->elevator && elevator_match(q->elevator->type, elevator_name)) {
        if (q->elevator &&
            elevator_match(q->elevator->type, elevator_name, 0)) {
                elevator_put(e);
                return 0;
        }
@@ -685,13 +787,6 @@ static int __elevator_change(struct request_queue *q, const char *name)
        return elevator_switch(q, e);
}

static inline bool elv_support_iosched(struct request_queue *q)
{
        if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
                return false;
        return true;
}

ssize_t elv_iosched_store(struct request_queue *q, const char *name,
                          size_t count)
{
@@ -724,11 +819,13 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)

        spin_lock(&elv_list_lock);
        list_for_each_entry(__e, &elv_list, list) {
                if (elv && elevator_match(elv, __e->elevator_name)) {
                if (elv && elevator_match(elv, __e->elevator_name, 0)) {
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                        continue;
                }
                if (elv_support_iosched(q))
                if (elv_support_iosched(q) &&
                    elevator_match(__e, __e->elevator_name,
                                   q->required_elevator_features))
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock(&elv_list_lock);
@@ -695,6 +695,15 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
        dev_t devt;
        int retval;

        /*
         * The disk queue should now be all set with enough information about
         * the device for the elevator code to pick an adequate default
         * elevator if one is needed, that is, for devices requesting queue
         * registration.
         */
        if (register_queue)
                elevator_init_mq(disk->queue);

        /* minors == 0 indicates to use ext devt from part0 and should
         * be accompanied with EXT_DEVT flag. Make sure all
         * parameters make sense.
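For context, a hedged sketch of the two wrappers that are assumed to sit above __device_add_disk() (signatures abridged from memory, not copied from the tree): only the queue-registering variant reaches the elevator_init_mq() call added above, which is why disks added without queue registration keep getting no default scheduler.

void device_add_disk(struct device *parent, struct gendisk *disk,
                     const struct attribute_group **groups)
{
        __device_add_disk(parent, disk, groups, true);   /* registers the queue */
}

void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
{
        __device_add_disk(parent, disk, NULL, false);    /* skips queue registration */
}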
@@ -377,13 +377,6 @@ done:
 * hardware queue, but we may return a request that is for a
 * different hardware queue. This is because mq-deadline has shared
 * state for all hardware queues, in terms of sorting, FIFOs, etc.
 *
 * For a zoned block device, __dd_dispatch_request() may return NULL
 * if all the queued write requests are directed at zones that are already
 * locked due to on-going write requests. In this case, make sure to mark
 * the queue as needing a restart to ensure that the queue is run again
 * and the pending writes dispatched once the target zones for the ongoing
 * write requests are unlocked in dd_finish_request().
 */
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
@@ -392,9 +385,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)

        spin_lock(&dd->lock);
        rq = __dd_dispatch_request(dd);
        if (!rq && blk_queue_is_zoned(hctx->queue) &&
            !list_empty(&dd->fifo_list[WRITE]))
                blk_mq_sched_mark_restart_hctx(hctx);
        spin_unlock(&dd->lock);

        return rq;
@@ -561,6 +551,13 @@ static void dd_prepare_request(struct request *rq, struct bio *bio)
 * spinlock so that the zone is never unlocked while deadline_fifo_request()
 * or deadline_next_request() are executing. This function is called for
 * all requests, whether or not these requests complete successfully.
 *
 * For a zoned block device, __dd_dispatch_request() may have stopped
 * dispatching requests if all the queued requests are write requests directed
 * at zones that are already locked due to on-going write requests. To ensure
 * write request dispatch progress in this case, mark the queue as needing a
 * restart to ensure that the queue is run again after completion of the
 * request and zones being unlocked.
 */
static void dd_finish_request(struct request *rq)
{
@@ -572,6 +569,8 @@ static void dd_finish_request(struct request *rq)

                spin_lock_irqsave(&dd->zone_lock, flags);
                blk_req_zone_write_unlock(rq);
                if (!list_empty(&dd->fifo_list[WRITE]))
                        blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
                spin_unlock_irqrestore(&dd->zone_lock, flags);
        }
}
@@ -795,6 +794,7 @@ static struct elevator_type mq_deadline = {
        .elevator_attrs = deadline_attrs,
        .elevator_name = "mq-deadline",
        .elevator_alias = "deadline",
        .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
        .elevator_owner = THIS_MODULE,
};
MODULE_ALIAS("mq-deadline-iosched");
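A hedged sketch of how another scheduler could advertise the same capability (the elevator name and ops are hypothetical, only the field and flag come from the hunk above): any scheduler that implements sequential write ordering for zoned devices can set the feature bit in its elevator_type and then becomes eligible both for the feature-based default selection and for manual selection on queues that require ELEVATOR_F_ZBD_SEQ_WRITE.

static struct elevator_type example_zoned_sched = {
        .ops               = { /* dispatch, insert, finish callbacks ... */ },
        .elevator_name     = "example-zoned",
        .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
        .elevator_owner    = THIS_MODULE,
};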
@@ -119,8 +119,6 @@ enum opal_uid {
        OPAL_UID_HEXFF,
};

#define OPAL_METHOD_LENGTH 8

/* Enum for indexing the OPALMETHOD array */
enum opal_method {
        OPAL_PROPERTIES,
@@ -167,7 +165,6 @@ enum opal_token {
        OPAL_TABLE_LASTID = 0x0A,
        OPAL_TABLE_MIN = 0x0B,
        OPAL_TABLE_MAX = 0x0C,

        /* authority table */
        OPAL_PIN = 0x03,
        /* locking tokens */
@@ -182,7 +179,7 @@ enum opal_token {
        OPAL_LIFECYCLE = 0x06,
        /* locking info table */
        OPAL_MAXRANGES = 0x04,
        /* mbr control */
        /* mbr control */
        OPAL_MBRENABLE = 0x01,
        OPAL_MBRDONE = 0x02,
        /* properties */
@@ -129,7 +129,6 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
                { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 },

        /* tables */

        [OPAL_TABLE_TABLE]
                { 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 },
        [OPAL_LOCKINGRANGE_GLOBAL] =
@@ -152,7 +151,6 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
                { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },

        /* C_PIN_TABLE object ID's */

        [OPAL_C_PIN_MSID] =
                { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x84, 0x02},
        [OPAL_C_PIN_SID] =
@@ -161,7 +159,6 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
                { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x01, 0x00, 0x01},

        /* half UID's (only first 4 bytes used) */

        [OPAL_HALF_UID_AUTHORITY_OBJ_REF] =
                { 0x00, 0x00, 0x0C, 0x05, 0xff, 0xff, 0xff, 0xff },
        [OPAL_HALF_UID_BOOLEAN_ACE] =
@@ -517,6 +514,7 @@ static int opal_discovery0(struct opal_dev *dev, void *data)
        ret = opal_recv_cmd(dev);
        if (ret)
                return ret;

        return opal_discovery0_end(dev);
}

@@ -525,6 +523,7 @@ static int opal_discovery0_step(struct opal_dev *dev)
        const struct opal_step discovery0_step = {
                opal_discovery0,
        };

        return execute_step(dev, &discovery0_step, 0);
}
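The single-step wrapper above reuses the step machinery the rest of this file is built on. A hedged sketch of the pattern (the step entries are illustrative; the struct layout is assumed to be a function pointer plus optional data, exactly as the initializers in these hunks suggest):

/* Illustrative only: a two-step sequence run the same way the ioctl
 * handlers below drive execute_steps(), under dev->dev_lock.
 */
static const struct opal_step example_steps[] = {
        { opal_discovery0, },
        { end_opal_session, },
};

static int example_run_steps(struct opal_dev *dev)
{
        return execute_steps(dev, example_steps, ARRAY_SIZE(example_steps));
}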
@@ -551,6 +550,7 @@ static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok)
{
        if (!can_add(err, cmd, 1))
                return;

        cmd->cmd[cmd->pos++] = tok;
}

@@ -577,6 +577,7 @@ static void add_medium_atom_header(struct opal_dev *cmd, bool bytestring,
        header0 |= bytestring ? MEDIUM_ATOM_BYTESTRING : 0;
        header0 |= has_sign ? MEDIUM_ATOM_SIGNED : 0;
        header0 |= (len >> 8) & MEDIUM_ATOM_LEN_MASK;

        cmd->cmd[cmd->pos++] = header0;
        cmd->cmd[cmd->pos++] = len;
}
@@ -649,6 +650,7 @@ static int build_locking_range(u8 *buffer, size_t length, u8 lr)

        if (lr == 0)
                return 0;

        buffer[5] = LOCKING_RANGE_NON_GLOBAL;
        buffer[7] = lr;

@@ -903,10 +905,6 @@ static int response_parse(const u8 *buf, size_t length,
                num_entries++;
        }

        if (num_entries == 0) {
                pr_debug("Couldn't parse response.\n");
                return -EINVAL;
        }
        resp->num = num_entries;

        return 0;
@@ -945,6 +943,7 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
        }

        *store = tok->pos + skip;

        return tok->len - skip;
}

@@ -1062,6 +1061,7 @@ static int start_opal_session_cont(struct opal_dev *dev)

        dev->hsn = hsn;
        dev->tsn = tsn;

        return 0;
}

@@ -1084,6 +1084,7 @@ static int end_session_cont(struct opal_dev *dev)
{
        dev->hsn = 0;
        dev->tsn = 0;

        return parse_and_check_status(dev);
}

@@ -1172,6 +1173,7 @@ static int gen_key(struct opal_dev *dev, void *data)
                return err;

        }

        return finalize_and_send(dev, parse_and_check_status);
}

@@ -1184,12 +1186,14 @@ static int get_active_key_cont(struct opal_dev *dev)
        error = parse_and_check_status(dev);
        if (error)
                return error;

        keylen = response_get_string(&dev->parsed, 4, &activekey);
        if (!activekey) {
                pr_debug("%s: Couldn't extract the Activekey from the response\n",
                         __func__);
                return OPAL_INVAL_PARAM;
        }

        dev->prev_data = kmemdup(activekey, keylen, GFP_KERNEL);

        if (!dev->prev_data)
@@ -1251,6 +1255,7 @@ static int generic_lr_enable_disable(struct opal_dev *dev,

        add_token_u8(&err, dev, OPAL_ENDLIST);
        add_token_u8(&err, dev, OPAL_ENDNAME);

        return err;
}

@@ -1263,6 +1268,7 @@ static inline int enable_global_lr(struct opal_dev *dev, u8 *uid,
                                           0, 0);
        if (err)
                pr_debug("Failed to create enable global lr command\n");

        return err;
}

@@ -1313,7 +1319,6 @@ static int setup_locking_range(struct opal_dev *dev, void *data)
        if (err) {
                pr_debug("Error building Setup Locking range command.\n");
                return err;

        }

        return finalize_and_send(dev, parse_and_check_status);
@@ -1393,6 +1398,7 @@ static int start_SIDASP_opal_session(struct opal_dev *dev, void *data)
                kfree(key);
                dev->prev_data = NULL;
        }

        return ret;
}

@@ -1518,6 +1524,7 @@ static int erase_locking_range(struct opal_dev *dev, void *data)
                pr_debug("Error building Erase Locking Range Command.\n");
                return err;
        }

        return finalize_and_send(dev, parse_and_check_status);
}

@@ -1636,6 +1643,7 @@ static int write_shadow_mbr(struct opal_dev *dev, void *data)

                off += len;
        }

        return err;
}

@@ -1816,6 +1824,7 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data)
                pr_debug("Error building SET command.\n");
                return err;
        }

        return finalize_and_send(dev, parse_and_check_status);
}

@@ -1857,6 +1866,7 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data)
                pr_debug("Error building SET command.\n");
                return ret;
        }

        return finalize_and_send(dev, parse_and_check_status);
}
@@ -1957,6 +1967,7 @@ static int end_opal_session(struct opal_dev *dev, void *data)

        if (err < 0)
                return err;

        return finalize_and_send(dev, end_session_cont);
}

@@ -1965,6 +1976,7 @@ static int end_opal_session_error(struct opal_dev *dev)
        const struct opal_step error_end_session = {
                end_opal_session,
        };

        return execute_step(dev, &error_end_session, 0);
}

@@ -1984,6 +1996,7 @@ static int check_opal_support(struct opal_dev *dev)
        ret = opal_discovery0_step(dev);
        dev->supported = !ret;
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2004,6 +2017,7 @@ void free_opal_dev(struct opal_dev *dev)
{
        if (!dev)
                return;

        clean_opal_dev(dev);
        kfree(dev);
}
@@ -2026,6 +2040,7 @@ struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv)
                kfree(dev);
                return NULL;
        }

        return dev;
}
EXPORT_SYMBOL(init_opal_dev);
@@ -2045,6 +2060,7 @@ static int opal_secure_erase_locking_range(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2062,6 +2078,7 @@ static int opal_erase_locking_range(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, erase_steps, ARRAY_SIZE(erase_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2089,6 +2106,7 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2113,6 +2131,7 @@ static int opal_set_mbr_done(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2133,6 +2152,7 @@ static int opal_write_shadow_mbr(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2151,6 +2171,7 @@ static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk)
        setup_opal_dev(dev);
        add_suspend_info(dev, suspend);
        mutex_unlock(&dev->dev_lock);

        return 0;
}

@@ -2169,12 +2190,14 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
                pr_debug("Locking state was not RO or RW\n");
                return -EINVAL;
        }

        if (lk_unlk->session.who < OPAL_USER1 ||
            lk_unlk->session.who > OPAL_USER9) {
                pr_debug("Authority was not within the range of users: %d\n",
                         lk_unlk->session.who);
                return -EINVAL;
        }

        if (lk_unlk->session.sum) {
                pr_debug("%s not supported in sum. Use setup locking range\n",
                         __func__);
@@ -2185,6 +2208,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, steps, ARRAY_SIZE(steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2267,6 +2291,7 @@ static int opal_lock_unlock(struct opal_dev *dev,
        mutex_lock(&dev->dev_lock);
        ret = __opal_lock_unlock(dev, lk_unlk);
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2289,6 +2314,7 @@ static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal)
        setup_opal_dev(dev);
        ret = execute_steps(dev, owner_steps, ARRAY_SIZE(owner_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2310,6 +2336,7 @@ static int opal_activate_lsp(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, active_steps, ARRAY_SIZE(active_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2327,6 +2354,7 @@ static int opal_setup_locking_range(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, lr_steps, ARRAY_SIZE(lr_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2347,6 +2375,7 @@ static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
        setup_opal_dev(dev);
        ret = execute_steps(dev, pw_steps, ARRAY_SIZE(pw_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2371,6 +2400,7 @@ static int opal_activate_user(struct opal_dev *dev,
        setup_opal_dev(dev);
        ret = execute_steps(dev, act_steps, ARRAY_SIZE(act_steps));
        mutex_unlock(&dev->dev_lock);

        return ret;
}

@@ -2382,6 +2412,7 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)

        if (!dev)
                return false;

        if (!dev->supported)
                return false;

@@ -2399,6 +2430,7 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
                                suspend->unlk.session.sum);
                        was_failure = true;
                }

                if (dev->mbr_enabled) {
                        ret = __opal_set_mbr_done(dev, &suspend->unlk.session.opal_key);
                        if (ret)
@@ -2406,6 +2438,7 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
                }
        }
        mutex_unlock(&dev->dev_lock);

        return was_failure;
}
EXPORT_SYMBOL(opal_unlock_from_suspend);
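For context, a hedged sketch of how the exported helper above is typically consumed; the driver structure and field names are hypothetical, only opal_unlock_from_suspend() and the idea that locking ranges saved via IOC_OPAL_SAVE are replayed after a power cycle come from the code itself:

/* Sketch: a controller driver replaying saved locking ranges on resume. */
static void example_controller_resume(struct example_ctrl *ctrl)
{
        if (ctrl->opal_dev)
                opal_unlock_from_suspend(ctrl->opal_dev);
}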