From ddcc286900732953ac2e950b6ad0f9a4933767fb Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 28 May 2012 12:18:39 +0530 Subject: [PATCH 01/10] virtio ids: fix comment for virtio-rng It's virtio-rng, not virtio-ring. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell --- include/linux/virtio_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 7529b854b7fd..270fb22c5811 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h @@ -32,7 +32,7 @@ #define VIRTIO_ID_NET 1 /* virtio net */ #define VIRTIO_ID_BLOCK 2 /* virtio block */ #define VIRTIO_ID_CONSOLE 3 /* virtio console */ -#define VIRTIO_ID_RNG 4 /* virtio ring */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ #define VIRTIO_ID_BALLOON 5 /* virtio balloon */ #define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ #define VIRTIO_ID_SCSI 8 /* virtio scsi */ From cc8744e12936680478ce82b0f21dbaa272df1447 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 28 May 2012 12:18:40 +0530 Subject: [PATCH 02/10] virtio: rng: allow tasks to be killed that are waiting for rng input Use wait_for_completion_killable() instead of wait_for_completion() when waiting for the host to send us entropy. Without this, # cat /dev/hwrng ^C just hangs. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/char/hw_random/virtio-rng.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 723725bbb96b..c8a935034218 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -55,6 +55,7 @@ static void register_buffer(u8 *buf, size_t size) static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { + int ret; if (!busy) { busy = true; @@ -65,7 +66,9 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if (!wait) return 0; - wait_for_completion(&have_data); + ret = wait_for_completion_killable(&have_data); + if (ret < 0) + return ret; busy = false; From 4476987a9a4525db3ebe29538cc357ca589db4ac Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 28 May 2012 12:18:41 +0530 Subject: [PATCH 03/10] virtio: rng: don't wait on host when module is going away No use waiting for input from host when the module is being removed. We're going to remove the vq in the next step anyway, so just perform any other steps for cleanup (currently none). Signed-off-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/char/hw_random/virtio-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index c8a935034218..2dc9ce183cc6 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -109,6 +109,7 @@ static int virtrng_probe(struct virtio_device *vdev) static void __devexit virtrng_remove(struct virtio_device *vdev) { vdev->config->reset(vdev); + busy = false; hwrng_unregister(&virtio_hwrng); vdev->config->del_vqs(vdev); } From 178d855e7810deecb7fa96afdf82ec45b0284233 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 28 May 2012 12:18:42 +0530 Subject: [PATCH 04/10] virtio: rng: split out common code in probe / remove for s3/s4 ops The freeze/restore s3/s4 operations will use code that's common to the probe and remove routines. Put the common code in separate funcitons. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/char/hw_random/virtio-rng.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 2dc9ce183cc6..a9673a757009 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -88,7 +88,7 @@ static struct hwrng virtio_hwrng = { .read = virtio_read, }; -static int virtrng_probe(struct virtio_device *vdev) +static int probe_common(struct virtio_device *vdev) { int err; @@ -106,7 +106,7 @@ static int virtrng_probe(struct virtio_device *vdev) return 0; } -static void __devexit virtrng_remove(struct virtio_device *vdev) +static void remove_common(struct virtio_device *vdev) { vdev->config->reset(vdev); busy = false; @@ -114,6 +114,16 @@ static void __devexit virtrng_remove(struct virtio_device *vdev) vdev->config->del_vqs(vdev); } +static int virtrng_probe(struct virtio_device *vdev) +{ + return probe_common(vdev); +} + +static void __devexit virtrng_remove(struct virtio_device *vdev) +{ + remove_common(vdev); +} + static struct virtio_device_id id_table[] = { { VIRTIO_ID_RNG, VIRTIO_DEV_ANY_ID }, { 0 }, From 0bc1a2ef19b45bb23617b203bc631b44609f17ba Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Mon, 28 May 2012 12:18:43 +0530 Subject: [PATCH 05/10] virtio: rng: s3/s4 support Unregister from the hwrng interface and remove the vq before entering the S3 or S4 states. Add the vq and re-register with hwrng on restore. Signed-off-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/char/hw_random/virtio-rng.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index a9673a757009..5708299507d0 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -124,6 +124,19 @@ static void __devexit virtrng_remove(struct virtio_device *vdev) remove_common(vdev); } +#ifdef CONFIG_PM +static int virtrng_freeze(struct virtio_device *vdev) +{ + remove_common(vdev); + return 0; +} + +static int virtrng_restore(struct virtio_device *vdev) +{ + return probe_common(vdev); +} +#endif + static struct virtio_device_id id_table[] = { { VIRTIO_ID_RNG, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -135,6 +148,10 @@ static struct virtio_driver virtio_rng_driver = { .id_table = id_table, .probe = virtrng_probe, .remove = __devexit_p(virtrng_remove), +#ifdef CONFIG_PM + .freeze = virtrng_freeze, + .restore = virtrng_restore, +#endif }; static int __init init(void) From 02e2b124943648fba0a2ccee5c3656a5653e0151 Mon Sep 17 00:00:00 2001 From: Asias He Date: Fri, 25 May 2012 10:34:47 +0800 Subject: [PATCH 06/10] virtio-blk: Call del_gendisk() before disable guest kick del_gendisk() might not return due to failing to remove the /sys/block/vda/serial sysfs entry when another thread (udev) is trying to read it. virtblk_remove() vdev->config->reset() : guest will not kick us through interrupt del_gendisk() device_del() kobject_del(): got stuck, sysfs entry ref count non zero sysfs_open_file(): user space process read /sys/block/vda/serial sysfs_get_active() : got sysfs entry ref count dev_attr_show() virtblk_serial_show() blk_execute_rq() : got stuck, interrupt is disabled request cannot be finished This patch fixes it by calling del_gendisk() before we disable guest's interrupt so that the request sent in virtblk_serial_show() will be finished and del_gendisk() will success. This fixes another race in hot-unplug process. It is save to call del_gendisk(vblk->disk) before flush_work(&vblk->config_work) which might access vblk->disk, because vblk->disk is not freed until put_disk(vblk->disk). Cc: virtualization@lists.linux-foundation.org Cc: kvm@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 693187df7601..1bed51712dd1 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -584,13 +584,13 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) vblk->config_enable = false; mutex_unlock(&vblk->config_lock); + del_gendisk(vblk->disk); + /* Stop all the virtqueues. */ vdev->config->reset(vdev); flush_work(&vblk->config_work); - del_gendisk(vblk->disk); - /* Abort requests dispatched to driver. */ spin_lock_irqsave(&vblk->lock, flags); while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) { From 483001c765af6892b3fc3726576cb42f17d1d6b5 Mon Sep 17 00:00:00 2001 From: Asias He Date: Fri, 25 May 2012 10:34:48 +0800 Subject: [PATCH 07/10] virtio-blk: Reset device after blk_cleanup_queue() blk_cleanup_queue() will call blk_drian_queue() to drain all the requests before queue DEAD marking. If we reset the device before blk_cleanup_queue() the drain would fail. 1) if the queue is stopped in do_virtblk_request() because device is full, the q->request_fn() will not be called. blk_drain_queue() { while(true) { ... if (!list_empty(&q->queue_head)) __blk_run_queue(q) { if (queue is not stoped) q->request_fn() } ... } } Do no reset the device before blk_cleanup_queue() gives the chance to start the queue in interrupt handler blk_done(). 2) In commit b79d866c8b7014a51f611a64c40546109beaf24a, We abort requests dispatched to driver before blk_cleanup_queue(). There is a race if requests are dispatched to driver after the abort and before the queue DEAD mark. To fix this, instead of aborting the requests explicitly, we can just reset the device after after blk_cleanup_queue so that the device can complete all the requests before queue DEAD marking in the drain process. Cc: Rusty Russell Cc: virtualization@lists.linux-foundation.org Cc: kvm@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1bed51712dd1..b4fa2d71496e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -576,8 +576,6 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; int index = vblk->index; - struct virtblk_req *vbr; - unsigned long flags; /* Prevent config work handler from accessing the device. */ mutex_lock(&vblk->config_lock); @@ -585,21 +583,13 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) mutex_unlock(&vblk->config_lock); del_gendisk(vblk->disk); + blk_cleanup_queue(vblk->disk->queue); /* Stop all the virtqueues. */ vdev->config->reset(vdev); flush_work(&vblk->config_work); - /* Abort requests dispatched to driver. */ - spin_lock_irqsave(&vblk->lock, flags); - while ((vbr = virtqueue_detach_unused_buf(vblk->vq))) { - __blk_end_request_all(vbr->req, -EIO); - mempool_free(vbr, vblk->pool); - } - spin_unlock_irqrestore(&vblk->lock, flags); - - blk_cleanup_queue(vblk->disk->queue); put_disk(vblk->disk); mempool_destroy(vblk->pool); vdev->config->del_vqs(vdev); From 2c95a3290919541b846bee3e0fbaa75860929f53 Mon Sep 17 00:00:00 2001 From: Asias He Date: Fri, 25 May 2012 16:03:27 +0800 Subject: [PATCH 08/10] virtio-blk: Use block layer provided spinlock Block layer will allocate a spinlock for the queue if the driver does not provide one in blk_init_queue(). The reason to use the internal spinlock is that blk_cleanup_queue() will switch to use the internal spinlock in the cleanup code path. if (q->queue_lock != &q->__queue_lock) q->queue_lock = &q->__queue_lock; However, processes which are in D state might have taken the driver provided spinlock, when the processes wake up, they would release the block provided spinlock. ===================================== [ BUG: bad unlock balance detected! ] 3.4.0-rc7+ #238 Not tainted ------------------------------------- fio/3587 is trying to release lock (&(&q->__queue_lock)->rlock) at: [] blk_queue_bio+0x2a2/0x380 but there are no more locks to release! other info that might help us debug this: 1 lock held by fio/3587: #0: (&(&vblk->lock)->rlock){......}, at: [] get_request_wait+0x19a/0x250 Other drivers use block layer provided spinlock as well, e.g. SCSI. Switching to the block layer provided spinlock saves a bit of memory and does not increase lock contention. Performance test shows no real difference is observed before and after this patch. Changes in v2: Improve commit log as Michael suggested. Cc: virtualization@lists.linux-foundation.org Cc: kvm@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Asias He Acked-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b4fa2d71496e..774c31dce7c0 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -21,8 +21,6 @@ struct workqueue_struct *virtblk_wq; struct virtio_blk { - spinlock_t lock; - struct virtio_device *vdev; struct virtqueue *vq; @@ -65,7 +63,7 @@ static void blk_done(struct virtqueue *vq) unsigned int len; unsigned long flags; - spin_lock_irqsave(&vblk->lock, flags); + spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { int error; @@ -99,7 +97,7 @@ static void blk_done(struct virtqueue *vq) } /* In case queue is stopped waiting for more buffers. */ blk_start_queue(vblk->disk->queue); - spin_unlock_irqrestore(&vblk->lock, flags); + spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags); } static bool do_req(struct request_queue *q, struct virtio_blk *vblk, @@ -431,7 +429,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) goto out_free_index; } - spin_lock_init(&vblk->lock); vblk->vdev = vdev; vblk->sg_elems = sg_elems; sg_init_table(vblk->sg, vblk->sg_elems); @@ -456,7 +453,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) goto out_mempool; } - q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); + q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL); if (!q) { err = -ENOMEM; goto out_put_disk; From cd5d503862b0d0d927c56ef2e34d3ededac88039 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 3 Jul 2012 15:19:37 +0200 Subject: [PATCH 09/10] virtio-blk: allow toggling host cache between writeback and writethrough This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature, which exposes the cache mode in the configuration space and lets the driver modify it. The cache mode is exposed via sysfs. Even if the host does not support the new feature, the cache mode is visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable. Signed-off-by: Paolo Bonzini Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 90 ++++++++++++++++++++++++++++++++++++-- include/linux/virtio_blk.h | 5 ++- 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 774c31dce7c0..c0bbeb470754 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -395,6 +395,83 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) return 0; } +static int virtblk_get_cache_mode(struct virtio_device *vdev) +{ + u8 writeback; + int err; + + err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE, + offsetof(struct virtio_blk_config, wce), + &writeback); + if (err) + writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE); + + return writeback; +} + +static void virtblk_update_cache_mode(struct virtio_device *vdev) +{ + u8 writeback = virtblk_get_cache_mode(vdev); + struct virtio_blk *vblk = vdev->priv; + + if (writeback) + blk_queue_flush(vblk->disk->queue, REQ_FLUSH); + else + blk_queue_flush(vblk->disk->queue, 0); + + revalidate_disk(vblk->disk); +} + +static const char *const virtblk_cache_types[] = { + "write through", "write back" +}; + +static ssize_t +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct gendisk *disk = dev_to_disk(dev); + struct virtio_blk *vblk = disk->private_data; + struct virtio_device *vdev = vblk->vdev; + int i; + u8 writeback; + + BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); + for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; ) + if (sysfs_streq(buf, virtblk_cache_types[i])) + break; + + if (i < 0) + return -EINVAL; + + writeback = i; + vdev->config->set(vdev, + offsetof(struct virtio_blk_config, wce), + &writeback, sizeof(writeback)); + + virtblk_update_cache_mode(vdev); + return count; +} + +static ssize_t +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + struct virtio_blk *vblk = disk->private_data; + u8 writeback = virtblk_get_cache_mode(vblk->vdev); + + BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); + return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); +} + +static const struct device_attribute dev_attr_cache_type_ro = + __ATTR(cache_type, S_IRUGO, + virtblk_cache_type_show, NULL); +static const struct device_attribute dev_attr_cache_type_rw = + __ATTR(cache_type, S_IRUGO|S_IWUSR, + virtblk_cache_type_show, virtblk_cache_type_store); + static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -471,8 +548,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->index = index; /* configure queue flush support */ - if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) - blk_queue_flush(q, REQ_FLUSH); + virtblk_update_cache_mode(vdev); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) @@ -550,6 +626,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) if (err) goto out_del_disk; + if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) + err = device_create_file(disk_to_dev(vblk->disk), + &dev_attr_cache_type_rw); + else + err = device_create_file(disk_to_dev(vblk->disk), + &dev_attr_cache_type_ro); + if (err) + goto out_del_disk; return 0; out_del_disk: @@ -642,7 +726,7 @@ static const struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, - VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY + VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE }; /* diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index e0edb40ca7aa..e2aba15f545d 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -37,8 +37,9 @@ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ #define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ -#define VIRTIO_BLK_F_FLUSH 9 /* Cache flush command support */ +#define VIRTIO_BLK_F_WCE 9 /* Writeback mode enabled after reset */ #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ +#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ #define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ @@ -69,6 +70,8 @@ struct virtio_blk_config { /* optimal sustained I/O size in logical blocks. */ __u32 opt_io_size; + /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ + __u8 wce; } __attribute__((packed)); /* From 6a743897144500fb4c4566ced3a498d5180fbb5b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2012 13:30:52 +0930 Subject: [PATCH 10/10] virtio-blk: return VIRTIO_BLK_F_FLUSH to header. This got renamed and clarified, but let's not break any userspace out there. Signed-off-by: Rusty Russell --- include/linux/virtio_blk.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index e2aba15f545d..6d8e61c48563 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -41,6 +41,11 @@ #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ #define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ +#ifndef __KERNEL__ +/* Old (deprecated) name for VIRTIO_BLK_F_WCE. */ +#define VIRTIO_BLK_F_FLUSH VIRTIO_BLK_F_WCE +#endif + #define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ struct virtio_blk_config {