Merge branch 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
Pull virtio changes from Rusty Russell:
 "New workflow: same git trees pulled by linux-next get sent straight
  to Linus. Git is awkward at shuffling patches compared with quilt or
  mq, but that doesn't happen often once things get into my -next
  branch."

* 'virtio-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (24 commits)
  lguest: fix occasional crash in example launcher.
  virtio-blk: Disable callback in virtblk_done()
  virtio_mmio: Don't attempt to create empty virtqueues
  virtio_mmio: fix off by one error allocating queue
  drivers/virtio/virtio_pci.c: fix error return code
  virtio: don't crash when device is buggy
  virtio: remove CONFIG_VIRTIO_RING
  virtio: add help to CONFIG_VIRTIO option.
  virtio: support reserved vqs
  virtio: introduce an API to set affinity for a virtqueue
  virtio-ring: move queue_index to vring_virtqueue
  virtio_balloon: not EXPERIMENTAL any more.
  virtio-balloon: dependency fix
  virtio-blk: fix NULL checking in virtblk_alloc_req()
  virtio-blk: Add REQ_FLUSH and REQ_FUA support to bio path
  virtio-blk: Add bio-based IO path for virtio-blk
  virtio: console: fix error handling in init() function
  tools: Fix pthread flag for Makefile of trace-agent used by virtio-trace
  tools: Add guest trace agent as a user tool
  virtio/console: Allocate scatterlist according to the current pipe size
  ...
@@ -656,7 +656,6 @@ config S390_GUEST
 	depends on 64BIT && EXPERIMENTAL
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Enabling this option adds support for virtio based paravirtual device

@@ -4,7 +4,6 @@ config LGUEST_GUEST
 	depends on X86_32
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Lguest is a tiny in-kernel hypervisor.  Selecting this will

@@ -14,6 +14,9 @@
 
 #define PART_BITS 4
 
+static bool use_bio;
+module_param(use_bio, bool, S_IRUGO);
+
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
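
[Note: module_param(..., S_IRUGO) makes use_bio a load-time, read-only parameter, so the bio path is selected per module load (for example "modprobe virtio_blk use_bio=1", after which the value is visible under /sys/module/virtio_blk/parameters/use_bio); the request-queue path remains the default.]
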
@@ -23,6 +26,7 @@ struct virtio_blk
 {
 	struct virtio_device *vdev;
 	struct virtqueue *vq;
+	wait_queue_head_t queue_wait;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
@@ -51,53 +55,244 @@ struct virtio_blk
 struct virtblk_req
 {
 	struct request *req;
+	struct bio *bio;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
+	struct work_struct work;
+	struct virtio_blk *vblk;
+	int flags;
 	u8 status;
+	struct scatterlist sg[];
 };
 
-static void blk_done(struct virtqueue *vq)
+enum {
+	VBLK_IS_FLUSH = 1,
+	VBLK_REQ_FLUSH = 2,
+	VBLK_REQ_DATA = 4,
+	VBLK_REQ_FUA = 8,
+};
+
+static inline int virtblk_result(struct virtblk_req *vbr)
+{
+	switch (vbr->status) {
+	case VIRTIO_BLK_S_OK:
+		return 0;
+	case VIRTIO_BLK_S_UNSUPP:
+		return -ENOTTY;
+	default:
+		return -EIO;
+	}
+}
+
+static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
+						    gfp_t gfp_mask)
 {
-	struct virtio_blk *vblk = vq->vdev->priv;
 	struct virtblk_req *vbr;
-	unsigned int len;
-	unsigned long flags;
 
-	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
-	while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
-		int error;
+	vbr = mempool_alloc(vblk->pool, gfp_mask);
+	if (!vbr)
+		return NULL;
 
-		switch (vbr->status) {
-		case VIRTIO_BLK_S_OK:
-			error = 0;
-			break;
-		case VIRTIO_BLK_S_UNSUPP:
-			error = -ENOTTY;
-			break;
-		default:
-			error = -EIO;
+	vbr->vblk = vblk;
+	if (use_bio)
+		sg_init_table(vbr->sg, vblk->sg_elems);
+
+	return vbr;
+}
+
+static void virtblk_add_buf_wait(struct virtio_blk *vblk,
+				 struct virtblk_req *vbr,
+				 unsigned long out,
+				 unsigned long in)
+{
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
+					  TASK_UNINTERRUPTIBLE);
+
+		spin_lock_irq(vblk->disk->queue->queue_lock);
+		if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+				      GFP_ATOMIC) < 0) {
+			spin_unlock_irq(vblk->disk->queue->queue_lock);
+			io_schedule();
+		} else {
+			virtqueue_kick(vblk->vq);
+			spin_unlock_irq(vblk->disk->queue->queue_lock);
 			break;
 		}
 
-		switch (vbr->req->cmd_type) {
-		case REQ_TYPE_BLOCK_PC:
-			vbr->req->resid_len = vbr->in_hdr.residual;
-			vbr->req->sense_len = vbr->in_hdr.sense_len;
-			vbr->req->errors = vbr->in_hdr.errors;
-			break;
-		case REQ_TYPE_SPECIAL:
-			vbr->req->errors = (error != 0);
-			break;
-		default:
-			break;
-		}
+	}
 
-		__blk_end_request_all(vbr->req, error);
+	finish_wait(&vblk->queue_wait, &wait);
+}
+
+static inline void virtblk_add_req(struct virtblk_req *vbr,
+				   unsigned int out, unsigned int in)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+
+	spin_lock_irq(vblk->disk->queue->queue_lock);
+	if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
+					GFP_ATOMIC) < 0)) {
+		spin_unlock_irq(vblk->disk->queue->queue_lock);
+		virtblk_add_buf_wait(vblk, vbr, out, in);
+		return;
+	}
+	virtqueue_kick(vblk->vq);
+	spin_unlock_irq(vblk->disk->queue->queue_lock);
+}
+
+static int virtblk_bio_send_flush(struct virtblk_req *vbr)
+{
+	unsigned int out = 0, in = 0;
+
+	vbr->flags |= VBLK_IS_FLUSH;
+	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+	vbr->out_hdr.sector = 0;
+	vbr->out_hdr.ioprio = 0;
+	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+	sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));
+
+	virtblk_add_req(vbr, out, in);
+
+	return 0;
+}
+
+static int virtblk_bio_send_data(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+	unsigned int num, out = 0, in = 0;
+	struct bio *bio = vbr->bio;
+
+	vbr->flags &= ~VBLK_IS_FLUSH;
+	vbr->out_hdr.type = 0;
+	vbr->out_hdr.sector = bio->bi_sector;
+	vbr->out_hdr.ioprio = bio_prio(bio);
+
+	sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+
+	num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);
+
+	sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
+		   sizeof(vbr->status));
+
+	if (num) {
+		if (bio->bi_rw & REQ_WRITE) {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+			out += num;
+		} else {
+			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+			in += num;
+		}
+	}
+
+	virtblk_add_req(vbr, out, in);
+
+	return 0;
+}
+
+static void virtblk_bio_send_data_work(struct work_struct *work)
+{
+	struct virtblk_req *vbr;
+
+	vbr = container_of(work, struct virtblk_req, work);
+
+	virtblk_bio_send_data(vbr);
+}
+
+static void virtblk_bio_send_flush_work(struct work_struct *work)
+{
+	struct virtblk_req *vbr;
+
+	vbr = container_of(work, struct virtblk_req, work);
+
+	virtblk_bio_send_flush(vbr);
+}
+
+static inline void virtblk_request_done(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+	struct request *req = vbr->req;
+	int error = virtblk_result(vbr);
+
+	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
+		req->resid_len = vbr->in_hdr.residual;
+		req->sense_len = vbr->in_hdr.sense_len;
+		req->errors = vbr->in_hdr.errors;
+	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
+		req->errors = (error != 0);
+	}
+
+	__blk_end_request_all(req, error);
+	mempool_free(vbr, vblk->pool);
+}
+
+static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+
+	if (vbr->flags & VBLK_REQ_DATA) {
+		/* Send out the actual write data */
+		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
+		queue_work(virtblk_wq, &vbr->work);
+	} else {
+		bio_endio(vbr->bio, virtblk_result(vbr));
 		mempool_free(vbr, vblk->pool);
 	}
+}
+
+static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
+{
+	struct virtio_blk *vblk = vbr->vblk;
+
+	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
+		/* Send out a flush before end the bio */
+		vbr->flags &= ~VBLK_REQ_DATA;
+		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
+		queue_work(virtblk_wq, &vbr->work);
+	} else {
+		bio_endio(vbr->bio, virtblk_result(vbr));
+		mempool_free(vbr, vblk->pool);
+	}
+}
+
+static inline void virtblk_bio_done(struct virtblk_req *vbr)
+{
+	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
+		virtblk_bio_flush_done(vbr);
+	else
+		virtblk_bio_data_done(vbr);
+}
+
+static void virtblk_done(struct virtqueue *vq)
+{
+	struct virtio_blk *vblk = vq->vdev->priv;
+	bool bio_done = false, req_done = false;
+	struct virtblk_req *vbr;
+	unsigned long flags;
+	unsigned int len;
+
+	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
+	do {
+		virtqueue_disable_cb(vq);
+		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+			if (vbr->bio) {
+				virtblk_bio_done(vbr);
+				bio_done = true;
+			} else {
+				virtblk_request_done(vbr);
+				req_done = true;
+			}
+		}
+	} while (!virtqueue_enable_cb(vq));
 	/* In case queue is stopped waiting for more buffers. */
-	blk_start_queue(vblk->disk->queue);
+	if (req_done)
+		blk_start_queue(vblk->disk->queue);
 	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
+
+	if (bio_done)
+		wake_up(&vblk->queue_wait);
 }
 
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
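
[Note: the do/while around virtqueue_disable_cb()/virtqueue_enable_cb() in virtblk_done() above is the idiom the "virtio-blk: Disable callback in virtblk_done()" commit introduces: callbacks are suppressed while the ring is drained, and virtqueue_enable_cb() returns false when buffers arrived in the race window, forcing another pass. A minimal sketch of the same pattern for a hypothetical driver (the mydev_* names are illustrative; only the virtqueue_* calls are real API):

	static void mydev_done(struct virtqueue *vq)
	{
		struct mydev *md = vq->vdev->priv;	/* hypothetical driver state */
		unsigned int len;
		void *buf;

		do {
			virtqueue_disable_cb(vq);	/* suppress callbacks while draining */
			while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
				mydev_complete(md, buf, len);	/* hypothetical completion */
		} while (!virtqueue_enable_cb(vq));	/* false: more arrived, re-drain */
	}
]
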
@@ -106,13 +301,13 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	unsigned long num, out = 0, in = 0;
 	struct virtblk_req *vbr;
 
-	vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
+	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
 	if (!vbr)
 		/* When another request finishes we'll try again. */
 		return false;
 
 	vbr->req = req;
+	vbr->bio = NULL;
 	if (req->cmd_flags & REQ_FLUSH) {
 		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
@@ -172,7 +367,8 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
+	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
+			      GFP_ATOMIC) < 0) {
 		mempool_free(vbr, vblk->pool);
 		return false;
 	}
@@ -180,7 +376,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 	return true;
 }
 
-static void do_virtblk_request(struct request_queue *q)
+static void virtblk_request(struct request_queue *q)
 {
 	struct virtio_blk *vblk = q->queuedata;
 	struct request *req;
@@ -203,6 +399,34 @@ static void do_virtblk_request(struct request_queue *q)
 		virtqueue_kick(vblk->vq);
 }
 
+static void virtblk_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct virtio_blk *vblk = q->queuedata;
+	struct virtblk_req *vbr;
+
+	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
+
+	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
+	if (!vbr) {
+		bio_endio(bio, -ENOMEM);
+		return;
+	}
+
+	vbr->bio = bio;
+	vbr->flags = 0;
+	if (bio->bi_rw & REQ_FLUSH)
+		vbr->flags |= VBLK_REQ_FLUSH;
+	if (bio->bi_rw & REQ_FUA)
+		vbr->flags |= VBLK_REQ_FUA;
+	if (bio->bi_size)
+		vbr->flags |= VBLK_REQ_DATA;
+
+	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
+		virtblk_bio_send_flush(vbr);
+	else
+		virtblk_bio_send_data(vbr);
+}
+
 /* return id (s/n) string for *disk to *id_str
  */
 static int virtblk_get_id(struct gendisk *disk, char *id_str)
@@ -360,7 +584,7 @@ static int init_vq(struct virtio_blk *vblk)
 	int err = 0;
 
 	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
+	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
 	if (IS_ERR(vblk->vq))
 		err = PTR_ERR(vblk->vq);
 
@@ -477,6 +701,8 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	struct virtio_blk *vblk;
 	struct request_queue *q;
 	int err, index;
+	int pool_size;
+
 	u64 cap;
 	u32 v, blk_size, sg_elems, opt_io_size;
 	u16 min_io_size;
@@ -506,10 +732,12 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_free_index;
 	}
 
+	init_waitqueue_head(&vblk->queue_wait);
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
 	sg_init_table(vblk->sg, vblk->sg_elems);
 	mutex_init(&vblk->config_lock);
+
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
 	vblk->config_enable = true;
 
@@ -517,7 +745,10 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	if (err)
 		goto out_free_vblk;
 
-	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
+	pool_size = sizeof(struct virtblk_req);
+	if (use_bio)
+		pool_size += sizeof(struct scatterlist) * sg_elems;
+	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
 	if (!vblk->pool) {
 		err = -ENOMEM;
 		goto out_free_vq;
@@ -530,12 +761,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 		goto out_mempool;
 	}
 
-	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, NULL);
+	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
 	if (!q) {
 		err = -ENOMEM;
 		goto out_put_disk;
 	}
 
+	if (use_bio)
+		blk_queue_make_request(q, virtblk_make_request);
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@@ -620,7 +853,6 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
-
 	add_disk(vblk->disk);
 	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
 	if (err)

@@ -24,6 +24,8 @@
 #include <linux/err.h>
 #include <linux/freezer.h>
 #include <linux/fs.h>
+#include <linux/splice.h>
+#include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/poll.h>
@@ -474,26 +476,53 @@ static ssize_t send_control_msg(struct port *port, unsigned int event,
 	return 0;
 }
 
+struct buffer_token {
+	union {
+		void *buf;
+		struct scatterlist *sg;
+	} u;
+	/* If sgpages == 0 then buf is used, else sg is used */
+	unsigned int sgpages;
+};
+
+static void reclaim_sg_pages(struct scatterlist *sg, unsigned int nrpages)
+{
+	int i;
+	struct page *page;
+
+	for (i = 0; i < nrpages; i++) {
+		page = sg_page(&sg[i]);
+		if (!page)
+			break;
+		put_page(page);
+	}
+	kfree(sg);
+}
+
 /* Callers must take the port->outvq_lock */
 static void reclaim_consumed_buffers(struct port *port)
 {
-	void *buf;
+	struct buffer_token *tok;
 	unsigned int len;
 
 	if (!port->portdev) {
 		/* Device has been unplugged.  vqs are already gone. */
 		return;
 	}
-	while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
-		kfree(buf);
+	while ((tok = virtqueue_get_buf(port->out_vq, &len))) {
+		if (tok->sgpages)
+			reclaim_sg_pages(tok->u.sg, tok->sgpages);
+		else
+			kfree(tok->u.buf);
+		kfree(tok);
 		port->outvq_full = false;
 	}
 }
 
-static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
-			bool nonblock)
+static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
+			      int nents, size_t in_count,
+			      struct buffer_token *tok, bool nonblock)
 {
-	struct scatterlist sg[1];
 	struct virtqueue *out_vq;
 	ssize_t ret;
 	unsigned long flags;
@@ -505,8 +534,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
 
 	reclaim_consumed_buffers(port);
 
-	sg_init_one(sg, in_buf, in_count);
-	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
+	ret = virtqueue_add_buf(out_vq, sg, nents, 0, tok, GFP_ATOMIC);
 
 	/* Tell Host to go! */
 	virtqueue_kick(out_vq);
@@ -544,6 +572,37 @@ done:
 	return in_count;
 }
 
+static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
+			bool nonblock)
+{
+	struct scatterlist sg[1];
+	struct buffer_token *tok;
+
+	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+	if (!tok)
+		return -ENOMEM;
+	tok->sgpages = 0;
+	tok->u.buf = in_buf;
+
+	sg_init_one(sg, in_buf, in_count);
+
+	return __send_to_port(port, sg, 1, in_count, tok, nonblock);
+}
+
+static ssize_t send_pages(struct port *port, struct scatterlist *sg, int nents,
+			  size_t in_count, bool nonblock)
+{
+	struct buffer_token *tok;
+
+	tok = kmalloc(sizeof(*tok), GFP_ATOMIC);
+	if (!tok)
+		return -ENOMEM;
+	tok->sgpages = nents;
+	tok->u.sg = sg;
+
+	return __send_to_port(port, sg, nents, in_count, tok, nonblock);
+}
+
 /*
  * Give out the data that's requested from the buffer that we have
  * queued up.
@@ -665,6 +724,26 @@ static ssize_t port_fops_read(struct file *filp, char __user *ubuf,
 	return fill_readbuf(port, ubuf, count, true);
 }
 
+static int wait_port_writable(struct port *port, bool nonblock)
+{
+	int ret;
+
+	if (will_write_block(port)) {
+		if (nonblock)
+			return -EAGAIN;
+
+		ret = wait_event_freezable(port->waitqueue,
+					   !will_write_block(port));
+		if (ret < 0)
+			return ret;
+	}
+	/* Port got hot-unplugged. */
+	if (!port->guest_connected)
+		return -ENODEV;
+
+	return 0;
+}
+
 static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
 			       size_t count, loff_t *offp)
 {
@@ -681,18 +760,9 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf,
 
 	nonblock = filp->f_flags & O_NONBLOCK;
 
-	if (will_write_block(port)) {
-		if (nonblock)
-			return -EAGAIN;
-
-		ret = wait_event_freezable(port->waitqueue,
-					   !will_write_block(port));
-		if (ret < 0)
-			return ret;
-	}
-	/* Port got hot-unplugged. */
-	if (!port->guest_connected)
-		return -ENODEV;
+	ret = wait_port_writable(port, nonblock);
+	if (ret < 0)
+		return ret;
 
 	count = min((size_t)(32 * 1024), count);
 
@@ -725,6 +795,93 @@ out:
 	return ret;
 }
 
+struct sg_list {
+	unsigned int n;
+	unsigned int size;
+	size_t len;
+	struct scatterlist *sg;
+};
+
+static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+			struct splice_desc *sd)
+{
+	struct sg_list *sgl = sd->u.data;
+	unsigned int offset, len;
+
+	if (sgl->n == sgl->size)
+		return 0;
+
+	/* Try lock this page */
+	if (buf->ops->steal(pipe, buf) == 0) {
+		/* Get reference and unlock page for moving */
+		get_page(buf->page);
+		unlock_page(buf->page);
+
+		len = min(buf->len, sd->len);
+		sg_set_page(&(sgl->sg[sgl->n]), buf->page, len, buf->offset);
+	} else {
+		/* Failback to copying a page */
+		struct page *page = alloc_page(GFP_KERNEL);
+		char *src = buf->ops->map(pipe, buf, 1);
+		char *dst;
+
+		if (!page)
+			return -ENOMEM;
+		dst = kmap(page);
+
+		offset = sd->pos & ~PAGE_MASK;
+
+		len = sd->len;
+		if (len + offset > PAGE_SIZE)
+			len = PAGE_SIZE - offset;
+
+		memcpy(dst + offset, src + buf->offset, len);
+
+		kunmap(page);
+		buf->ops->unmap(pipe, buf, src);
+
+		sg_set_page(&(sgl->sg[sgl->n]), page, len, offset);
+	}
+	sgl->n++;
+	sgl->len += len;
+
+	return len;
+}
+
+/* Faster zero-copy write by splicing */
+static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe,
+				      struct file *filp, loff_t *ppos,
+				      size_t len, unsigned int flags)
+{
+	struct port *port = filp->private_data;
+	struct sg_list sgl;
+	ssize_t ret;
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.data = &sgl,
+	};
+
+	ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
+	if (ret < 0)
+		return ret;
+
+	sgl.n = 0;
+	sgl.len = 0;
+	sgl.size = pipe->nrbufs;
+	sgl.sg = kmalloc(sizeof(struct scatterlist) * sgl.size, GFP_KERNEL);
+	if (unlikely(!sgl.sg))
+		return -ENOMEM;
+
+	sg_init_table(sgl.sg, sgl.size);
+	ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
+	if (likely(ret > 0))
+		ret = send_pages(port, sgl.sg, sgl.n, sgl.len, true);
+
+	return ret;
+}
+
 static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
 {
 	struct port *port;
@@ -856,6 +1013,7 @@ static const struct file_operations port_fops = {
 	.open = port_fops_open,
 	.read = port_fops_read,
 	.write = port_fops_write,
+	.splice_write = port_fops_splice_write,
 	.poll = port_fops_poll,
 	.release = port_fops_release,
 	.fasync = port_fops_fasync,

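[Note: from user space, the new .splice_write hook means data already sitting in a pipe can be handed to a virtio-serial port without a copy. A hedged sketch of the guest-side call (the device path is an assumption; names under /dev/virtio-ports/ are set by the host configuration):

	/* User-space sketch: splice pipe contents into a virtio-serial port. */
	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	static ssize_t send_pipe_to_port(int pipe_rd, size_t len)
	{
		/* Path is illustrative only. */
		int port = open("/dev/virtio-ports/org.example.port0", O_WRONLY);
		ssize_t n;

		if (port < 0)
			return -1;
		/* Lands in port_fops_splice_write() added above. */
		n = splice(pipe_rd, NULL, port, NULL, len, SPLICE_F_MOVE);
		close(port);
		return n;
	}
]
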
@@ -263,6 +263,9 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	struct virtqueue *vq;
 	int err;
 
+	if (!name)
+		return NULL;
+
 	/* We must have this many virtqueues. */
 	if (index >= ldev->desc->num_vq)
 		return ERR_PTR(-ENOENT);
@@ -296,7 +299,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	 * to 'true': the host just a(nother) SMP CPU, so we only need inter-cpu
 	 * barriers.
 	 */
-	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+	vq = vring_new_virtqueue(index, lvq->config.num, LGUEST_VRING_ALIGN, vdev,
 				 true, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;

@@ -84,6 +84,9 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	if (id >= ARRAY_SIZE(rvdev->vring))
 		return ERR_PTR(-EINVAL);
 
+	if (!name)
+		return NULL;
+
 	ret = rproc_alloc_vring(rvdev, id);
 	if (ret)
 		return ERR_PTR(ret);
@@ -103,7 +106,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	 * Create the new vq, and tell virtio we're not interested in
 	 * the 'weak' smp barriers, since we're talking with a real device.
 	 */
-	vq = vring_new_virtqueue(len, rvring->align, vdev, false, addr,
+	vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, addr,
 				 rproc_virtio_notify, callback, name);
 	if (!vq) {
 		dev_err(dev, "vring_new_virtqueue %s failed\n", name);

@@ -4,7 +4,6 @@ menu "Rpmsg drivers (EXPERIMENTAL)"
 config RPMSG
 	tristate
 	select VIRTIO
-	select VIRTIO_RING
 	depends on EXPERIMENTAL
 
 endmenu

@@ -190,6 +190,9 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 	if (index >= kdev->desc->num_vq)
 		return ERR_PTR(-ENOENT);
 
+	if (!name)
+		return NULL;
+
 	config = kvm_vq_config(kdev->desc)+index;
 
 	err = vmem_add_mapping(config->address,
@@ -198,7 +201,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 	if (err)
 		goto out;
 
-	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
+	vq = vring_new_virtqueue(index, config->num, KVM_S390_VIRTIO_RING_ALIGN,
 				 vdev, true, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {

@@ -1,11 +1,9 @@
-# Virtio always gets selected by whoever wants it.
 config VIRTIO
 	tristate
-
-# Similarly the virtio ring implementation.
-config VIRTIO_RING
-	tristate
-	depends on VIRTIO
+	---help---
+	  This option is selected by any driver which implements the virtio
+	  bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST,
+	  CONFIG_RPMSG or CONFIG_S390_GUEST.
 
 menu "Virtio drivers"
 
@@ -13,7 +11,6 @@ config VIRTIO_PCI
 	tristate "PCI driver for virtio devices (EXPERIMENTAL)"
 	depends on PCI && EXPERIMENTAL
 	select VIRTIO
-	select VIRTIO_RING
 	---help---
 	  This drivers provides support for virtio based paravirtual device
 	  drivers over PCI.  This requires that your VMM has appropriate PCI
@@ -26,9 +23,8 @@ config VIRTIO_PCI
 	  If unsure, say M.
 
 config VIRTIO_BALLOON
-	tristate "Virtio balloon driver (EXPERIMENTAL)"
-	select VIRTIO
-	select VIRTIO_RING
+	tristate "Virtio balloon driver"
+	depends on VIRTIO
 	---help---
 	 This driver supports increasing and decreasing the amount
 	 of memory within a KVM guest.
@@ -39,7 +35,6 @@ config VIRTIO_BALLOON
 	tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)"
 	depends on HAS_IOMEM && EXPERIMENTAL
 	select VIRTIO
-	select VIRTIO_RING
 	---help---
 	 This drivers provides support for memory mapped virtio
 	 platform device driver.

@@ -1,5 +1,4 @@
-obj-$(CONFIG_VIRTIO) += virtio.o
-obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o

@@ -159,7 +159,7 @@ static int virtio_dev_remove(struct device *_d)
 	drv->remove(dev);
 
 	/* Driver should have reset device. */
-	BUG_ON(dev->config->get_status(dev));
+	WARN_ON_ONCE(dev->config->get_status(dev));
 
 	/* Acknowledge the device's existence again. */
 	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);

@@ -131,9 +131,6 @@ struct virtio_mmio_vq_info
 	/* the number of entries in the queue */
 	unsigned int num;
 
-	/* the index of the queue */
-	int queue_index;
-
 	/* the virtual address of the ring queue */
 	void *queue;
 
@@ -225,11 +222,10 @@ static void vm_reset(struct virtio_device *vdev)
 static void vm_notify(struct virtqueue *vq)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
-	struct virtio_mmio_vq_info *info = vq->priv;
 
 	/* We write the queue's selector into the notification register to
 	 * signal the other end */
-	writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+	writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
 }
 
 /* Notify all virtqueues on an interrupt. */
@@ -270,6 +266,7 @@ static void vm_del_vq(struct virtqueue *vq)
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
 	struct virtio_mmio_vq_info *info = vq->priv;
 	unsigned long flags, size;
+	unsigned int index = virtqueue_get_queue_index(vq);
 
 	spin_lock_irqsave(&vm_dev->lock, flags);
 	list_del(&info->node);
@@ -278,7 +275,7 @@ static void vm_del_vq(struct virtqueue *vq)
 	vring_del_virtqueue(vq);
 
 	/* Select and deactivate the queue */
-	writel(info->queue_index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
+	writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
 	writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN));
@@ -309,6 +306,9 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 	unsigned long flags, size;
 	int err;
 
+	if (!name)
+		return NULL;
+
 	/* Select the queue we're interested in */
 	writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
 
@@ -324,7 +324,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 		err = -ENOMEM;
 		goto error_kmalloc;
 	}
-	info->queue_index = index;
 
 	/* Allocate pages for the queue - start with a queue as big as
 	 * possible (limited by maximum size allowed by device), drop down
@@ -332,11 +331,21 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 	 * and two rings (which makes it "alignment_size * 2")
 	 */
 	info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
+
+	/* If the device reports a 0 entry queue, we won't be able to
+	 * use it to perform I/O, and vring_new_virtqueue() can't create
+	 * empty queues anyway, so don't bother to set up the device.
+	 */
+	if (info->num == 0) {
+		err = -ENOENT;
+		goto error_alloc_pages;
+	}
+
 	while (1) {
 		size = PAGE_ALIGN(vring_size(info->num,
 				VIRTIO_MMIO_VRING_ALIGN));
-		/* Already smallest possible allocation? */
-		if (size <= VIRTIO_MMIO_VRING_ALIGN * 2) {
+		/* Did the last iter shrink the queue below minimum size? */
+		if (size < VIRTIO_MMIO_VRING_ALIGN * 2) {
 			err = -ENOMEM;
 			goto error_alloc_pages;
 		}
@@ -356,7 +365,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	/* Create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+	vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
 				 true, info->queue, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;

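[Note: two of the mmio changes above are easy to miss. A device advertising a zero-entry queue via QUEUE_NUM_MAX is now rejected up front with -ENOENT, and the shrink loop's bail-out test changed from "size <= VIRTIO_MMIO_VRING_ALIGN * 2" to "<": a ring occupying exactly two alignment units is the smallest valid allocation, and the old test gave up one iteration too early, which appears to be the off-by-one named in the shortlog.]
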
@@ -48,6 +48,7 @@ struct virtio_pci_device
 	int msix_enabled;
 	int intx_enabled;
 	struct msix_entry *msix_entries;
+	cpumask_var_t *msix_affinity_masks;
 	/* Name strings for interrupts. This size should be enough,
 	 * and I'm too lazy to allocate each name separately. */
 	char (*msix_names)[256];
@@ -79,9 +80,6 @@ struct virtio_pci_vq_info
 	/* the number of entries in the queue */
 	int num;
 
-	/* the index of the queue */
-	int queue_index;
-
 	/* the virtual address of the ring queue */
 	void *queue;
 
@@ -202,11 +200,11 @@ static void vp_reset(struct virtio_device *vdev)
 static void vp_notify(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
-	struct virtio_pci_vq_info *info = vq->priv;
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	iowrite16(virtqueue_get_queue_index(vq),
+		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -279,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
 		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
 
+	for (i = 0; i < vp_dev->msix_vectors; i++)
+		if (vp_dev->msix_affinity_masks[i])
+			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+
 	if (vp_dev->msix_enabled) {
 		/* Disable the vector used for configuration */
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -296,6 +298,8 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	vp_dev->msix_names = NULL;
 	kfree(vp_dev->msix_entries);
 	vp_dev->msix_entries = NULL;
+	kfree(vp_dev->msix_affinity_masks);
+	vp_dev->msix_affinity_masks = NULL;
 }
 
 static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
@@ -314,6 +318,15 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 				     GFP_KERNEL);
 	if (!vp_dev->msix_names)
 		goto error;
+	vp_dev->msix_affinity_masks
+		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
+			  GFP_KERNEL);
+	if (!vp_dev->msix_affinity_masks)
+		goto error;
+	for (i = 0; i < nvectors; ++i)
+		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+					GFP_KERNEL))
+			goto error;
 
 	for (i = 0; i < nvectors; ++i)
 		vp_dev->msix_entries[i].entry = i;
@@ -402,7 +415,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	info->queue_index = index;
 	info->num = num;
 	info->msix_vector = msix_vec;
 
@@ -418,7 +430,7 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+	vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
 				 true, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -467,7 +479,8 @@ static void vp_del_vq(struct virtqueue *vq)
 	list_del(&info->node);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(virtqueue_get_queue_index(vq),
+		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
 	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -542,7 +555,10 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 	vp_dev->per_vq_vectors = per_vq_vectors;
 	allocated_vectors = vp_dev->msix_used_vectors;
 	for (i = 0; i < nvqs; ++i) {
-		if (!callbacks[i] || !vp_dev->msix_enabled)
+		if (!names[i]) {
+			vqs[i] = NULL;
+			continue;
+		} else if (!callbacks[i] || !vp_dev->msix_enabled)
 			msix_vec = VIRTIO_MSI_NO_VECTOR;
 		else if (vp_dev->per_vq_vectors)
 			msix_vec = allocated_vectors++;
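
[Note: the names[i] == NULL test above is the transport half of "virtio: support reserved vqs"; the driver half is simply leaving holes in the arrays passed to find_vqs(). A sketch under assumed names (the device and its queue layout are invented for illustration):

	/* Sketch: vq #1 is reserved by the (hypothetical) device spec, so the
	 * driver passes a NULL name and gets vqs[1] == NULL back instead of
	 * an error. */
	static int mydev_find_vqs(struct virtio_device *vdev,
				  struct virtqueue *vqs[3])
	{
		vq_callback_t *callbacks[3] = { mydev_rx_done, NULL, mydev_ctrl_done };
		const char *names[3] = { "rx", NULL /* reserved */, "ctrl" };

		return vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
	}
]
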
@@ -609,6 +625,35 @@ static const char *vp_bus_name(struct virtio_device *vdev)
 	return pci_name(vp_dev->pci_dev);
 }
 
+/* Setup the affinity for a virtqueue:
+ * - force the affinity for per vq vector
+ * - OR over all affinities for shared MSI
+ * - ignore the affinity request if we're using INTX
+ */
+static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+	struct cpumask *mask;
+	unsigned int irq;
+
+	if (!vq->callback)
+		return -EINVAL;
+
+	if (vp_dev->msix_enabled) {
+		mask = vp_dev->msix_affinity_masks[info->msix_vector];
+		irq = vp_dev->msix_entries[info->msix_vector].vector;
+		if (cpu == -1)
+			irq_set_affinity_hint(irq, NULL);
+		else {
+			cpumask_set_cpu(cpu, mask);
+			irq_set_affinity_hint(irq, mask);
+		}
+	}
+	return 0;
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get = vp_get,
 	.set = vp_set,
@@ -620,6 +665,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.get_features = vp_get_features,
 	.finalize_features = vp_finalize_features,
 	.bus_name = vp_bus_name,
+	.set_vq_affinity = vp_set_vq_affinity,
 };
 
 static void virtio_pci_release_dev(struct device *_d)
@@ -673,8 +719,10 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
 		goto out_enable_device;
 
 	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
-	if (vp_dev->ioaddr == NULL)
+	if (vp_dev->ioaddr == NULL) {
+		err = -ENOMEM;
 		goto out_req_regions;
+	}
 
 	pci_set_drvdata(pci_dev, vp_dev);
 	pci_set_master(pci_dev);

@@ -106,6 +106,9 @@ struct vring_virtqueue
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
+	/* Index of the queue */
+	int queue_index;
+
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -171,6 +174,13 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
+int virtqueue_get_queue_index(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	return vq->queue_index;
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);
+
 /**
  * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
@@ -616,7 +626,8 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
 }
 EXPORT_SYMBOL_GPL(vring_interrupt);
 
-struct virtqueue *vring_new_virtqueue(unsigned int num,
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+				      unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,
@@ -647,6 +658,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
+	vq->queue_index = index;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;

@@ -50,6 +50,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
+int virtqueue_get_queue_index(struct virtqueue *vq);
+
 /**
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus

@@ -84,7 +84,9 @@
  * nvqs: the number of virtqueues to find
  * vqs: on success, includes new virtqueues
  * callbacks: array of callbacks, for each virtqueue
+ *		include a NULL entry for vqs that do not need a callback
  * names: array of virtqueue names (mainly for debugging)
+ *		include a NULL entry for vqs unused by driver
  * Returns 0 on success or error status
  * @del_vqs: free virtqueues found by find_vqs().
  * @get_features: get the array of feature bits for this device.
@@ -98,6 +100,7 @@
  * vdev: the virtio_device
  * This returns a pointer to the bus name a la pci_name from which
  * the caller can then copy.
+ * @set_vq_affinity: set the affinity for a virtqueue.
 */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -116,6 +119,7 @@ struct virtio_config_ops {
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
+	int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
@@ -190,5 +194,24 @@ const char *virtio_bus_name(struct virtio_device *vdev)
 	return vdev->config->bus_name(vdev);
 }
 
+/**
+ * virtqueue_set_affinity - setting affinity for a virtqueue
+ * @vq: the virtqueue
+ * @cpu: the cpu no.
+ *
+ * Pay attention the function are best-effort: the affinity hint may not be set
+ * due to config support, irq type and sharing.
+ *
+ */
+static inline
+int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
+{
+	struct virtio_device *vdev = vq->vdev;
+	if (vdev->config->set_vq_affinity)
+		return vdev->config->set_vq_affinity(vq, cpu);
+	return 0;
+}
+
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */

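[Note: a natural consumer of the new hook is a multiqueue driver spreading one virtqueue per CPU; nothing in this merge does so yet, so the sketch below is purely illustrative (the mydev struct and its per-CPU vq array are assumptions):

	/* Best-effort pinning, per the kernel-doc above. */
	static void mydev_spread_vqs(struct mydev *md)
	{
		int cpu;

		for_each_online_cpu(cpu)
			virtqueue_set_affinity(md->vq[cpu], cpu);	/* assumed array */
	}
]
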
@@ -165,7 +165,8 @@ static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
 struct virtio_device;
 struct virtqueue;
 
-struct virtqueue *vring_new_virtqueue(unsigned int num,
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+				      unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      bool weak_barriers,

@@ -4200,12 +4200,6 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
|
|||||||
buf->private = 0;
|
buf->private = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
|
|
||||||
struct pipe_buffer *buf)
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
|
static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
|
||||||
struct pipe_buffer *buf)
|
struct pipe_buffer *buf)
|
||||||
{
|
{
|
||||||
@@ -4221,7 +4215,7 @@ static const struct pipe_buf_operations buffer_pipe_buf_ops = {
|
|||||||
.unmap = generic_pipe_buf_unmap,
|
.unmap = generic_pipe_buf_unmap,
|
||||||
.confirm = generic_pipe_buf_confirm,
|
.confirm = generic_pipe_buf_confirm,
|
||||||
.release = buffer_pipe_buf_release,
|
.release = buffer_pipe_buf_release,
|
||||||
.steal = buffer_pipe_buf_steal,
|
.steal = generic_pipe_buf_steal,
|
||||||
.get = buffer_pipe_buf_get,
|
.get = buffer_pipe_buf_get,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1299,6 +1299,7 @@ static struct device *new_device(const char *name, u16 type)
 	dev->feature_len = 0;
 	dev->num_vq = 0;
 	dev->running = false;
+	dev->next = NULL;
 
 	/*
 	 * Append to device list. Prepending to a single-linked list is
new file: tools/virtio/virtio-trace/Makefile (13 lines)
@@ -0,0 +1,13 @@
+CC = gcc
+CFLAGS = -O2 -Wall -pthread
+
+all: trace-agent
+
+.c.o:
+	$(CC) $(CFLAGS) -c $^ -o $@
+
+trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
+	$(CC) $(CFLAGS) -o $@ $^
+
+clean:
+	rm -f *.o trace-agent
new file: tools/virtio/virtio-trace/README (118 lines)
@@ -0,0 +1,118 @@
+Trace Agent for virtio-trace
+============================
+
+Trace agent is a user tool for sending trace data from a guest to a host with
+low overhead. The trace agent has the following functions:
+ - splice a page of ring-buffer to read_pipe without memory copying
+ - splice the page from write_pipe to virtio-console without memory copying
+ - write trace data to stdout by using the -o option
+ - controlled by start/stop orders from a host
+
+The trace agent operates as follows:
+ 1) Initialize all structures.
+ 2) Create a read/write thread per CPU. Each thread is bound to one CPU and
+    holds that binding.
+ 3) A controller thread polls for a start order from the host.
+ 4) After the controller of the trace agent receives a start order from the
+    host, the controller wakes the read/write threads.
+ 5) The read/write threads start to read trace data from ring-buffers and
+    write the data to virtio-serial.
+ 6) If the controller receives a stop order from the host, the read/write
+    threads stop reading trace data.
+
+
+Files
+=====
+
+README: this file
+Makefile: Makefile of trace agent for virtio-trace
+trace-agent.c: includes main function, sets up for operating trace agent
+trace-agent.h: includes all structures and some macros
+trace-agent-ctl.c: includes controller function for read/write threads
+trace-agent-rw.c: includes read/write threads function
+
+
+Setup
+=====
+
+To use this trace agent for virtio-trace, we need to prepare some virtio-serial
+I/Fs.
+
+1) Make FIFO in a host
+ virtio-trace uses one virtio-serial pipe per CPU as a trace data path, plus
+one control path, so FIFOs (named pipes) should be created as follows:
+   # mkdir /tmp/virtio-trace/
+   # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
+   # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
+
+For example, if a guest uses three CPUs, the names are
+   trace-path-cpu{0,1,2}.{in,out}
+and
+   agent-ctl-path.{in,out}.
+
+2) Set up a virtio-serial pipe in a host
+ Add the following qemu options to use a virtio-serial pipe.
+
+   ##virtio-serial device##
+   -device virtio-serial-pci,id=virtio-serial0\
+   ##control path##
+   -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
+   -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
+    id=channel0,name=agent-ctl-path\
+   ##data path##
+   -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
+   -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\
+    id=channel1,name=trace-path-cpu0\
+   ...
+
+If you manage guests with libvirt, add the following tags to domain XML files.
+Then, libvirt passes the same command options to qemu.
+
+   <channel type='pipe'>
+      <source path='/tmp/virtio-trace/agent-ctl-path'/>
+      <target type='virtio' name='agent-ctl-path'/>
+      <address type='virtio-serial' controller='0' bus='0' port='0'/>
+   </channel>
+   <channel type='pipe'>
+      <source path='/tmp/virtio-trace/trace-path-cpu0'/>
+      <target type='virtio' name='trace-path-cpu0'/>
+      <address type='virtio-serial' controller='0' bus='0' port='1'/>
+   </channel>
+   ...
+Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For
+example, if a guest uses three CPUs, chardev names should be trace-path-cpu0,
+trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
+
+3) Boot the guest
+ You can find the chardevs in /dev/virtio-ports/ in the guest.
+
+
+Run
+===
+
+0) Build the trace agent in a guest
+   $ make
+
+1) Enable ftrace in the guest
+ <Example>
+   # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+
+2) Run the trace agent in the guest
+ This agent must be operated as root.
+   # ./trace-agent
+The read/write threads in the agent wait for a start order from the host. If
+you add the -o option, trace data are output via stdout in the guest.
+
+3) Open the FIFOs in the host
+   # cat /tmp/virtio-trace/trace-path-cpu0.out
+If the host does not open these, trace data get stuck in the virtio buffers
+and the guest will stall, because the chardev in QEMU operates in blocking
+mode. This blocking mode may be solved in the future.
+
+4) Start reading trace data by ordering from the host
+ The host injects a read start order into the guest via virtio-serial.
+   # echo 1 > /tmp/virtio-trace/agent-ctl-path.in
+
+5) Stop reading trace data by ordering from the host
+ The host injects a read stop order into the guest via virtio-serial.
+   # echo 0 > /tmp/virtio-trace/agent-ctl-path.in
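The same start/stop orders can be injected programmatically instead of via echo. Below is a minimal host-side sketch that is not part of this commit; the helper name send_agent_order and the path argument are illustrative. It matches what the agent's controller actually accepts: a two-byte message, '1' or '0' followed by a newline.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Equivalent to "echo 1 > /tmp/virtio-trace/agent-ctl-path.in". */
int send_agent_order(const char *ctl_in_path, int start)
{
	int fd = open(ctl_in_path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return -1;
	}
	/* rw_ctl_loop() checks rlen == 2 && buf[0] == '1' (or '0'). */
	if (write(fd, start ? "1\n" : "0\n", 2) != 2) {
		perror("write");
		close(fd);
		return -1;
	}
	return close(fd);
}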
new file: tools/virtio/virtio-trace/trace-agent-ctl.c (137 lines)
@@ -0,0 +1,137 @@
+/*
+ * Controller of read/write threads for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define HOST_MSG_SIZE 256
+#define EVENT_WAIT_MSEC 100
+
+static volatile sig_atomic_t global_signal_val;
+bool global_sig_receive; /* default false */
+bool global_run_operation; /* default false */
+
+/* Handle SIGTERM/SIGINT/SIGQUIT to exit */
+static void signal_handler(int sig)
+{
+	global_signal_val = sig;
+}
+
+int rw_ctl_init(const char *ctl_path)
+{
+	int ctl_fd;
+
+	ctl_fd = open(ctl_path, O_RDONLY);
+	if (ctl_fd == -1) {
+		pr_err("Cannot open ctl_fd\n");
+		goto error;
+	}
+
+	return ctl_fd;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static int wait_order(int ctl_fd)
+{
+	struct pollfd poll_fd;
+	int ret = 0;
+
+	while (!global_sig_receive) {
+		poll_fd.fd = ctl_fd;
+		poll_fd.events = POLLIN;
+
+		ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC);
+
+		if (global_signal_val) {
+			global_sig_receive = true;
+			pr_info("Receive interrupt %d\n", global_signal_val);
+
+			/* Wakes rw-threads when they are sleeping */
+			if (!global_run_operation)
+				pthread_cond_broadcast(&cond_wakeup);
+
+			ret = -1;
+			break;
+		}
+
+		if (ret < 0) {
+			pr_err("Polling error\n");
+			goto error;
+		}
+
+		if (ret)
+			break;
+	};
+
+	return ret;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * control read/write threads by handling global_run_operation
+ */
+void *rw_ctl_loop(int ctl_fd)
+{
+	ssize_t rlen;
+	char buf[HOST_MSG_SIZE];
+	int ret;
+
+	/* Setup signal handlers */
+	signal(SIGTERM, signal_handler);
+	signal(SIGINT, signal_handler);
+	signal(SIGQUIT, signal_handler);
+
+	while (!global_sig_receive) {
+
+		ret = wait_order(ctl_fd);
+		if (ret < 0)
+			break;
+
+		rlen = read(ctl_fd, buf, sizeof(buf));
+		if (rlen < 0) {
+			pr_err("read data error in ctl thread\n");
+			goto error;
+		}
+
+		if (rlen == 2 && buf[0] == '1') {
+			/*
+			 * If host writes '1' to a control path,
+			 * this controller wakes all read/write threads.
+			 */
+			global_run_operation = true;
+			pthread_cond_broadcast(&cond_wakeup);
+			pr_debug("Wake up all read/write threads\n");
+		} else if (rlen == 2 && buf[0] == '0') {
+			/*
+			 * If host writes '0' to a control path, read/write
+			 * threads will wait for notification from Host.
+			 */
+			global_run_operation = false;
+			pr_debug("Stop all read/write threads\n");
+		} else
+			pr_info("Invalid host notification: %s\n", buf);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
new file: tools/virtio/virtio-trace/trace-agent-rw.c (192 lines)
@@ -0,0 +1,192 @@
+/*
+ * Read/write thread of a guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "trace-agent.h"
+
+#define READ_WAIT_USEC 100000
+
+void *rw_thread_info_new(void)
+{
+	struct rw_thread_info *rw_ti;
+
+	rw_ti = zalloc(sizeof(struct rw_thread_info));
+	if (rw_ti == NULL) {
+		pr_err("rw_thread_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	rw_ti->cpu_num = -1;
+	rw_ti->in_fd = -1;
+	rw_ti->out_fd = -1;
+	rw_ti->read_pipe = -1;
+	rw_ti->write_pipe = -1;
+	rw_ti->pipe_size = PIPE_INIT;
+
+	return rw_ti;
+}
+
+void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+			bool stdout_flag, unsigned long pipe_size,
+			struct rw_thread_info *rw_ti)
+{
+	int data_pipe[2];
+
+	rw_ti->cpu_num = cpu;
+
+	/* set read(input) fd */
+	rw_ti->in_fd = open(in_path, O_RDONLY);
+	if (rw_ti->in_fd == -1) {
+		pr_err("Could not open in_fd (CPU:%d)\n", cpu);
+		goto error;
+	}
+
+	/* set write(output) fd */
+	if (!stdout_flag) {
+		/* virtio-serial output mode */
+		rw_ti->out_fd = open(out_path, O_WRONLY);
+		if (rw_ti->out_fd == -1) {
+			pr_err("Could not open out_fd (CPU:%d)\n", cpu);
+			goto error;
+		}
+	} else
+		/* stdout mode */
+		rw_ti->out_fd = STDOUT_FILENO;
+
+	if (pipe2(data_pipe, O_NONBLOCK) < 0) {
+		pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
+		goto error;
+	}
+
+	/*
+	 * The default pipe size is 64kB, based on fs/pipe.c.
+	 * To read/write trace data quickly, the pipe size is enlarged.
+	 */
+	if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
+		pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
+		goto error;
+	}
+
+	rw_ti->read_pipe = data_pipe[1];
+	rw_ti->write_pipe = data_pipe[0];
+	rw_ti->pipe_size = pipe_size;
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/* Bind a thread to a cpu */
+static void bind_cpu(int cpu_num)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu_num, &mask);
+
+	/* bind my thread to cpu_num by assigning zero to the first argument */
+	if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
+		pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
+}
+
+static void *rw_thread_main(void *thread_info)
+{
+	ssize_t rlen, wlen;
+	ssize_t ret;
+	struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
+
+	bind_cpu(ts->cpu_num);
+
+	while (1) {
+		/* Wait for a read order of trace data by Host OS */
+		if (!global_run_operation) {
+			pthread_mutex_lock(&mutex_notify);
+			pthread_cond_wait(&cond_wakeup, &mutex_notify);
+			pthread_mutex_unlock(&mutex_notify);
+		}
+
+		if (global_sig_receive)
+			break;
+
+		/*
+		 * Each thread reads the trace_pipe_raw of the cpu it is
+		 * bound to, so there is no contention between threads.
+		 */
+		rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
+				ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
+
+		if (rlen < 0) {
+			pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
+			goto error;
+		} else if (rlen == 0) {
+			/*
+			 * If trace data do not exist, or do not yet exceed
+			 * the page size and so cannot be read, splice()
+			 * returns 0. Wait for the ring-buffer to be filled.
+			 */
+			usleep(READ_WAIT_USEC);
+			pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
+			continue;
+		}
+
+		wlen = 0;
+
+		do {
+			ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
+					rlen - wlen,
+					SPLICE_F_MOVE | SPLICE_F_MORE);
+
+			if (ret < 0) {
+				pr_err("Splice_write in rw-thread(%d)\n",
+						ts->cpu_num);
+				goto error;
+			} else if (ret == 0)
+				/*
+				 * When the host reader cannot keep up with
+				 * the trace data, the guest will be stopped,
+				 * because the char dev in QEMU does not
+				 * support non-blocking mode. The writer may
+				 * sleep in that case.
+				 * This sleep will be removed by supporting
+				 * non-blocking mode.
+				 */
+				sleep(1);
+			wlen += ret;
+		} while (wlen < rlen);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+
+pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
+{
+	int ret;
+	pthread_t rw_thread_per_cpu;
+
+	ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
+	if (ret != 0) {
+		pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
+		exit(EXIT_FAILURE);
+	}
+
+	return rw_thread_per_cpu;
+}
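The thread body above relies on a file -> pipe -> file splice chain with an enlarged pipe. Below is a standalone sketch of just that pattern, not from this commit; the /tmp paths and sizes are placeholders.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int p[2];
	int in = open("/tmp/in", O_RDONLY);
	int out = open("/tmp/out", O_WRONLY | O_CREAT, 0644);
	ssize_t n;

	if (in < 0 || out < 0 || pipe2(p, O_NONBLOCK) < 0)
		return 1;
	/* Grow the pipe from the 64kB default; failure is non-fatal. */
	if (fcntl(p[0], F_SETPIPE_SZ, 1024 * 1024) < 0)
		perror("F_SETPIPE_SZ");

	/* Move pages in: source file -> pipe, then pipe -> destination. */
	while ((n = splice(in, NULL, p[1], NULL, 1 << 20,
			   SPLICE_F_MOVE | SPLICE_F_MORE)) > 0) {
		ssize_t w = 0;
		while (w < n) {
			ssize_t r = splice(p[0], NULL, out, NULL, n - w,
					   SPLICE_F_MOVE | SPLICE_F_MORE);
			if (r <= 0)
				return 1;
			w += r;
		}
	}
	return 0;
}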
new file: tools/virtio/virtio-trace/trace-agent.c (270 lines)
@@ -0,0 +1,270 @@
+/*
+ * Guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
+#define PIPE_DEF_BUFS 16
+#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS)
+#define PIPE_MAX_SIZE (1024*1024)
+#define READ_PATH_FMT \
+	"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d"
+#define CTL_PATH "/dev/virtio-ports/agent-ctl-path"
+
+pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
+
+static int get_total_cpus(void)
+{
+	int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+
+	if (nr_cpus <= 0) {
+		pr_err("Could not read cpus\n");
+		goto error;
+	} else if (nr_cpus > MAX_CPUS) {
+		pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
+		goto error;
+	}
+
+	return nr_cpus;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void *agent_info_new(void)
+{
+	struct agent_info *s;
+	int i;
+
+	s = zalloc(sizeof(struct agent_info));
+	if (s == NULL) {
+		pr_err("agent_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	s->pipe_size = PIPE_INIT;
+	s->use_stdout = false;
+	s->cpus = get_total_cpus();
+	s->ctl_fd = -1;
+
+	/* read/write threads init */
+	for (i = 0; i < s->cpus; i++)
+		s->rw_ti[i] = rw_thread_info_new();
+
+	return s;
+}
+
+static unsigned long parse_size(const char *arg)
+{
+	unsigned long value, round;
+	char *ptr;
+
+	value = strtoul(arg, &ptr, 10);
+	switch (*ptr) {
+	case 'K': case 'k':
+		value <<= 10;
+		break;
+	case 'M': case 'm':
+		value <<= 20;
+		break;
+	default:
+		break;
+	}
+
+	if (value > PIPE_MAX_SIZE) {
+		pr_err("Pipe size must be less than 1MB\n");
+		goto error;
+	} else if (value < PIPE_MIN_SIZE) {
+		pr_err("Pipe size must be over 64KB\n");
+		goto error;
+	}
+
+	/* Align buffer size with page unit */
+	round = value & (PAGE_SIZE - 1);
+	value = value - round;
+
+	return value;
+error:
+	return 0;
+}
+
+static void usage(char const *prg)
+{
+	pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
+}
+
+static const char *make_path(int cpu_num, bool this_is_write_path)
+{
+	int ret;
+	char *buf;
+
+	buf = zalloc(PATH_MAX);
+	if (buf == NULL) {
+		pr_err("Could not allocate buffer\n");
+		goto error;
+	}
+
+	if (this_is_write_path)
+		/* write(output) path */
+		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
+	else
+		/* read(input) path */
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+
+	if (ret <= 0) {
+		pr_err("Failed to generate %s path(CPU#%d):%d\n",
+			this_is_write_path ? "write" : "read", cpu_num, ret);
+		goto error;
+	}
+
+	return buf;
+
+error:
+	free(buf);
+	return NULL;
+}
+
+static const char *make_input_path(int cpu_num)
+{
+	return make_path(cpu_num, false);
+}
+
+static const char *make_output_path(int cpu_num)
+{
+	return make_path(cpu_num, true);
+}
+
+static void *agent_info_init(struct agent_info *s)
+{
+	int cpu;
+	const char *in_path = NULL;
+	const char *out_path = NULL;
+
+	/* init read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		/* set read(input) path per read/write thread */
+		in_path = make_input_path(cpu);
+		if (in_path == NULL)
+			goto error;
+
+		/* set write(output) path per read/write thread */
+		if (!s->use_stdout) {
+			out_path = make_output_path(cpu);
+			if (out_path == NULL)
+				goto error;
+		} else
+			/* stdout mode */
+			pr_debug("stdout mode\n");
+
+		rw_thread_init(cpu, in_path, out_path, s->use_stdout,
+				s->pipe_size, s->rw_ti[cpu]);
+	}
+
+	/* init controller of read/write threads */
+	s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void *parse_args(int argc, char *argv[], struct agent_info *s)
+{
+	int cmd;
+	unsigned long size;
+
+	while ((cmd = getopt(argc, argv, "hos:")) != -1) {
+		switch (cmd) {
+		/* stdout mode */
+		case 'o':
+			s->use_stdout = true;
+			break;
+		/* size of pipe */
+		case 's':
+			size = parse_size(optarg);
+			if (size == 0)
+				goto error;
+			s->pipe_size = size;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+			goto error;
+		}
+	}
+
+	agent_info_init(s);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+static void agent_main_loop(struct agent_info *s)
+{
+	int cpu;
+	pthread_t rw_thread_per_cpu[MAX_CPUS];
+
+	/* Start all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++)
+		rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
+
+	rw_ctl_loop(s->ctl_fd);
+
+	/* Finish all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		int ret;
+
+		ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
+		if (ret != 0) {
+			pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+static void agent_info_free(struct agent_info *s)
+{
+	int i;
+
+	close(s->ctl_fd);
+	for (i = 0; i < s->cpus; i++) {
+		close(s->rw_ti[i]->in_fd);
+		close(s->rw_ti[i]->out_fd);
+		close(s->rw_ti[i]->read_pipe);
+		close(s->rw_ti[i]->write_pipe);
+		free(s->rw_ti[i]);
+	}
+	free(s);
+}
+
+int main(int argc, char *argv[])
+{
+	struct agent_info *s = NULL;
+
+	s = agent_info_new();
+	parse_args(argc, argv, s);
+
+	agent_main_loop(s);
+
+	agent_info_free(s);
+
+	return 0;
+}
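parse_size() above rounds the requested pipe size down to a page multiple after applying the K/M suffix. A small standalone check of that arithmetic, assuming a 4096-byte page:

#include <stdio.h>

int main(void)
{
	unsigned long page = 4096, value = 200000;
	unsigned long round = value & (page - 1);

	/* Prints "200000 -> 196608", i.e. 48 whole pages. */
	printf("%lu -> %lu\n", value, value - round);
	return 0;
}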
new file: tools/virtio/virtio-trace/trace-agent.h (75 lines)
@@ -0,0 +1,75 @@
+#ifndef __TRACE_AGENT_H__
+#define __TRACE_AGENT_H__
+#include <pthread.h>
+#include <stdbool.h>
+
+#define MAX_CPUS 256
+#define PIPE_INIT (1024*1024)
+
+/*
+ * agent_info - structure managing total information of guest agent
+ * @pipe_size: size of pipe (default 1MB)
+ * @use_stdout: set to true when the -o option is given (default false)
+ * @cpus: total number of CPUs
+ * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path
+ * @rw_ti: structure managing information of read/write threads
+ */
+struct agent_info {
+	unsigned long pipe_size;
+	bool use_stdout;
+	int cpus;
+	int ctl_fd;
+	struct rw_thread_info *rw_ti[MAX_CPUS];
+};
+
+/*
+ * rw_thread_info - structure managing a read/write thread per cpu
+ * @cpu_num: cpu number operating this read/write thread
+ * @in_fd: fd of reading trace data path in cpu_num
+ * @out_fd: fd of writing trace data path in cpu_num
+ * @read_pipe: fd of read pipe
+ * @write_pipe: fd of write pipe
+ * @pipe_size: size of pipe (default 1MB)
+ */
+struct rw_thread_info {
+	int cpu_num;
+	int in_fd;
+	int out_fd;
+	int read_pipe;
+	int write_pipe;
+	unsigned long pipe_size;
+};
+
+/* use for stopping rw threads */
+extern bool global_sig_receive;
+
+/* use for notification */
+extern bool global_run_operation;
+extern pthread_mutex_t mutex_notify;
+extern pthread_cond_t cond_wakeup;
+
+/* for controller of read/write threads */
+extern int rw_ctl_init(const char *ctl_path);
+extern void *rw_ctl_loop(int ctl_fd);
+
+/* for trace read/write thread */
+extern void *rw_thread_info_new(void);
+extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+			bool stdout_flag, unsigned long pipe_size,
+			struct rw_thread_info *rw_ti);
+extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
+
+static inline void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+
+#endif /*__TRACE_AGENT_H__*/