virtio-blkdev源码

块设备数据传输执行流程

virt块设备通信流程
当上层有IO请求时,块设备会经uk-blkdev-queue创建队列virtqueue用于数据传输:
1.virtio_blkdev_configure:分配队列所需内存空间
2.virtio_blkdev_queue_setup:设定队列的成员参数
3.virtio_blkdev_start:设备状态更新,可以与qemu进行通信
随后virtio块设备可以通知host,并进行上层请求的传输:
1.virtio_blkdev_request_set_sglist:将请求任务加入uk-blkdev-queue中存储
2.virtqueue_buffer_enqueue:将请求任务放入队列中
3.virtio_blkdev_submit_request:通知host有请求任务
当host完成请求任务后,将消息传回:
1.virtio_blkdev_queue_dequeue:将所有完成的请求出队
2.virtio_blkdev_complete_reqs:通知guest完成了请求
当所有IO请求完成,无需使用块设备时,块设备会销毁队列:
1.virtio_blkdev_stop:停止virtio块设备通信
2.virtio_blkdev_queue_release:释放队列成员
3.virtio_blkdev_unconfigure:释放队列所使用内存空间

块设备请求信息存入分散/聚集列表(sglist)

建立分散/聚集列表(scatter-gather list,即sglist),将virtio_blk_req的请求头(virtio_blk_outhdr)、数据缓冲区和状态字节(status)依次追加到unikraft块设备队列的sglist的段中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/**
 * Build the scatter-gather list that describes one virtio block request.
 *
 * queue->sg is reset and then filled, in this order, with:
 *   1. the request header (virtio_blk_req->virtio_blk_outhdr),
 *   2. the data buffer req->aio_buf, only when have_data is set, split into
 *      chunks of at most vbdev->max_size_segment bytes,
 *   3. the one-byte status field (virtio_blk_req->status).
 *
 * @param queue          queue whose sglist (queue->sg) is rebuilt
 * @param virtio_blk_req request wrapper supplying header, status and the
 *                       underlying uk_blkreq
 * @param sector_size    sector size; multiplied by req->nb_sectors to obtain
 *                       the length of the data buffer
 * @param have_data      true when the data buffer has to be appended
 * @return 0 on success, otherwise the non-zero value returned by
 *         uk_sglist_append()
 */
static int virtio_blkdev_request_set_sglist(struct uk_blkdev_queue *queue,
        struct virtio_blkdev_request *virtio_blk_req,
        __sector sector_size,
        bool have_data)
{
    struct virtio_blk_device *vbdev;
    struct uk_blkreq *req;
    size_t data_size = 0;
    size_t segment_size;
    size_t segment_max_size;
    size_t idx;
    uintptr_t start_data;
    int rc = 0;

    UK_ASSERT(queue);
    UK_ASSERT(virtio_blk_req);

    req = virtio_blk_req->req;
    vbdev = queue->vbd;
    start_data = (uintptr_t)req->aio_buf;
    data_size = req->nb_sectors * sector_size;
    segment_max_size = vbdev->max_size_segment;

    /* Prepare the sglist */
    uk_sglist_reset(&queue->sg);

    /* First element: the request header */
    rc = uk_sglist_append(&queue->sg, &virtio_blk_req->virtio_blk_outhdr,
            sizeof(struct virtio_blk_outhdr));
    if (unlikely(rc != 0)) {
        uk_pr_err("Failed to append to sg list %d\n", rc);
        goto out;
    }

    /* Append to sglist chunks of `segment_max_size` size
     * Only for read / write operations
     *
     * NOTE(review): idx advances by segment_max_size, so a value of 0
     * would never terminate this loop; confirm it cannot be 0 here.
     **/
    if (have_data) {
        for (idx = 0; idx < data_size; idx += segment_max_size) {
            /* Remaining bytes, capped at the per-segment maximum */
            segment_size = data_size - idx;
            segment_size = (segment_size > segment_max_size) ?
                    segment_max_size : segment_size;
            rc = uk_sglist_append(&queue->sg,
                    (void *)(start_data + idx),
                    segment_size);
            if (unlikely(rc != 0)) {
                uk_pr_err("Failed to append to sg list %d\n",
                        rc);
                goto out;
            }
        }
    }

    /* Last element: the status byte */
    rc = uk_sglist_append(&queue->sg, &virtio_blk_req->status,
            sizeof(uint8_t));
    if (unlikely(rc != 0)) {
        uk_pr_err("Failed to append to sg list %d\n", rc);
        goto out;
    }
out:
    return rc;
}

前端驱动准备好请求,并通知后端

该函数是unikraft块设备uk_blkdev成员submit_one的具体实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/**
 * Submit a single block request to the device; installed as the submit_one
 * hook of the uk_blkdev.
 *
 * The request is enqueued with virtio_blkdev_queue_enqueue(); on success the
 * host is notified through virtqueue_host_notify().
 *
 * @param dev   block device the request belongs to (only asserted non-NULL)
 * @param queue queue the request is placed on
 * @param req   request to submit
 * @return on success a status bitmask: UK_BLKDEV_STATUS_SUCCESS, plus
 *         UK_BLKDEV_STATUS_MORE when the enqueue result was > 0;
 *         on failure the negative value returned by the enqueue
 *         (-ENOSPC when no descriptors were available)
 */
static int virtio_blkdev_submit_request(struct uk_blkdev *dev,
        struct uk_blkdev_queue *queue,
        struct uk_blkreq *req)
{
    int rc = 0;
    int status = 0x0;

    UK_ASSERT(req);
    UK_ASSERT(queue);
    UK_ASSERT(dev);

    rc = virtio_blkdev_queue_enqueue(queue, req);
    if (likely(rc >= 0)) {
        uk_pr_debug("Success and more descriptors available\n");
        status |= UK_BLKDEV_STATUS_SUCCESS;
        /**
         * Notify the host the new buffer.
         */
        virtqueue_host_notify(queue->vq);
        /**
         * When there is further space available in the ring
         * return UK_BLKDEV_STATUS_MORE.
         */
        status |= likely(rc > 0) ? UK_BLKDEV_STATUS_MORE : 0x0;
    } else if (rc == -ENOSPC) {
        uk_pr_debug("No more descriptors available\n");
        goto err;
    } else {
        uk_pr_err("Failed to enqueue descriptors into the ring: %d\n",
              rc);
        goto err;
    }

    return status;
err:
    return rc;
}

块设备请求的入队与出队

virtio-queue实现了环形缓冲区(ring buffer),用于保存前端驱动和后端处理程序的执行信息。它可以以批量的方式保存前端驱动的多次I/O请求,并交由后端批量处理,从而减少虚拟运行环境的模式切换,提高Guest与hypervisor信息交换的效率。
virtqueue通信

请求事件加载到virtqueue

1.释放所有在uk_blkdev_queue中free_list所链接到的已完成的请求
2.分配virtio_blk_req空间存放uk_blkreq
3.根据请求操作类型(读、写、flush),分配读写段
4.将请求事件加入virtqueue中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
static int virtio_blkdev_queue_enqueue(struct uk_blkdev_queue *queue,
        struct uk_blkreq *req)
{
    struct virtio_blkdev_request *virtio_blk_req;
    __u16 write_segs = 0;
    __u16 read_segs = 0;
    int rc = 0;

    UK_ASSERT(queue);

    // 如果virtqueue不存在
    if (virtqueue_is_full(queue->vq)) {
        uk_pr_debug("The virtqueue is full\n");
        return -ENOSPC;
    }

    // 释放所有在queue->free_list中的请求(此类请求任务可能已经完成)
    virtio_blkdev_queue_cleanup_requests(queue);
    virtio_blk_req = uk_malloc(a, sizeof(*virtio_blk_req));
    if (!virtio_blk_req)
        return -ENOMEM;

    virtio_blk_req->req = req;
    virtio_blk_req->virtio_blk_outhdr.sector = req->start_sector;
    if (req->operation == UK_BLKREQ_WRITE ||
            req->operation == UK_BLKREQ_READ)
        rc = virtio_blkdev_request_write(queue, virtio_blk_req,
                &read_segs, &write_segs);
    else if (req->operation == UK_BLKREQ_FFLUSH)
        rc = virtio_blkdev_request_flush(queue, virtio_blk_req,
                &read_segs, &write_segs);
    else
        return -EINVAL;

    if (rc)
        goto out;

    // 将virtio_blk_req的请求放入virtqueue中(vring)
    rc = virtqueue_buffer_enqueue(queue->vq, virtio_blk_req, &queue->sg,
                      read_segs, write_segs);
out:
    return rc;
}
virtqueue_buffer_enqueue

具体流程如下:
1.由head_free_desc索引到空闲的vq_info[head_free_desc],用于链接要装载的请求
2.将uk_blkdev_queue的读写段(请求事件的写入数据等)加载到vring
3.更新avail的ring,将head_free_desc写入ring
4.更新head_free_desc,(下一个空闲vring_desc的索引)
入队流程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
/**
 * Place one buffer, described by a scatter-gather list, on the virtqueue.
 *
 * @param vq         virtqueue to enqueue on
 * @param cookie     caller token stored with the head descriptor; must not
 *                   be NULL
 * @param sg         scatter-gather list holding the segments
 * @param read_bufs  number of read segments
 * @param write_bufs number of write segments
 * @return the number of descriptors still available after the enqueue;
 *         -EINVAL if the segment count is 0 or exceeds the ring size,
 *         -ENOSPC if fewer descriptors are available than requested
 */
int virtqueue_buffer_enqueue(struct virtqueue *vq, void *cookie,
                 struct uk_sglist *sg, __u16 read_bufs,
                 __u16 write_bufs)
{
    struct virtqueue_vring *ring;
    __u32 needed;
    __u16 old_head;
    __u16 new_head;

    UK_ASSERT(vq);

    ring = to_virtqueue_vring(vq);
    /* One descriptor per segment */
    needed = read_bufs + write_bufs;

    if (unlikely(needed < 1 || needed > ring->vring.num)) {
        uk_pr_err("%"__PRIu32" invalid number of descriptor\n",
              needed);
        return -EINVAL;
    }
    if (ring->desc_avail < needed) {
        /* Not enough free descriptors left for this request */
        uk_pr_err("Available descriptor:%"__PRIu16", Requested descriptor:%"__PRIu32"\n",
              ring->desc_avail, needed);
        return -ENOSPC;
    }

    /* The chain starts at the current head of the free descriptors */
    old_head = ring->head_free_desc;
    UK_ASSERT(cookie);

    /* Bookkeeping needed to hand the buffer back on dequeue */
    ring->vq_info[old_head].cookie = cookie;
    ring->vq_info[old_head].desc_count = needed;

    /* Fill the descriptors; the helper returns the next free head */
    new_head = virtqueue_buffer_enqueue_segments(ring, old_head, sg,
            read_bufs, write_bufs);

    /* Metadata maintenance for the virtqueue */
    ring->head_free_desc = new_head;
    ring->desc_avail -= needed;

    uk_pr_debug("Old head:%d, new head:%d, total_desc:%d\n",
            old_head, new_head, needed);

    /* Record the chain head in the avail ring */
    virtqueue_ring_update_avail(ring, old_head);
    return ring->desc_avail;
}

请求事件从virtqueue出队

1.将请求从virtqueue队列中出队
2.更新请求事件的结果(完成or未完成)
3.将请求加入virtio块队列的free_list中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/**
 * Take one completed request off the virtqueue.
 *
 * @param queue queue to dequeue from
 * @param req   out: the completed uk_blkreq, or NULL when nothing was
 *              dequeued or the response was invalid
 * @return 0 when the virtqueue had nothing to deliver; -EINVAL when the
 *         response is shorter than one byte; otherwise the value returned
 *         by virtqueue_buffer_dequeue()
 */
static int virtio_blkdev_queue_dequeue(struct uk_blkdev_queue *queue,
        struct uk_blkreq **req)
{
    struct virtio_blkdev_request *wrapper;
    __u32 used_len;
    int rc;

    UK_ASSERT(req);
    *req = NULL;

    rc = virtqueue_buffer_dequeue(queue->vq, (void **) &wrapper, &used_len);
    if (rc < 0) {
        uk_pr_info("No data available in the queue\n");
        return 0;
    }

    if (unlikely(used_len < 1)) {
        /* We need at least one byte for the result status */
        uk_pr_err("Received invalid response size: %u\n", used_len);
        rc = -EINVAL;
    } else {
        /* Report the negated status byte as the request result */
        *req = wrapper->req;
        (*req)->result = -wrapper->status;
    }

    /* Either way the wrapper is parked on the free list for later release */
    uk_list_add(&wrapper->free_list_head, &queue->free_list);
    return rc;
}
virtqueue_buffer_dequeue

1.获取请求在vring_desc描述符中的索引
2.读取要出队的请求
3.将请求在vring的描述符删除
4.返回vring中仍被占用的vring_desc描述符数量(即vring.num - desc_avail)
usedring索引

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/**
 * Remove one used buffer from the virtqueue.
 *
 * @param vq     virtqueue to dequeue from
 * @param cookie out: the cookie stored in vq_info for the dequeued chain
 * @param len    out (optional, may be NULL): the len field of the used
 *               ring element
 * @return -ENOMSG when virtqueue_hasdata() reports nothing new; otherwise
 *         vring.num - desc_avail, i.e. the number of descriptors that are
 *         not available after the dequeue
 */
int virtqueue_buffer_dequeue(struct virtqueue *vq, void **cookie, __u32 *len)
{
    struct virtqueue_vring *vrq = NULL;
    __u16 used_idx, head_idx;
    struct vring_used_elem *elem;

    UK_ASSERT(vq);
    UK_ASSERT(cookie);
    vrq = to_virtqueue_vring(vq); // vring structure backing this virtqueue

    /* No new descriptor since last dequeue operation */
    if (!virtqueue_hasdata(vq))
        return -ENOMSG;
    // Slot in the used ring to consume next; the mask assumes vring.num
    // is a power of two. The running index is advanced as a side effect.
    used_idx = vrq->last_used_desc_idx++ & (vrq->vring.num - 1);
    elem = &vrq->vring.used->ring[used_idx];
    /**
     * We are reading from the used descriptor information updated by the
     * host.
     */
    rmb();
    head_idx = elem->id;  // index of the head descriptor of the used chain
    if (len)
        *len = elem->len;
    // Fetch the cookie that was stored for this chain when it was enqueued
    *cookie = vrq->vq_info[head_idx].cookie;
    // Give the chain starting at head_idx back (implemented elsewhere;
    // per the original note it marks the descriptors free and makes
    // head_idx the new head_free_desc)
    virtqueue_detach_desc(vrq, head_idx);
    vrq->vq_info[head_idx].cookie = NULL;  // drop the stale cookie
    return (vrq->vring.num - vrq->desc_avail);
}

后端完成请求事件

该函数是unikraft块设备uk_blkdev成员finish_reqs的具体实现
1.将所有完成的请求事件从virtqueue中出队
2.将完成的请求事件标记为已完成

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
static int virtio_blkdev_complete_reqs(struct uk_blkdev *dev,
        struct uk_blkdev_queue *queue)
{
    struct uk_blkreq *req;
    int rc = 0;

    UK_ASSERT(dev);

    /* Queue interrupts have to be off when calling receive */
    UK_ASSERT(!(queue->intr_enabled & VTBLK_INTR_EN));

moretodo:
    // 将所有完成的请求出队
    for (;;) {
        rc = virtio_blkdev_queue_dequeue(queue, &req);
        if (unlikely(rc < 0)) {
            uk_pr_err("Failed to dequeue the request: %d\n", rc);
            goto err_exit;
        }

        if (!req)
            break;

        // 标记请求已经完成
        uk_blkreq_finished(req);
        if (req->cb)
            req->cb(req, req->cb_cookie);
    }

    /* Enable interrupt only when user had previously enabled it */
    if (queue->intr_enabled & VTBLK_INTR_USR_EN_MASK) {
        rc = virtqueue_intr_enable(queue->vq);
        if (rc == 1)
            goto moretodo;
    }

    return 0;

err_exit:
    return rc;
}

uk_blkdev ops的底层实现

virtio_blkdev_configure

充当uk_blkdev的ops操作中dev_configure的具体实现
为virtio块设备分配uk_blkdev_queue队列的内存空间,功能由virtio_blkdev_queues_alloc实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Configure the device: allocate the uk_blkdev_queue array through
 * virtio_blkdev_queues_alloc(). Serves as the dev_configure operation.
 *
 * @param dev  block device to configure
 * @param conf requested configuration
 * @return 0 on success, otherwise the code from the queue allocation
 */
static int virtio_blkdev_configure(struct uk_blkdev *dev,
        const struct uk_blkdev_conf *conf)
{
    struct virtio_blk_device *vbdev;
    int err;

    UK_ASSERT(dev != NULL);
    UK_ASSERT(conf != NULL);

    vbdev = to_virtioblkdev(dev);

    err = virtio_blkdev_queues_alloc(vbdev, conf);
    if (err != 0) {
        uk_pr_err("Failed to allocate the queues %d\n", err);
        return err;
    }

    uk_pr_info(DRIVER_NAME": %"__PRIu16" configured\n", vbdev->uid);
    return 0;
}
virtio_blkdev_queues_alloc

为virtio块设备分配uk_blkdev_queue队列的内存空间,数量与virtqueue保持一致
1.查看现存的virtqueue数量
2.为virtio_blk_device分配uk_blkdev_queue队列(根据virtqueue的数量)的内存空间
3.为每个队列赋予最多可用的描述符数量

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
static int virtio_blkdev_queues_alloc(struct virtio_blk_device *vbdev,
                    const struct uk_blkdev_conf *conf)
{
    int rc = 0;
    uint16_t i = 0;
    int vq_avail = 0;
    __u16 qdesc_size[conf->nb_queues];

    if (conf->nb_queues > vbdev->max_vqueue_pairs) {
        uk_pr_err("Queue number not supported: %"__PRIu16"\n",
                conf->nb_queues);
        return -ENOTSUP;
    }

    vbdev->nb_queues = conf->nb_queues;
    // 查看现存的virtqueue队列的数量,调用vdev->cops->vqs_find
    vq_avail = virtio_find_vqs(vbdev->vdev, conf->nb_queues, qdesc_size);
    if (unlikely(vq_avail != conf->nb_queues)) {
        uk_pr_err("Expected: %d queues, Found: %d queues\n",
                conf->nb_queues, vq_avail);
        rc = -ENOMEM;
        goto exit;
    }

    vbdev->qs = uk_calloc(a, conf->nb_queues, sizeof(*vbdev->qs));
    if (unlikely(vbdev->qs == NULL)) {
        uk_pr_err("Failed to allocate memory for queue management\n");
        rc = -ENOMEM;
        goto exit;
    }

    // 为每个uk_blkdev_queue赋值max_nb_desc
    for (i = 0; i < conf->nb_queues; ++i)
        vbdev->qs[i].max_nb_desc = qdesc_size[i];

exit:
    return rc;
}

virtio_blkdev_queue_setup

充当uk_blkdev的ops操作中queue_configure的具体实现
设置virtio_blkdev的uk_blkdev_queue队列的成员
1.为uk_blkdev_queue的成员指定赋值
2.为uk_blkdev_queue分配分散/聚集列表(sglist)
3.设置virtqueue的成员

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/**
 * Set up one queue of the device; serves as the queue_configure operation.
 *
 * Fills in the members of vbdev->qs[queue_id], allocates and initialises
 * its scatter-gather segment array and creates the backing virtqueue.
 *
 * @param dev        block device owning the queue
 * @param queue_id   index of the queue, must be below vbdev->nb_queues
 * @param nb_desc    number of descriptors requested for the virtqueue
 * @param queue_conf queue configuration; supplies the allocator
 * @return the configured queue, or an ERR2PTR()-encoded error: -EINVAL for
 *         a bad queue_id, -ENOMEM if the segment array cannot be allocated,
 *         or the code from virtio_blkdev_vqueue_setup()
 */
static struct uk_blkdev_queue *virtio_blkdev_queue_setup(struct uk_blkdev *dev,
        uint16_t queue_id,
        uint16_t nb_desc,
        const struct uk_blkdev_queue_conf *queue_conf)
{
    struct virtio_blk_device *vbdev;
    struct uk_blkdev_queue *q;
    int err;

    UK_ASSERT(dev != NULL);
    UK_ASSERT(queue_conf != NULL);

    /* Resolve the virtio block device that embeds dev */
    vbdev = to_virtioblkdev(dev);
    if (unlikely(queue_id >= vbdev->nb_queues)) {
        uk_pr_err("Invalid queue_id %"__PRIu16"\n", queue_id);
        return ERR2PTR(-EINVAL);
    }

    q = &vbdev->qs[queue_id];
    q->a = queue_conf->a;

    /* Init sglist */
    q->sgsegs = uk_malloc(q->a,
            vbdev->max_segments * sizeof(*q->sgsegs));
    if (unlikely(!q->sgsegs))
        return ERR2PTR(-ENOMEM);

    uk_sglist_init(&q->sg, vbdev->max_segments,
            q->sgsegs);
    q->vbd = vbdev;
    q->nb_desc = nb_desc;
    q->lqueue_id = queue_id;
    UK_INIT_LIST_HEAD(&q->free_list);

    /* Setup the virtqueue with the descriptor */
    err = virtio_blkdev_vqueue_setup(q, nb_desc);
    if (err < 0) {
        uk_pr_err("Failed to set up virtqueue %"__PRIu16": %d\n",
              queue_id, err);
        /* Undo the segment array allocation before reporting */
        uk_free(q->a, q->sgsegs);
        return ERR2PTR(err);
    }

    return q;
}
virtio_blkdev_vqueue_setup

创建virtio_blkdev的一个virtqueue队列
1.确定要创建的virtqueue的描述符的数量
2.创建virtqueue

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
static int virtio_blkdev_vqueue_setup(struct uk_blkdev_queue *queue,
        uint16_t nr_desc)
{
    uint16_t max_desc;
    struct virtqueue *vq;

    UK_ASSERT(queue);
    max_desc = queue->max_nb_desc;
    if (unlikely(max_desc < nr_desc)) {
        uk_pr_err("Max desc: %"__PRIu16" Requested desc:%"__PRIu16"\n",
              max_desc, nr_desc);
        return -ENOBUFS;
    }

    nr_desc = (nr_desc) ? nr_desc : max_desc;
    uk_pr_debug("Configuring the %d descriptors\n", nr_desc);

    /* Check if the descriptor is a power of 2 */
    if (unlikely(nr_desc & (nr_desc - 1))) {
        uk_pr_err("Expected descriptor count as a power 2\n");
        return -EINVAL;
    }

    // 对vdev->cops->vq_setup的一层抽象
    vq = virtio_vqueue_setup(queue->vbd->vdev, queue->lqueue_id, nr_desc,
            virtio_blkdev_recv_done, a);
    if (unlikely(PTRISERR(vq))) {
        uk_pr_err("Failed to set up virtqueue %"__PRIu16"\n",
              queue->lqueue_id);
        return PTR2ERR(vq);
    }

    queue->vq = vq;
    vq->priv = queue;

    return 0;
}

virtio_blkdev_recv_done

virtqueue将事件从后端发送至前端(前端接收)
在块设备通信中,充当virtqueue的vq_callback回调函数的实现接口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Virtqueue callback: disables the ring interrupt, clears VTBLK_INTR_EN in
 * the queue state and forwards the event to the block device layer.
 *
 * @param vq   virtqueue that raised the event
 * @param priv the uk_blkdev_queue registered for that virtqueue
 * @return always 1
 */
static int virtio_blkdev_recv_done(struct virtqueue *vq, void *priv)
{
    struct uk_blkdev_queue *q;

    UK_ASSERT(vq && priv);

    q = priv;

    /* Disable the interrupt for the ring */
    virtqueue_intr_disable(vq);
    q->intr_enabled &= ~(VTBLK_INTR_EN);

    /* Forward the virtqueue event for this queue id */
    uk_blkdev_drv_queue_event(&q->vbd->blkdev, q->lqueue_id);

    return 1;
}

virtio_blkdev_start

充当uk_blkdev的ops操作中dev_start的具体实现
标记virtio配置完成,可以与后端开始通信和数据传输

1
2
3
4
5
6
7
8
9
10
11
12
13
/**
 * Start the device; serves as the dev_start operation. Calls
 * virtio_dev_drv_up() on the underlying virtio device and logs the event.
 *
 * @param dev block device to start
 * @return always 0
 */
static int virtio_blkdev_start(struct uk_blkdev *dev)
{
    struct virtio_blk_device *vbdev;

    UK_ASSERT(dev != NULL);

    vbdev = to_virtioblkdev(dev);

    /* Signal that driver-side setup of the virtio device is complete */
    virtio_dev_drv_up(vbdev->vdev);

    uk_pr_info(DRIVER_NAME": %"__PRIu16" started\n", vbdev->uid);
    return 0;
}

virtio_blkdev_stop

充当uk_blkdev的ops操作中dev_stop的具体实现
停止virtio块设备的通信

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
static int virtio_blkdev_stop(struct uk_blkdev *dev)
{
    struct virtio_blk_device *d;
    uint16_t q_id;
    int rc = 0;

    UK_ASSERT(dev != NULL);
    d = to_virtioblkdev(dev);
    // 遍历vbd所有uk_blkdev_queue,其对应的virtqueue不能有请求
    for (q_id = 0; q_id < d->nb_queues; ++q_id) {
        if (virtqueue_hasdata(d->qs[q_id].vq)) {
            uk_pr_err("Queue:%"__PRIu16" has unconsumed responses\n",
                    q_id);
            return -EBUSY;
        }
    }
   
    // 重建virtio块设备,调用vdev->cops->device_reset
    rc = virtio_dev_reset(d->vdev);
    if (rc) {
        uk_pr_info(DRIVER_NAME":%"__PRIu16" stopped", d->uid);
        goto out;
    }

    uk_pr_warn(DRIVER_NAME":%"__PRIu16" Start is not allowed!!!", d->uid);

out:
    return rc;
}

virtio_blkdev_queue_release

充当uk_blkdev的ops操作中queue_unconfigure的具体实现
释放uk_blkdev_queue队列的成员
1.释放所有在queue->free_list中的请求(此类请求任务可能已经完成)
2.释放sglist(分散/聚集列表)段数组的内存空间
3.释放virtqueue队列的成员

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/**
 * Release the resources of one queue; serves as the queue_unconfigure
 * operation.
 *
 * @param dev   block device owning the queue
 * @param queue queue to release
 * @return always 0
 */
static int virtio_blkdev_queue_release(struct uk_blkdev *dev,
        struct uk_blkdev_queue *queue)
{
    struct virtio_blk_device *owner;

    UK_ASSERT(dev != NULL);
    owner = to_virtioblkdev(dev);

    /* Drop the request wrappers parked on queue->free_list */
    virtio_blkdev_queue_cleanup_requests(queue);

    /* Free the scatter-gather segment array */
    uk_free(queue->a, queue->sgsegs);

    /* Finally release the virtqueue itself */
    virtio_vqueue_release(owner->vdev, queue->vq, queue->a);

    return 0;
}

virtio_blkdev_unconfigure

充当uk_blkdev的ops操作中dev_unconfigure的具体实现
释放virtio块设备中的uk_blkdev_queue的队列的内存空间

1
2
3
4
5
6
7
8
9
10
/**
 * Undo virtio_blkdev_configure(); serves as the dev_unconfigure operation.
 *
 * Frees the uk_blkdev_queue array and clears the pointer and the queue
 * count, so that a later queue setup is rejected by its queue_id range
 * check instead of indexing freed memory.
 *
 * @param dev block device to unconfigure
 * @return always 0
 */
static int virtio_blkdev_unconfigure(struct uk_blkdev *dev)
{
    struct virtio_blk_device *d;

    UK_ASSERT(dev != NULL);
    d = to_virtioblkdev(dev);

    uk_free(a, d->qs);
    /* Do not leave a dangling array behind */
    d->qs = NULL;
    d->nb_queues = 0;

    return 0;
}

驱动创建virtio块设备

在块设备io通信中,virtio_blk_add_dev为virtio驱动的add_dev成员
1.分配virtio块设备的内存空间
2.设置virtio块设备的成员
3.注册uk_blkdev块设备
4.设置驱动程序支持的功能
5.进行virtio块设备的特征协商

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
static int virtio_blk_add_dev(struct virtio_dev *vdev)
{
    struct virtio_blk_device *vbdev;
    int rc = 0;

    UK_ASSERT(vdev != NULL);

    vbdev = uk_calloc(a, 1, sizeof(*vbdev));
    if (!vbdev)
        return -ENOMEM;

    vbdev->vdev = vdev;
    vbdev->blkdev.finish_reqs = virtio_blkdev_complete_reqs;
    vbdev->blkdev.submit_one = virtio_blkdev_submit_request;
    vbdev->blkdev.dev_ops = &virtio_blkdev_ops;

    rc = uk_blkdev_drv_register(&vbdev->blkdev, a, drv_name);
    if (rc < 0) {
        uk_pr_err("Failed to register virtio_blk device: %d\n", rc);
        goto err_out;
    }

    vbdev->uid = rc;
    // 设置驱动程序支持的功能
    virtio_blkdev_feature_set(vbdev);
    // 进行特征协商
    rc = virtio_blkdev_feature_negotiate(vbdev);
    if (rc) {
        uk_pr_err("Failed to negotiate the device feature %d\n", rc);
        goto err_negotiate_feature;
    }

    uk_pr_info("Virtio-blk device registered with libukblkdev\n");

out:
    return rc;
err_negotiate_feature:
    virtio_dev_status_update(vbdev->vdev, VIRTIO_CONFIG_STATUS_FAIL);
err_out:
    uk_free(a, vbdev);
    goto out;
}

virtio_blkdev_feature_negotiate

virtio块设备的特征协商

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
static int virtio_blkdev_feature_negotiate(struct virtio_blk_device *vbdev)
{
    struct uk_blkdev_cap *cap;
    __u64 host_features = 0;
    __sector sectors;
    __sector ssize;
    __u16 num_queues;
    __u32 max_segments;
    __u32 max_size_segment;
    int rc = 0;

    UK_ASSERT(vbdev);
    cap = &vbdev->blkdev.capabilities;
    // 获得host的features,调用vdev->cops->features_get
    host_features = virtio_feature_get(vbdev->vdev);

    /* Get size of device */
    // 1. 得到设备中sectors的数量,调用vdev->cops->config_get
    rc = virtio_config_get(vbdev->vdev,
            __offsetof(struct virtio_blk_config, capacity),
            &sectors,
            sizeof(sectors),
            1);
    if (unlikely(rc)) {
        uk_pr_err("Failed to get nb of sectors from device %d\n", rc);
        goto exit;
    }
    // 2. 得到设备中sector的size信息
    // 如果没有sectors的size信息,设为默认值
    if (!VIRTIO_FEATURE_HAS(host_features, VIRTIO_BLK_F_BLK_SIZE)) {
        ssize = DEFAULT_SECTOR_SIZE;
    } else {
        rc = virtio_config_get(vbdev->vdev,
                __offsetof(struct virtio_blk_config, blk_size),
                &ssize,
                sizeof(ssize),
                1);
        if (unlikely(rc)) {
            uk_pr_err("Failed to get ssize from the device %d\n",
                    rc);
            goto exit;
        }
    }

    /* If the device does not support multi-queues,
     * we will use only one queue.
     */
    // 3. 得到设备中virtqueue数量
    if (VIRTIO_FEATURE_HAS(host_features, VIRTIO_BLK_F_MQ)) {
        rc = virtio_config_get(vbdev->vdev,
                    __offsetof(struct virtio_blk_config,
                            num_queues),
                    &num_queues,
                    sizeof(num_queues),
                    1);
        if (unlikely(rc)) {
            uk_pr_err("Failed to read max-queues\n");
            goto exit;
        }
    } else
        num_queues = 1;

    // 4. 得到设备中可承受的最大段数
    if (VIRTIO_FEATURE_HAS(host_features, VIRTIO_BLK_F_SEG_MAX)) {
        rc = virtio_config_get(vbdev->vdev,
            __offsetof(struct virtio_blk_config, seg_max),
            &max_segments,
            sizeof(max_segments),
            1);
        if (unlikely(rc)) {
            uk_pr_err("Failed to get maximum nb of segments\n");
            goto exit;
        }
    } else
        max_segments = 1;

    /* We need extra sg elements for head (header) and tail (status). */
    max_segments += 2;

    // 5. 得到设备中段可用的最大size
    if (VIRTIO_FEATURE_HAS(host_features, VIRTIO_BLK_F_SIZE_MAX)) {
        rc = virtio_config_get(vbdev->vdev,
            __offsetof(struct virtio_blk_config, size_max),
            &max_size_segment,
            sizeof(max_size_segment),
            1);
        if (unlikely(rc)) {
            uk_pr_err("Failed to get size max from device %d\n",
                    rc);
            goto exit;
        }
    } else
        max_size_segment = __PAGE_SIZE;

    cap->ssize = ssize;
    cap->sectors = sectors;
    cap->ioalign = sizeof(void *);
    cap->mode = (VIRTIO_FEATURE_HAS(
            host_features, VIRTIO_BLK_F_RO)) ? O_RDONLY : O_RDWR;
    cap->max_sectors_per_req =
            max_size_segment / ssize * (max_segments - 2);

    vbdev->max_vqueue_pairs = num_queues;
    vbdev->max_segments = max_segments;
    vbdev->max_size_segment = max_size_segment;
    vbdev->writeback = VIRTIO_FEATURE_HAS(host_features,
                VIRTIO_BLK_F_FLUSH);

    /**
     * Mask out features supported by both driver and device.
     */
    vbdev->vdev->features &= host_features;
    // 设置virtio_dev的features,调用vdev->cops->features_set
    virtio_feature_set(vbdev->vdev, vbdev->vdev->features);

exit:
    return rc;
}

与PCI的交互

上述函数中有很多功能的底层实现均调用了virtio_dev的配置操作*ops,结构如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/**
 * Table of transport-specific operations of a virtio device. Every member
 * is a function pointer taking the virtio device as its first argument.
 */
struct virtio_config_ops {
    /** Resetting the device */
    void (*device_reset)(struct virtio_dev *vdev);
    /** Set configuration option */
    int (*config_set)(struct virtio_dev *vdev, __u16 offset,
              const void *buf, __u32 len);
    /** Get configuration option */
    int (*config_get)(struct virtio_dev *vdev, __u16 offset, void *buf,
              __u32 len, __u8 type_len);
    /** Get the feature */
    __u64 (*features_get)(struct virtio_dev *vdev);
    /** Set the feature */
    void (*features_set)(struct virtio_dev *vdev, __u64 features);
    /** Get and Set Status */
    __u8 (*status_get)(struct virtio_dev *vdev);
    void (*status_set)(struct virtio_dev *vdev, __u8 status);
    /** Find the virtqueue */
    int (*vqs_find)(struct virtio_dev *vdev, __u16 num_vq, __u16 *vq_size);
    /**
     * Setup the virtqueue
     *
     * NOTE(review): virtio_blkdev_vqueue_setup() passes the queue id before
     * the descriptor count to virtio_vqueue_setup(), while the parameters
     * here are named num_desc, then queue_id; confirm which order the
     * implementations actually expect.
     */
    struct virtqueue *(*vq_setup)(struct virtio_dev *vdev, __u16 num_desc,
                      __u16 queue_id,
                      virtqueue_callback_t callback,
                      struct uk_alloc *a);
    /** Release a virtqueue using the given allocator */
    void (*vq_release)(struct virtio_dev *vdev, struct virtqueue *vq,
                struct uk_alloc *a);
};

blk本身也是一个pci设备,因此blk将通过virtio dev与pci交互,virtio ops配置操作主要是由pci实现
pci交互
virtio_pci.c文件中vpci_legacy_ops负责了virtio dev的配置操作ops的实现

1
2
3
4
5
6
7
8
9
10
11
12
static struct virtio_config_ops vpci_legacy_ops = {
    .device_reset = vpci_legacy_pci_dev_reset,
    .config_get   = vpci_legacy_pci_config_get,
    .config_set   = vpci_legacy_pci_config_set,
    .features_get = vpci_legacy_pci_features_get,
    .features_set = vpci_legacy_pci_features_set,
    .status_get   = vpci_legacy_pci_status_get,
    .status_set   = vpci_legacy_pci_status_set,
    .vqs_find     = vpci_legacy_pci_vq_find,
    .vq_setup     = vpci_legacy_vq_setup,
    .vq_release   = vpci_legacy_vq_release,
};

同时,在virtio_mmio.c文件中virtio_mmio_config_ops也有一个virtio dev的配置操作ops的实现

1
2
3
4
5
6
7
8
9
10
11
/*
 * virtio_config_ops implementation for the virtio MMIO transport.
 * Entries are listed in the order the members are declared in
 * struct virtio_config_ops. vq_release is not set here and therefore
 * stays NULL.
 */
static struct virtio_config_ops virtio_mmio_config_ops = {
    .device_reset = vm_reset,
    .config_set   = vm_set,
    .config_get   = vm_get,
    .features_get = vm_get_features,
    .features_set = vm_set_features,
    .status_get   = vm_get_status,
    .status_set   = vm_set_status,
    .vqs_find     = vm_find_vqs,
    .vq_setup     = vm_setup_vq,
};