2
0
Эх сурвалжийг харах

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio, vhost: fixes, features

IOTLB support in vhost-user.
A bunch of fixes all over the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 02 Jun 2017 17:33:25 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  spec/vhost-user spec: Add IOMMU support
  vhost-user: add slave-req-fd support
  vhost-user: add vhost_user to hold the chr
  vhost: rework IOTLB messaging
  vhost: propagate errors in vhost_device_iotlb_miss()
  virtio-serial: fix segfault on disconnect
  virtio: add virtqueue_alloc_element tracepoint
  virtio-serial-bus: Unset hotplug handler when unrealize

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Peter Maydell 8 жил өмнө
parent
commit
c6e84fbd44

+ 114 - 2
docs/specs/vhost-user.txt

@@ -97,6 +97,25 @@ Depending on the request type, payload can be:
    log offset: offset from start of supplied file descriptor
        where logging starts (i.e. where guest address 0 would be logged)
 
+ * An IOTLB message
+   ---------------------------------------------------------
+   | iova | size | user address | permissions flags | type |
+   ---------------------------------------------------------
+
+   IOVA: a 64-bit I/O virtual address programmed by the guest
+   Size: a 64-bit size
+   User address: a 64-bit user address
+   Permissions: a 8-bit value:
+    - 0: No access
+    - 1: Read access
+    - 2: Write access
+    - 3: Read/Write access
+   Type: a 8-bit IOTLB message type:
+    - 1: IOTLB miss
+    - 2: IOTLB update
+    - 3: IOTLB invalidate
+    - 4: IOTLB access fail
+
 In QEMU the vhost-user message is implemented with the following struct:
 
 typedef struct VhostUserMsg {
@@ -109,6 +128,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
         VhostUserLog log;
+        struct vhost_iotlb_msg iotlb;
     };
 } QEMU_PACKED VhostUserMsg;
 
@@ -139,6 +159,7 @@ in the ancillary data:
  * VHOST_USER_SET_VRING_KICK
  * VHOST_USER_SET_VRING_CALL
  * VHOST_USER_SET_VRING_ERR
+ * VHOST_USER_SET_SLAVE_REQ_FD
 
 If Master is unable to send the full message or receives a wrong reply it will
 close the connection. An optional reconnection mechanism can be implemented.
@@ -252,6 +273,50 @@ Once the source has finished migration, rings will be stopped by
 the source. No further update must be done before rings are
 restarted.
 
+IOMMU support
+-------------
+
+When the VIRTIO_F_IOMMU_PLATFORM feature has been negotiated, the master
+sends IOTLB entries update & invalidation by sending VHOST_USER_IOTLB_MSG
+requests to the slave with a struct vhost_iotlb_msg as payload. For update
+events, the iotlb payload has to be filled with the update message type (2),
+the I/O virtual address, the size, the user virtual address, and the
+permissions flags. Addresses and size must be within vhost memory regions set
+via the VHOST_USER_SET_MEM_TABLE request. For invalidation events, the iotlb
+payload has to be filled with the invalidation message type (3), the I/O virtual
+address and the size. On success, the slave is expected to reply with a zero
+payload, non-zero otherwise.
+
+The slave relies on the slave communcation channel (see "Slave communication"
+section below) to send IOTLB miss and access failure events, by sending
+VHOST_USER_SLAVE_IOTLB_MSG requests to the master with a struct vhost_iotlb_msg
+as payload. For miss events, the iotlb payload has to be filled with the miss
+message type (1), the I/O virtual address and the permissions flags. For access
+failure event, the iotlb payload has to be filled with the access failure
+message type (4), the I/O virtual address and the permissions flags.
+For synchronization purpose, the slave may rely on the reply-ack feature,
+so the master may send a reply when operation is completed if the reply-ack
+feature is negotiated and slaves requests a reply. For miss events, completed
+operation means either master sent an update message containing the IOTLB entry
+containing requested address and permission, or master sent nothing if the IOTLB
+miss message is invalid (invalid IOVA or permission).
+
+The master isn't expected to take the initiative to send IOTLB update messages,
+as the slave sends IOTLB miss messages for the guest virtual memory areas it
+needs to access.
+
+Slave communication
+-------------------
+
+An optional communication channel is provided if the slave declares
+VHOST_USER_PROTOCOL_F_SLAVE_REQ protocol feature, to allow the slave to make
+requests to the master.
+
+The fd is provided via VHOST_USER_SET_SLAVE_REQ_FD ancillary data.
+
+A slave may then send VHOST_USER_SLAVE_* messages to the master
+using this fd communication channel.
+
 Protocol features
 -----------------
 
@@ -260,9 +325,10 @@ Protocol features
 #define VHOST_USER_PROTOCOL_F_RARP           2
 #define VHOST_USER_PROTOCOL_F_REPLY_ACK      3
 #define VHOST_USER_PROTOCOL_F_MTU            4
+#define VHOST_USER_PROTOCOL_F_SLAVE_REQ      5
 
-Message types
--------------
+Master message types
+--------------------
 
  * VHOST_USER_GET_FEATURES
 
@@ -486,6 +552,52 @@ Message types
       If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond
       with zero in case the specified MTU is valid, or non-zero otherwise.
 
+ * VHOST_USER_SET_SLAVE_REQ_FD
+
+      Id: 21
+      Equivalent ioctl: N/A
+      Master payload: N/A
+
+      Set the socket file descriptor for slave initiated requests. It is passed
+      in the ancillary data.
+      This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES
+      has been negotiated, and protocol feature bit VHOST_USER_PROTOCOL_F_SLAVE_REQ
+      bit is present in VHOST_USER_GET_PROTOCOL_FEATURES.
+      If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond
+      with zero for success, non-zero otherwise.
+
+ * VHOST_USER_IOTLB_MSG
+
+      Id: 22
+      Equivalent ioctl: N/A (equivalent to VHOST_IOTLB_MSG message type)
+      Master payload: struct vhost_iotlb_msg
+      Slave payload: u64
+
+      Send IOTLB messages with struct vhost_iotlb_msg as payload.
+      Master sends such requests to update and invalidate entries in the device
+      IOTLB. The slave has to acknowledge the request with sending zero as u64
+      payload for success, non-zero otherwise.
+      This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature
+      has been successfully negotiated.
+
+Slave message types
+-------------------
+
+ * VHOST_USER_SLAVE_IOTLB_MSG
+
+      Id: 1
+      Equivalent ioctl: N/A (equivalent to VHOST_IOTLB_MSG message type)
+      Slave payload: struct vhost_iotlb_msg
+      Master payload: N/A
+
+      Send IOTLB messages with struct vhost_iotlb_msg as payload.
+      Slave sends such requests to notify of an IOTLB miss, or an IOTLB
+      access failure. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated,
+      and slave set the VHOST_USER_NEED_REPLY flag, master must respond with
+      zero when operation is successfully completed, or non-zero otherwise.
+      This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature
+      has been successfully negotiated.
+
 VHOST_USER_PROTOCOL_F_REPLY_ACK:
 -------------------------------
 The original vhost-user specification only demands replies for certain

+ 6 - 0
hw/char/virtio-serial-bus.c

@@ -186,6 +186,9 @@ static void do_flush_queued_data(VirtIOSerialPort *port, VirtQueue *vq,
                                   port->elem->out_sg[i].iov_base
                                   + port->iov_offset,
                                   buf_size);
+            if (!port->elem) { /* bail if we got disconnected */
+                return;
+            }
             if (port->throttled) {
                 port->iov_idx = i;
                 if (ret > 0) {
@@ -1121,6 +1124,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp)
         timer_free(vser->post_load->timer);
         g_free(vser->post_load);
     }
+
+    qbus_set_hotplug_handler(BUS(&vser->bus), NULL, errp);
+
     virtio_cleanup(vdev);
 }
 

+ 1 - 0
hw/net/vhost_net.c

@@ -77,6 +77,7 @@ static const int user_feature_bits[] = {
     VIRTIO_NET_F_HOST_UFO,
     VIRTIO_NET_F_MRG_RXBUF,
     VIRTIO_NET_F_MTU,
+    VIRTIO_F_IOMMU_PLATFORM,
 
     /* This bit implies RARP isn't sent by QEMU out of band */
     VIRTIO_NET_F_GUEST_ANNOUNCE,

+ 1 - 0
hw/virtio/trace-events

@@ -1,6 +1,7 @@
 # See docs/tracing.txt for syntax documentation.
 
 # hw/virtio/virtio.c
+virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
 virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
 virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
 virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"

+ 73 - 57
hw/virtio/vhost-backend.c

@@ -192,7 +192,6 @@ static void vhost_kernel_iotlb_read(void *opaque)
     ssize_t len;
 
     while ((len = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) {
-        struct vhost_iotlb_msg *imsg = &msg.iotlb;
         if (len < sizeof msg) {
             error_report("Wrong vhost message len: %d", (int)len);
             break;
@@ -201,70 +200,21 @@ static void vhost_kernel_iotlb_read(void *opaque)
             error_report("Unknown vhost iotlb message type");
             break;
         }
-        switch (imsg->type) {
-        case VHOST_IOTLB_MISS:
-            vhost_device_iotlb_miss(dev, imsg->iova,
-                                    imsg->perm != VHOST_ACCESS_RO);
-            break;
-        case VHOST_IOTLB_UPDATE:
-        case VHOST_IOTLB_INVALIDATE:
-            error_report("Unexpected IOTLB message type");
-            break;
-        case VHOST_IOTLB_ACCESS_FAIL:
-            /* FIXME: report device iotlb error */
-            break;
-        default:
-            break;
-        }
-    }
-}
 
-static int vhost_kernel_update_device_iotlb(struct vhost_dev *dev,
-                                            uint64_t iova, uint64_t uaddr,
-                                            uint64_t len,
-                                            IOMMUAccessFlags perm)
-{
-    struct vhost_msg msg;
-    msg.type = VHOST_IOTLB_MSG;
-    msg.iotlb.iova =  iova;
-    msg.iotlb.uaddr = uaddr;
-    msg.iotlb.size = len;
-    msg.iotlb.type = VHOST_IOTLB_UPDATE;
-
-    switch (perm) {
-    case IOMMU_RO:
-        msg.iotlb.perm = VHOST_ACCESS_RO;
-        break;
-    case IOMMU_WO:
-        msg.iotlb.perm = VHOST_ACCESS_WO;
-        break;
-    case IOMMU_RW:
-        msg.iotlb.perm = VHOST_ACCESS_RW;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) {
-        error_report("Fail to update device iotlb");
-        return -EFAULT;
+        vhost_backend_handle_iotlb_msg(dev, &msg.iotlb);
     }
-
-    return 0;
 }
 
-static int vhost_kernel_invalidate_device_iotlb(struct vhost_dev *dev,
-                                                uint64_t iova, uint64_t len)
+static int vhost_kernel_send_device_iotlb_msg(struct vhost_dev *dev,
+                                              struct vhost_iotlb_msg *imsg)
 {
     struct vhost_msg msg;
 
     msg.type = VHOST_IOTLB_MSG;
-    msg.iotlb.iova = iova;
-    msg.iotlb.size = len;
-    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
+    msg.iotlb = *imsg;
 
     if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) {
-        error_report("Fail to invalidate device iotlb");
+        error_report("Fail to update device iotlb");
         return -EFAULT;
     }
 
@@ -311,8 +261,7 @@ static const VhostOps kernel_ops = {
         .vhost_vsock_set_running = vhost_kernel_vsock_set_running,
 #endif /* CONFIG_VHOST_VSOCK */
         .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback,
-        .vhost_update_device_iotlb = vhost_kernel_update_device_iotlb,
-        .vhost_invalidate_device_iotlb = vhost_kernel_invalidate_device_iotlb,
+        .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg,
 };
 
 int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
@@ -333,3 +282,70 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
 
     return r;
 }
+
+int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
+                                             uint64_t iova, uint64_t uaddr,
+                                             uint64_t len,
+                                             IOMMUAccessFlags perm)
+{
+    struct vhost_iotlb_msg imsg;
+
+    imsg.iova =  iova;
+    imsg.uaddr = uaddr;
+    imsg.size = len;
+    imsg.type = VHOST_IOTLB_UPDATE;
+
+    switch (perm) {
+    case IOMMU_RO:
+        imsg.perm = VHOST_ACCESS_RO;
+        break;
+    case IOMMU_WO:
+        imsg.perm = VHOST_ACCESS_WO;
+        break;
+    case IOMMU_RW:
+        imsg.perm = VHOST_ACCESS_RW;
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+}
+
+int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
+                                                 uint64_t iova, uint64_t len)
+{
+    struct vhost_iotlb_msg imsg;
+
+    imsg.iova = iova;
+    imsg.size = len;
+    imsg.type = VHOST_IOTLB_INVALIDATE;
+
+    return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg);
+}
+
+int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
+                                          struct vhost_iotlb_msg *imsg)
+{
+    int ret = 0;
+
+    switch (imsg->type) {
+    case VHOST_IOTLB_MISS:
+        ret = vhost_device_iotlb_miss(dev, imsg->iova,
+                                      imsg->perm != VHOST_ACCESS_RO);
+        break;
+    case VHOST_IOTLB_ACCESS_FAIL:
+        /* FIXME: report device iotlb error */
+        error_report("Access failure IOTLB message type not supported");
+        ret = -ENOTSUP;
+        break;
+    case VHOST_IOTLB_UPDATE:
+    case VHOST_IOTLB_INVALIDATE:
+    default:
+        error_report("Unexpected IOTLB message type");
+        ret = -EINVAL;
+        break;
+    }
+
+    return ret;
+}

+ 188 - 6
hw/virtio/vhost-user.c

@@ -32,6 +32,7 @@ enum VhostUserProtocolFeature {
     VHOST_USER_PROTOCOL_F_RARP = 2,
     VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
     VHOST_USER_PROTOCOL_F_NET_MTU = 4,
+    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
 
     VHOST_USER_PROTOCOL_F_MAX
 };
@@ -60,9 +61,17 @@ typedef enum VhostUserRequest {
     VHOST_USER_SET_VRING_ENABLE = 18,
     VHOST_USER_SEND_RARP = 19,
     VHOST_USER_NET_SET_MTU = 20,
+    VHOST_USER_SET_SLAVE_REQ_FD = 21,
+    VHOST_USER_IOTLB_MSG = 22,
     VHOST_USER_MAX
 } VhostUserRequest;
 
+typedef enum VhostUserSlaveRequest {
+    VHOST_USER_SLAVE_NONE = 0,
+    VHOST_USER_SLAVE_IOTLB_MSG = 1,
+    VHOST_USER_SLAVE_MAX
+}  VhostUserSlaveRequest;
+
 typedef struct VhostUserMemoryRegion {
     uint64_t guest_phys_addr;
     uint64_t memory_size;
@@ -97,6 +106,7 @@ typedef struct VhostUserMsg {
         struct vhost_vring_addr addr;
         VhostUserMemory memory;
         VhostUserLog log;
+        struct vhost_iotlb_msg iotlb;
     } payload;
 } QEMU_PACKED VhostUserMsg;
 
@@ -110,6 +120,11 @@ static VhostUserMsg m __attribute__ ((unused));
 /* The version of the protocol we support */
 #define VHOST_USER_VERSION    (0x1)
 
+struct vhost_user {
+    CharBackend *chr;
+    int slave_fd;
+};
+
 static bool ioeventfd_enabled(void)
 {
     return kvm_enabled() && kvm_eventfds_enabled();
@@ -117,7 +132,8 @@ static bool ioeventfd_enabled(void)
 
 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
 {
-    CharBackend *chr = dev->opaque;
+    struct vhost_user *u = dev->opaque;
+    CharBackend *chr = u->chr;
     uint8_t *p = (uint8_t *) msg;
     int r, size = VHOST_USER_HDR_SIZE;
 
@@ -202,7 +218,8 @@ static bool vhost_user_one_time_request(VhostUserRequest request)
 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                             int *fds, int fd_num)
 {
-    CharBackend *chr = dev->opaque;
+    struct vhost_user *u = dev->opaque;
+    CharBackend *chr = u->chr;
     int ret, size = VHOST_USER_HDR_SIZE + msg->size;
 
     /*
@@ -572,14 +589,130 @@ static int vhost_user_reset_device(struct vhost_dev *dev)
     return 0;
 }
 
+static void slave_read(void *opaque)
+{
+    struct vhost_dev *dev = opaque;
+    struct vhost_user *u = dev->opaque;
+    VhostUserMsg msg = { 0, };
+    int size, ret = 0;
+
+    /* Read header */
+    size = read(u->slave_fd, &msg, VHOST_USER_HDR_SIZE);
+    if (size != VHOST_USER_HDR_SIZE) {
+        error_report("Failed to read from slave.");
+        goto err;
+    }
+
+    if (msg.size > VHOST_USER_PAYLOAD_SIZE) {
+        error_report("Failed to read msg header."
+                " Size %d exceeds the maximum %zu.", msg.size,
+                VHOST_USER_PAYLOAD_SIZE);
+        goto err;
+    }
+
+    /* Read payload */
+    size = read(u->slave_fd, &msg.payload, msg.size);
+    if (size != msg.size) {
+        error_report("Failed to read payload from slave.");
+        goto err;
+    }
+
+    switch (msg.request) {
+    case VHOST_USER_SLAVE_IOTLB_MSG:
+        ret = vhost_backend_handle_iotlb_msg(dev, &msg.payload.iotlb);
+        break;
+    default:
+        error_report("Received unexpected msg type.");
+        ret = -EINVAL;
+    }
+
+    /*
+     * REPLY_ACK feature handling. Other reply types has to be managed
+     * directly in their request handlers.
+     */
+    if (msg.flags & VHOST_USER_NEED_REPLY_MASK) {
+        msg.flags &= ~VHOST_USER_NEED_REPLY_MASK;
+        msg.flags |= VHOST_USER_REPLY_MASK;
+
+        msg.payload.u64 = !!ret;
+        msg.size = sizeof(msg.payload.u64);
+
+        size = write(u->slave_fd, &msg, VHOST_USER_HDR_SIZE + msg.size);
+        if (size != VHOST_USER_HDR_SIZE + msg.size) {
+            error_report("Failed to send msg reply to slave.");
+            goto err;
+        }
+    }
+
+    return;
+
+err:
+    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
+    close(u->slave_fd);
+    u->slave_fd = -1;
+    return;
+}
+
+static int vhost_setup_slave_channel(struct vhost_dev *dev)
+{
+    VhostUserMsg msg = {
+        .request = VHOST_USER_SET_SLAVE_REQ_FD,
+        .flags = VHOST_USER_VERSION,
+    };
+    struct vhost_user *u = dev->opaque;
+    int sv[2], ret = 0;
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+    if (!virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
+        return 0;
+    }
+
+    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+        error_report("socketpair() failed");
+        return -1;
+    }
+
+    u->slave_fd = sv[0];
+    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
+
+    if (reply_supported) {
+        msg.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    ret = vhost_user_write(dev, &msg, &sv[1], 1);
+    if (ret) {
+        goto out;
+    }
+
+    if (reply_supported) {
+        ret = process_message_reply(dev, &msg);
+    }
+
+out:
+    close(sv[1]);
+    if (ret) {
+        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
+        close(u->slave_fd);
+        u->slave_fd = -1;
+    }
+
+    return ret;
+}
+
 static int vhost_user_init(struct vhost_dev *dev, void *opaque)
 {
-    uint64_t features;
+    uint64_t features, protocol_features;
+    struct vhost_user *u;
     int err;
 
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
 
-    dev->opaque = opaque;
+    u = g_new0(struct vhost_user, 1);
+    u->chr = opaque;
+    u->slave_fd = -1;
+    dev->opaque = u;
 
     err = vhost_user_get_features(dev, &features);
     if (err < 0) {
@@ -590,12 +723,13 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
 
         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
-                                 &features);
+                                 &protocol_features);
         if (err < 0) {
             return err;
         }
 
-        dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
+        dev->protocol_features =
+            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
         if (err < 0) {
             return err;
@@ -609,6 +743,16 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
                 return err;
             }
         }
+
+        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
+                !(virtio_has_feature(dev->protocol_features,
+                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
+                 virtio_has_feature(dev->protocol_features,
+                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
+            error_report("IOMMU support requires reply-ack and "
+                         "slave-req protocol features.");
+            return -1;
+        }
     }
 
     if (dev->migration_blocker == NULL &&
@@ -619,13 +763,26 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque)
                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
     }
 
+    err = vhost_setup_slave_channel(dev);
+    if (err < 0) {
+        return err;
+    }
+
     return 0;
 }
 
 static int vhost_user_cleanup(struct vhost_dev *dev)
 {
+    struct vhost_user *u;
+
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
 
+    u = dev->opaque;
+    if (u->slave_fd >= 0) {
+        close(u->slave_fd);
+        u->slave_fd = -1;
+    }
+    g_free(u);
     dev->opaque = 0;
 
     return 0;
@@ -722,6 +879,29 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
     return 0;
 }
 
+static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
+                                            struct vhost_iotlb_msg *imsg)
+{
+    VhostUserMsg msg = {
+        .request = VHOST_USER_IOTLB_MSG,
+        .size = sizeof(msg.payload.iotlb),
+        .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
+        .payload.iotlb = *imsg,
+    };
+
+    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+        return -EFAULT;
+    }
+
+    return process_message_reply(dev, &msg);
+}
+
+
+static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
+{
+    /* No-op as the receive channel is not dedicated to IOTLB messages. */
+}
+
 const VhostOps user_ops = {
         .backend_type = VHOST_BACKEND_TYPE_USER,
         .vhost_backend_init = vhost_user_init,
@@ -746,4 +926,6 @@ const VhostOps user_ops = {
         .vhost_migration_done = vhost_user_migration_done,
         .vhost_backend_can_merge = vhost_user_can_merge,
         .vhost_net_set_mtu = vhost_user_net_set_mtu,
+        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
+        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
 };

+ 12 - 7
hw/virtio/vhost.c

@@ -724,8 +724,8 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     struct vhost_dev *hdev = iommu->hdev;
     hwaddr iova = iotlb->iova + iommu->iommu_offset;
 
-    if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, iova,
-                                                       iotlb->addr_mask + 1)) {
+    if (vhost_backend_invalidate_device_iotlb(hdev, iova,
+                                              iotlb->addr_mask + 1)) {
         error_report("Fail to invalidate device iotlb");
     }
 }
@@ -971,18 +971,20 @@ static int vhost_memory_region_lookup(struct vhost_dev *hdev,
     return -EFAULT;
 }
 
-void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
+int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
 {
     IOMMUTLBEntry iotlb;
     uint64_t uaddr, len;
+    int ret = -EFAULT;
 
     rcu_read_lock();
 
     iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
                                           iova, write);
     if (iotlb.target_as != NULL) {
-        if (vhost_memory_region_lookup(dev, iotlb.translated_addr,
-                                       &uaddr, &len)) {
+        ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
+                                         &uaddr, &len);
+        if (ret) {
             error_report("Fail to lookup the translated address "
                          "%"PRIx64, iotlb.translated_addr);
             goto out;
@@ -991,14 +993,17 @@ void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
         len = MIN(iotlb.addr_mask + 1, len);
         iova = iova & ~iotlb.addr_mask;
 
-        if (dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr,
-                                                      len, iotlb.perm)) {
+        ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
+                                                len, iotlb.perm);
+        if (ret) {
             error_report("Fail to update device iotlb");
             goto out;
         }
     }
 out:
     rcu_read_unlock();
+
+    return ret;
 }
 
 static int vhost_virtqueue_start(struct vhost_dev *dev,

+ 1 - 0
hw/virtio/virtio.c

@@ -815,6 +815,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu
 
     assert(sz >= sizeof(VirtQueueElement));
     elem = g_malloc(out_sg_end);
+    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
     elem->out_num = out_num;
     elem->in_num = in_num;
     elem->in_addr = (void *)elem + in_addr_ofs;

+ 15 - 8
include/hw/virtio/vhost-backend.h

@@ -27,6 +27,7 @@ struct vhost_vring_file;
 struct vhost_vring_state;
 struct vhost_vring_addr;
 struct vhost_scsi_target;
+struct vhost_iotlb_msg;
 
 typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque);
 typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
@@ -81,12 +82,8 @@ typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev,
 typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start);
 typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev,
                                            int enabled);
-typedef int (*vhost_update_device_iotlb_op)(struct vhost_dev *dev,
-                                            uint64_t iova, uint64_t uaddr,
-                                            uint64_t len,
-                                            IOMMUAccessFlags perm);
-typedef int (*vhost_invalidate_device_iotlb_op)(struct vhost_dev *dev,
-                                                uint64_t iova, uint64_t len);
+typedef int (*vhost_send_device_iotlb_msg_op)(struct vhost_dev *dev,
+                                              struct vhost_iotlb_msg *imsg);
 
 typedef struct VhostOps {
     VhostBackendType backend_type;
@@ -120,8 +117,7 @@ typedef struct VhostOps {
     vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid;
     vhost_vsock_set_running_op vhost_vsock_set_running;
     vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
-    vhost_update_device_iotlb_op vhost_update_device_iotlb;
-    vhost_invalidate_device_iotlb_op vhost_invalidate_device_iotlb;
+    vhost_send_device_iotlb_msg_op vhost_send_device_iotlb_msg;
 } VhostOps;
 
 extern const VhostOps user_ops;
@@ -129,4 +125,15 @@ extern const VhostOps user_ops;
 int vhost_set_backend_type(struct vhost_dev *dev,
                            VhostBackendType backend_type);
 
+int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
+                                             uint64_t iova, uint64_t uaddr,
+                                             uint64_t len,
+                                             IOMMUAccessFlags perm);
+
+int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev,
+                                                 uint64_t iova, uint64_t len);
+
+int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev,
+                                          struct vhost_iotlb_msg *imsg);
+
 #endif /* VHOST_BACKEND_H */

+ 1 - 1
include/hw/virtio/vhost.h

@@ -105,5 +105,5 @@ bool vhost_has_free_slot(void);
 int vhost_net_set_backend(struct vhost_dev *hdev,
                           struct vhost_vring_file *file);
 
-void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
+int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
 #endif