vhost-shadow-virtqueue.c

/*
 * vhost shadow virtqueue
 *
 * SPDX-FileCopyrightText: Red Hat, Inc. 2021
 * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"

#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"

/**
 * Validate the transport device features that both guests can use with the SVQ
 * and SVQs can use with the device.
 *
 * @features: The features
 * @errp: Error pointer
 */
bool vhost_svq_valid_features(uint64_t features, Error **errp)
{
    bool ok = true;
    uint64_t svq_features = features;

    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
         ++b) {
        switch (b) {
        case VIRTIO_F_ANY_LAYOUT:
        case VIRTIO_RING_F_EVENT_IDX:
            continue;

        case VIRTIO_F_ACCESS_PLATFORM:
            /* SVQ trusts in the host's IOMMU to translate addresses */
        case VIRTIO_F_VERSION_1:
            /* SVQ trusts that the guest vring is little endian */
            if (!(svq_features & BIT_ULL(b))) {
                svq_features |= BIT_ULL(b);
                ok = false;
            }
            continue;

        default:
            if (svq_features & BIT_ULL(b)) {
                svq_features &= ~BIT_ULL(b);
                ok = false;
            }
        }
    }

    if (!ok) {
        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
                         ", ok: 0x%"PRIx64, features, svq_features);
    }
    return ok;
}

/**
 * Number of descriptors that the SVQ can make available from the guest.
 *
 * @svq: The svq
 */
uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
{
    return svq->num_free;
}

/**
 * Translate addresses between qemu's virtual address and the SVQ IOVA
 *
 * @svq: Shadow VirtQueue
 * @addrs: Destination for the translated IOVA addresses
 * @iovec: Source qemu's VA addresses
 * @num: Length of iovec and minimum length of addrs
 * @gpas: Descriptors' GPAs, if backed by guest memory
 */
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
                                     hwaddr *addrs, const struct iovec *iovec,
                                     size_t num, const hwaddr *gpas)
{
    if (num == 0) {
        return true;
    }

    for (size_t i = 0; i < num; ++i) {
        Int128 needle_last, map_last;
        size_t off;
        const DMAMap *map;
        DMAMap needle;

        /* Check if the descriptor is backed by guest memory */
        if (gpas) {
            /* Search the GPA->IOVA tree */
            needle = (DMAMap) {
                .translated_addr = gpas[i],
                .size = iovec[i].iov_len,
            };
            map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle);
        } else {
            /* Search the IOVA->HVA tree */
            needle = (DMAMap) {
                .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
                .size = iovec[i].iov_len,
            };
            map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
        }

        /*
         * Map cannot be NULL since iova map contains all guest space and
         * qemu already has a physical address mapped
         */
        if (unlikely(!map)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
                          needle.translated_addr);
            return false;
        }

        off = needle.translated_addr - map->translated_addr;
        addrs[i] = map->iova + off;
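
        /*
         * The translation is only valid if the whole guest buffer lies
         * within the single map entry that was found; otherwise the
         * resulting IOVA range would not be contiguous.
         */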
        needle_last = int128_add(int128_make64(needle.translated_addr),
                                 int128_makes64(iovec[i].iov_len - 1));
        map_last = int128_make64(map->translated_addr + map->size);
        if (unlikely(int128_gt(needle_last, map_last))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Guest buffer expands over iova range");
            return false;
        }
    }

    return true;
}

/**
 * Write descriptors to SVQ vring
 *
 * @svq: The shadow virtqueue
 * @sg: Cache for hwaddr
 * @iovec: The iovec from the guest
 * @num: iovec length
 * @addr: Descriptors' GPAs, if backed by guest memory
 * @more_descs: True if more descriptors come in the chain
 * @write: True if they are writable descriptors
 *
 * Return true on success, false otherwise (and log the error).
 */
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                        const struct iovec *iovec, size_t num,
                                        const hwaddr *addr, bool more_descs,
                                        bool write)
{
    uint16_t i = svq->free_head, last = svq->free_head;
    unsigned n;
    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
    vring_desc_t *descs = svq->vring.desc;
    bool ok;

    if (num == 0) {
        return true;
    }

    ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr);
    if (unlikely(!ok)) {
        return false;
    }
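
    /*
     * Fill one vring descriptor per iovec entry, chaining them through
     * svq->desc_next.  All descriptors but the last of the chain get
     * VRING_DESC_F_NEXT (the last one only if @more_descs is set).
     */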
    for (n = 0; n < num; n++) {
        if (more_descs || (n + 1 < num)) {
            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
            descs[i].next = cpu_to_le16(svq->desc_next[i]);
        } else {
            descs[i].flags = flags;
        }
        descs[i].addr = cpu_to_le64(sg[n]);
        descs[i].len = cpu_to_le32(iovec[n].iov_len);

        last = i;
        i = svq->desc_next[i];
    }

    svq->free_head = svq->desc_next[last];
    return true;
}

static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                const struct iovec *out_sg, size_t out_num,
                                const hwaddr *out_addr,
                                const struct iovec *in_sg, size_t in_num,
                                const hwaddr *in_addr, unsigned *head)
{
    unsigned avail_idx;
    vring_avail_t *avail = svq->vring.avail;
    bool ok;
    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));

    *head = svq->free_head;

    /* We need some descriptors here */
    if (unlikely(!out_num && !in_num)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Guest provided element with no descriptors");
        return false;
    }
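
    /*
     * Write the device-readable (out) descriptors first, then the
     * device-writable (in) ones.  The last out descriptor is chained to the
     * in descriptors only if there are any.
     */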
    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr,
                                     in_num > 0, false);
    if (unlikely(!ok)) {
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false,
                                     true);
    if (unlikely(!ok)) {
        return false;
    }

    /*
     * Put the entry in the available array (but don't update avail->idx until
     * they do sync).
     */
    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
    avail->ring[avail_idx] = cpu_to_le16(*head);
    svq->shadow_avail_idx++;

    /* Update the avail index after writing the descriptors */
    smp_wmb();
    avail->idx = cpu_to_le16(svq->shadow_avail_idx);

    return true;
}

static void vhost_svq_kick(VhostShadowVirtqueue *svq)
{
    bool needs_kick;

    /*
     * We need to expose the available array entries before checking the used
     * flags
     */
    smp_mb();
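
    /*
     * With VIRTIO_RING_F_EVENT_IDX the device publishes an avail event index
     * after the used ring, and a kick is only needed when shadow_avail_idx
     * crosses it.  Otherwise, honour the VRING_USED_F_NO_NOTIFY flag.
     */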
    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t avail_event = le16_to_cpu(
                *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
                                      svq->shadow_avail_idx - 1);
    } else {
        needs_kick =
                !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
    }

    if (!needs_kick) {
        return;
    }

    event_notifier_set(&svq->hdev_kick);
}

/**
 * Add an element to an SVQ.
 *
 * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
 */
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
                  size_t out_num, const hwaddr *out_addr,
                  const struct iovec *in_sg, size_t in_num,
                  const hwaddr *in_addr, VirtQueueElement *elem)
{
    unsigned qemu_head;
    unsigned ndescs = in_num + out_num;
    bool ok;

    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
        return -ENOSPC;
    }

    ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num,
                             in_addr, &qemu_head);
    if (unlikely(!ok)) {
        return -EINVAL;
    }
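
    /*
     * Track the element under the head descriptor index: the device reports
     * that same id in the used ring, so vhost_svq_get_buf() can recover the
     * element and the length of its chain.
     */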
    svq->num_free -= ndescs;
    svq->desc_state[qemu_head].elem = elem;
    svq->desc_state[qemu_head].ndescs = ndescs;
    vhost_svq_kick(svq);
    return 0;
}

/* Convenience wrapper to add a guest's element to SVQ */
static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
                                 VirtQueueElement *elem)
{
    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr,
                         elem->in_sg, elem->in_num, elem->in_addr, elem);
}

/**
 * Forward available buffers.
 *
 * @svq: Shadow VirtQueue
 *
 * Note that this function does not guarantee that all of the guest's
 * available buffers are made available to the device in the SVQ avail ring.
 * The guest may have exposed a GPA / GIOVA contiguous buffer, but it may not
 * be contiguous in qemu vaddr.
 *
 * If that happens, the guest's kick notifications will be disabled until the
 * device uses some buffers.
 */
static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
{
    /* Clear event notifier */
    event_notifier_test_and_clear(&svq->svq_kick);

    /* Forward to the device as many available buffers as possible */
    do {
        virtio_queue_set_notification(svq->vq, false);
        while (true) {
            g_autofree VirtQueueElement *elem = NULL;
            int r;

            if (svq->next_guest_avail_elem) {
                elem = g_steal_pointer(&svq->next_guest_avail_elem);
            } else {
                elem = virtqueue_pop(svq->vq, sizeof(*elem));
            }

            if (!elem) {
                break;
            }

            if (svq->ops) {
                r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
            } else {
                r = vhost_svq_add_element(svq, elem);
            }
            if (unlikely(r != 0)) {
                if (r == -ENOSPC) {
                    /*
                     * This condition is possible since a contiguous buffer in
                     * GPA does not imply a contiguous buffer in qemu's VA
                     * scatter-gather segments. If that happens, the buffer
                     * exposed to the device needs to be a chain of descriptors
                     * at this moment.
                     *
                     * SVQ cannot hold more available buffers if we are here:
                     * queue the current guest descriptor and ignore kicks
                     * until some elements are used.
                     */
                    svq->next_guest_avail_elem = g_steal_pointer(&elem);
                }

                /* VQ is full or broken, just return and ignore kicks */
                return;
            }
            /* elem belongs to SVQ or external caller now */
            elem = NULL;
        }
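
        /*
         * Re-enable guest notifications and check the queue again: a buffer
         * could have been made available between the last virtqueue_pop()
         * and the notification being re-enabled.
         */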
        virtio_queue_set_notification(svq->vq, true);
    } while (!virtio_queue_empty(svq->vq));
}

/**
 * Handle guest's kick.
 *
 * @n: guest kick event notifier, the one that guest set to notify svq.
 */
static void vhost_handle_guest_kick_notifier(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
    event_notifier_test_and_clear(n);
    vhost_handle_guest_kick(svq);
}
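
/*
 * Check whether the device has marked more buffers as used.
 *
 * shadow_used_idx caches the last used->idx read from the device, so the
 * volatile read of the shared ring only happens once all previously fetched
 * used entries have been consumed.
 */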
static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
{
    uint16_t *used_idx = &svq->vring.used->idx;
    if (svq->last_used_idx != svq->shadow_used_idx) {
        return true;
    }

    svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);

    return svq->last_used_idx != svq->shadow_used_idx;
}

/**
 * Enable vhost device calls after disabling them.
 *
 * @svq: The svq
 *
 * It returns false if there are pending used buffers from the vhost device,
 * avoiding the possible races between SVQ checking for more work and enabling
 * callbacks. True if SVQ used vring has no more pending buffers.
 */
static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
{
    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
        *used_event = cpu_to_le16(svq->shadow_used_idx);
    } else {
        svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* Make sure the event is enabled before the read of used_idx */
    smp_mb();

    return !vhost_svq_more_used(svq);
}

static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
{
    /*
     * No need to disable notification in the event idx case, since used event
     * index is already an index too far away.
     */
    if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }
}

static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                             uint16_t num, uint16_t i)
{
    for (uint16_t j = 0; j < (num - 1); ++j) {
        i = svq->desc_next[i];
    }

    return i;
}

G_GNUC_WARN_UNUSED_RESULT
static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
                                           uint32_t *len)
{
    const vring_used_t *used = svq->vring.used;
    vring_used_elem_t used_elem;
    uint16_t last_used, last_used_chain, num;

    if (!vhost_svq_more_used(svq)) {
        return NULL;
    }

    /* Only get used array entries after they have been exposed by dev */
    smp_rmb();
    last_used = svq->last_used_idx & (svq->vring.num - 1);
    used_elem.id = le32_to_cpu(used->ring[last_used].id);
    used_elem.len = le32_to_cpu(used->ring[last_used].len);

    svq->last_used_idx++;
    if (unlikely(used_elem.id >= svq->vring.num)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Device %s says index %u is used, which is out of range",
                      svq->vdev->name, used_elem.id);
        return NULL;
    }

    if (unlikely(!svq->desc_state[used_elem.id].ndescs)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Device %s says index %u is used, but it was not available",
                      svq->vdev->name, used_elem.id);
        return NULL;
    }

    num = svq->desc_state[used_elem.id].ndescs;
    svq->desc_state[used_elem.id].ndescs = 0;
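
    /*
     * Return the whole descriptor chain to the free list: walk to the tail
     * of the chain and link it in front of the current free head.
     */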
    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
    svq->desc_next[last_used_chain] = svq->free_head;
    svq->free_head = used_elem.id;
    svq->num_free += num;

    *len = used_elem.len;
    return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
}

/**
 * Push an element to SVQ, returning it to the guest.
 */
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
                         const VirtQueueElement *elem, uint32_t len)
{
    virtqueue_push(svq->vq, elem, len);
    if (svq->next_guest_avail_elem) {
        /*
         * Avail ring was full when vhost_svq_flush was called, so it's a
         * good moment to make more descriptors available if possible.
         */
        vhost_handle_guest_kick(svq);
    }
}

static void vhost_svq_flush(VhostShadowVirtqueue *svq,
                            bool check_for_avail_queue)
{
    VirtQueue *vq = svq->vq;

    /* Forward as many used buffers as possible. */
    do {
        unsigned i = 0;

        vhost_svq_disable_notification(svq);
        while (true) {
            uint32_t len;
            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
            if (!elem) {
                break;
            }

            if (unlikely(i >= svq->vring.num)) {
                qemu_log_mask(LOG_GUEST_ERROR,
                              "More than %u used buffers obtained in a %u size SVQ",
                              i, svq->vring.num);
                virtqueue_fill(vq, elem, len, i);
                virtqueue_flush(vq, i);
                return;
            }
            virtqueue_fill(vq, elem, len, i++);
        }

        virtqueue_flush(vq, i);
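        /* Notify the guest through its call (interrupt) notifier */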
        event_notifier_set(&svq->svq_call);

        if (check_for_avail_queue && svq->next_guest_avail_elem) {
            /*
             * Avail ring was full when vhost_svq_flush was called, so it's a
             * good moment to make more descriptors available if possible.
             */
            vhost_handle_guest_kick(svq);
        }
    } while (!vhost_svq_enable_notification(svq));
}

/**
 * Poll the SVQ to wait for the device to use the specified number
 * of elements and return the total length written by the device.
 *
 * This function races with the main event loop SVQ polling, so extra
 * synchronization is needed.
 *
 * @svq: The svq
 * @num: The number of elements that need to be used
 */
size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num)
{
    size_t len = 0;

    while (num--) {
        g_autofree VirtQueueElement *elem = NULL;
        int64_t start_us = g_get_monotonic_time();
        uint32_t r = 0;

        do {
            if (vhost_svq_more_used(svq)) {
                break;
            }
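
            /*
             * g_get_monotonic_time() is in microseconds, so this busy wait
             * gives up after roughly 10 seconds.
             */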
            if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
                return len;
            }
        } while (true);

        elem = vhost_svq_get_buf(svq, &r);
        len += r;
    }

    return len;
}

/**
 * Forward used buffers.
 *
 * @n: hdev call event notifier, the one that device set to notify svq.
 *
 * Note that we are not making any buffers available in the loop, so there is
 * no way that it runs more than virtqueue size times.
 */
static void vhost_svq_handle_call(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
                                             hdev_call);
    event_notifier_test_and_clear(n);
    vhost_svq_flush(svq, true);
}

/**
 * Set the call notifier for the SVQ to call the guest
 *
 * @svq: Shadow virtqueue
 * @call_fd: call notifier
 *
 * Called on BQL context.
 */
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
{
    if (call_fd == VHOST_FILE_UNBIND) {
        /*
         * Make event_notifier_set fail if called while handling a device
         * call.
         *
         * SVQ still needs device notifications, since it needs to keep
         * forwarding used buffers even with the unbind.
         */
        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
    } else {
        event_notifier_init_fd(&svq->svq_call, call_fd);
    }
}

/**
 * Get the shadow vq vring address.
 * @svq: Shadow virtqueue
 * @addr: Destination to store address
 */
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
                              struct vhost_vring_addr *addr)
{
    addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
    addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
    addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
}
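
/*
 * Size of the driver area (descriptor table plus avail ring) that SVQ writes
 * and the device reads.  The trailing uint16_t of the avail ring holds the
 * used_event field when VIRTIO_RING_F_EVENT_IDX is negotiated.  Rounded up
 * to the host page size.
 */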
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
{
    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
    size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) +
                        sizeof(uint16_t);

    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
}

size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
{
    size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) +
                       sizeof(uint16_t);

    return ROUND_UP(used_size, qemu_real_host_page_size());
}

/**
 * Set a new file descriptor for the guest to kick the SVQ and notify for avail
 *
 * @svq: The svq
 * @svq_kick_fd: The svq kick fd
 *
 * Note that the SVQ will never close the old file descriptor.
 */
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
{
    EventNotifier *svq_kick = &svq->svq_kick;
    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;

    if (poll_stop) {
        event_notifier_set_handler(svq_kick, NULL);
    }

    event_notifier_init_fd(svq_kick, svq_kick_fd);
    /*
     * event_notifier_set_handler already checks for guest's notifications if
     * they arrived at the new file descriptor during the switch, so there is
     * no need to explicitly check for them.
     */
    if (poll_start) {
        event_notifier_set(svq_kick);
        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
    }
}

/**
 * Start the shadow virtqueue operation.
 *
 * @svq: Shadow Virtqueue
 * @vdev: VirtIO device
 * @vq: Virtqueue to shadow
 * @iova_tree: Tree to perform descriptor translations
 */
void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                     VirtQueue *vq, VhostIOVATree *iova_tree)
{
    size_t desc_size;

    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
    svq->next_guest_avail_elem = NULL;
    svq->shadow_avail_idx = 0;
    svq->shadow_used_idx = 0;
    svq->last_used_idx = 0;
    svq->vdev = vdev;
    svq->vq = vq;
    svq->iova_tree = iova_tree;

    svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
    svq->num_free = svq->vring.num;
    svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq),
                           PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                           -1, 0);
    desc_size = sizeof(vring_desc_t) * svq->vring.num;
    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
    svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq),
                           PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                           -1, 0);
    svq->desc_state = g_new0(SVQDescState, svq->vring.num);
    svq->desc_next = g_new0(uint16_t, svq->vring.num);
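    /* Chain all descriptors into the initial free list: i -> i + 1 */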
    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
        svq->desc_next[i] = i + 1;
    }
}

/**
 * Stop the shadow virtqueue operation.
 * @svq: Shadow Virtqueue
 */
void vhost_svq_stop(VhostShadowVirtqueue *svq)
{
    vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND);
    g_autofree VirtQueueElement *next_avail_elem = NULL;

    if (!svq->vq) {
        return;
    }

    /* Send all pending used descriptors to guest */
    vhost_svq_flush(svq, false);

    for (unsigned i = 0; i < svq->vring.num; ++i) {
        g_autofree VirtQueueElement *elem = NULL;
        elem = g_steal_pointer(&svq->desc_state[i].elem);
        if (elem) {
            /*
             * TODO: This is ok for networking, but other kinds of devices
             * might have problems with just unpopping these.
             */
            virtqueue_unpop(svq->vq, elem, 0);
        }
    }

    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
    if (next_avail_elem) {
        virtqueue_unpop(svq->vq, next_avail_elem, 0);
    }
    svq->vq = NULL;
    g_free(svq->desc_next);
    g_free(svq->desc_state);
    munmap(svq->vring.desc, vhost_svq_driver_area_size(svq));
    munmap(svq->vring.used, vhost_svq_device_area_size(svq));
    event_notifier_set_handler(&svq->hdev_call, NULL);
}

/**
 * Creates the vhost shadow virtqueue, and instructs the vhost device to use
 * the shadow methods and file descriptors.
 *
 * @ops: SVQ owner callbacks
 * @ops_opaque: ops opaque pointer
 */
VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
                                    void *ops_opaque)
{
    VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);

    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
    svq->ops = ops;
    svq->ops_opaque = ops_opaque;
    return svq;
}

/**
 * Free the resources of the shadow virtqueue.
 *
 * @pvq: gpointer to SVQ so it can be used by autofree functions.
 */
void vhost_svq_free(gpointer pvq)
{
    VhostShadowVirtqueue *vq = pvq;
    vhost_svq_stop(vq);
    g_free(vq);
}