2
0

vring.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. /* Copyright 2012 Red Hat, Inc.
  2. * Copyright IBM, Corp. 2012
  3. *
  4. * Based on Linux 2.6.39 vhost code:
  5. * Copyright (C) 2009 Red Hat, Inc.
  6. * Copyright (C) 2006 Rusty Russell IBM Corporation
  7. *
  8. * Author: Michael S. Tsirkin <mst@redhat.com>
  9. * Stefan Hajnoczi <stefanha@redhat.com>
  10. *
  11. * Inspiration, some code, and most witty comments come from
  12. * Documentation/virtual/lguest/lguest.c, by Rusty Russell
  13. *
  14. * This work is licensed under the terms of the GNU GPL, version 2.
  15. */
  16. #include "trace.h"
  17. #include "hw/dataplane/vring.h"
  18. /* Map the guest's vring to host memory */
  19. bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
  20. {
  21. hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
  22. hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
  23. void *vring_ptr;
  24. vring->broken = false;
  25. hostmem_init(&vring->hostmem);
  26. vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
  27. if (!vring_ptr) {
  28. error_report("Failed to map vring "
  29. "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
  30. vring_addr, vring_size);
  31. vring->broken = true;
  32. return false;
  33. }
  34. vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
  35. vring->last_avail_idx = 0;
  36. vring->last_used_idx = 0;
  37. vring->signalled_used = 0;
  38. vring->signalled_used_valid = false;
  39. trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
  40. vring->vr.desc, vring->vr.avail, vring->vr.used);
  41. return true;
  42. }
  43. void vring_teardown(Vring *vring)
  44. {
  45. hostmem_finalize(&vring->hostmem);
  46. }
  47. /* Disable guest->host notifies */
  48. void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
  49. {
  50. if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
  51. vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY;
  52. }
  53. }
  54. /* Enable guest->host notifies
  55. *
  56. * Return true if the vring is empty, false if there are more requests.
  57. */
  58. bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
  59. {
  60. if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
  61. vring_avail_event(&vring->vr) = vring->vr.avail->idx;
  62. } else {
  63. vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY;
  64. }
  65. smp_mb(); /* ensure update is seen before reading avail_idx */
  66. return !vring_more_avail(vring);
  67. }
  68. /* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
  69. bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
  70. {
  71. uint16_t old, new;
  72. bool v;
  73. /* Flush out used index updates. This is paired
  74. * with the barrier that the Guest executes when enabling
  75. * interrupts. */
  76. smp_mb();
  77. if ((vdev->guest_features & VIRTIO_F_NOTIFY_ON_EMPTY) &&
  78. unlikely(vring->vr.avail->idx == vring->last_avail_idx)) {
  79. return true;
  80. }
  81. if (!(vdev->guest_features & VIRTIO_RING_F_EVENT_IDX)) {
  82. return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
  83. }
  84. old = vring->signalled_used;
  85. v = vring->signalled_used_valid;
  86. new = vring->signalled_used = vring->last_used_idx;
  87. vring->signalled_used_valid = true;
  88. if (unlikely(!v)) {
  89. return true;
  90. }
  91. return vring_need_event(vring_used_event(&vring->vr), new, old);
  92. }
  93. /* This is stolen from linux/drivers/vhost/vhost.c. */
  94. static int get_indirect(Vring *vring,
  95. struct iovec iov[], struct iovec *iov_end,
  96. unsigned int *out_num, unsigned int *in_num,
  97. struct vring_desc *indirect)
  98. {
  99. struct vring_desc desc;
  100. unsigned int i = 0, count, found = 0;
  101. /* Sanity check */
  102. if (unlikely(indirect->len % sizeof(desc))) {
  103. error_report("Invalid length in indirect descriptor: "
  104. "len %#x not multiple of %#zx",
  105. indirect->len, sizeof(desc));
  106. vring->broken = true;
  107. return -EFAULT;
  108. }
  109. count = indirect->len / sizeof(desc);
  110. /* Buffers are chained via a 16 bit next field, so
  111. * we can have at most 2^16 of these. */
  112. if (unlikely(count > USHRT_MAX + 1)) {
  113. error_report("Indirect buffer length too big: %d", indirect->len);
  114. vring->broken = true;
  115. return -EFAULT;
  116. }
  117. do {
  118. struct vring_desc *desc_ptr;
  119. /* Translate indirect descriptor */
  120. desc_ptr = hostmem_lookup(&vring->hostmem,
  121. indirect->addr + found * sizeof(desc),
  122. sizeof(desc), false);
  123. if (!desc_ptr) {
  124. error_report("Failed to map indirect descriptor "
  125. "addr %#" PRIx64 " len %zu",
  126. (uint64_t)indirect->addr + found * sizeof(desc),
  127. sizeof(desc));
  128. vring->broken = true;
  129. return -EFAULT;
  130. }
  131. desc = *desc_ptr;
  132. /* Ensure descriptor has been loaded before accessing fields */
  133. barrier(); /* read_barrier_depends(); */
  134. if (unlikely(++found > count)) {
  135. error_report("Loop detected: last one at %u "
  136. "indirect size %u", i, count);
  137. vring->broken = true;
  138. return -EFAULT;
  139. }
  140. if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
  141. error_report("Nested indirect descriptor");
  142. vring->broken = true;
  143. return -EFAULT;
  144. }
  145. /* Stop for now if there are not enough iovecs available. */
  146. if (iov >= iov_end) {
  147. return -ENOBUFS;
  148. }
  149. iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
  150. desc.flags & VRING_DESC_F_WRITE);
  151. if (!iov->iov_base) {
  152. error_report("Failed to map indirect descriptor"
  153. "addr %#" PRIx64 " len %u",
  154. (uint64_t)desc.addr, desc.len);
  155. vring->broken = true;
  156. return -EFAULT;
  157. }
  158. iov->iov_len = desc.len;
  159. iov++;
  160. /* If this is an input descriptor, increment that count. */
  161. if (desc.flags & VRING_DESC_F_WRITE) {
  162. *in_num += 1;
  163. } else {
  164. /* If it's an output descriptor, they're all supposed
  165. * to come before any input descriptors. */
  166. if (unlikely(*in_num)) {
  167. error_report("Indirect descriptor "
  168. "has out after in: idx %u", i);
  169. vring->broken = true;
  170. return -EFAULT;
  171. }
  172. *out_num += 1;
  173. }
  174. i = desc.next;
  175. } while (desc.flags & VRING_DESC_F_NEXT);
  176. return 0;
  177. }
/* This looks in the virtqueue and for the first available buffer, and converts
 * it to an iovec for convenient access. Since descriptors consist of some
 * number of output then some number of input descriptors, it's actually two
 * iovecs, but we pack them into one and note how many of each there were.
 *
 * This function returns the descriptor number found, or vq->num (which is
 * never a valid descriptor number) if none was found. A negative code is
 * returned on error.
 *
 * Returns -EAGAIN when the ring is empty, -ENOBUFS when iov[] is too
 * small for the chain, and -EFAULT (with vring->broken set) when the
 * guest supplied malformed ring data.
 *
 * Stolen from linux/drivers/vhost/vhost.c.
 */
int vring_pop(VirtIODevice *vdev, Vring *vring,
              struct iovec iov[], struct iovec *iov_end,
              unsigned int *out_num, unsigned int *in_num)
{
    struct vring_desc desc;
    unsigned int i, head, found = 0, num = vring->vr.num;
    uint16_t avail_idx, last_avail_idx;

    /* If there was a fatal error then refuse operation */
    if (vring->broken) {
        return -EFAULT;
    }

    /* Check it isn't doing very strange things with descriptor numbers. */
    last_avail_idx = vring->last_avail_idx;
    avail_idx = vring->vr.avail->idx;
    barrier(); /* load indices now and not again later */

    /* 16-bit wraparound-safe distance check: the guest may be at most
     * num entries ahead of us. */
    if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
        error_report("Guest moved used index from %u to %u",
                     last_avail_idx, avail_idx);
        vring->broken = true;
        return -EFAULT;
    }

    /* If there's nothing new since last we looked. */
    if (avail_idx == last_avail_idx) {
        return -EAGAIN;
    }

    /* Only get avail ring entries after they have been exposed by guest. */
    smp_rmb();

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring->vr.avail->ring[last_avail_idx % num];

    /* If their number is silly, that's an error. */
    if (unlikely(head >= num)) {
        error_report("Guest says index %u > %u is available", head, num);
        vring->broken = true;
        return -EFAULT;
    }

    /* With event index, publish how far we have consumed so the guest
     * can suppress unnecessary notifications. */
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
    }

    /* When we start there are none of either input nor output. */
    *out_num = *in_num = 0;

    i = head;
    do {
        if (unlikely(i >= num)) {
            error_report("Desc index is %u > %u, head = %u", i, num, head);
            vring->broken = true;
            return -EFAULT;
        }
        /* found counts chain links; more than num means a cycle. */
        if (unlikely(++found > num)) {
            error_report("Loop detected: last one at %u vq size %u head %u",
                         i, num, head);
            vring->broken = true;
            return -EFAULT;
        }
        /* Copy the descriptor so later field reads see one snapshot. */
        desc = vring->vr.desc[i];

        /* Ensure descriptor is loaded before accessing fields */
        barrier();

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
            if (ret < 0) {
                return ret;
            }
            continue;
        }

        /* If there are not enough iovecs left, stop for now. The caller
         * should check if there are more descs available once they have dealt
         * with the current set.
         */
        if (iov >= iov_end) {
            return -ENOBUFS;
        }

        /* TODO handle non-contiguous memory across region boundaries */
        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
                                       desc.flags & VRING_DESC_F_WRITE);
        if (!iov->iov_base) {
            error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
                         (uint64_t)desc.addr, desc.len);
            vring->broken = true;
            return -EFAULT;
        }
        iov->iov_len = desc.len;
        iov++;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* If this is an input descriptor,
             * increment that count. */
            *in_num += 1;
        } else {
            /* If it's an output descriptor, they're all supposed
             * to come before any input descriptors. */
            if (unlikely(*in_num)) {
                error_report("Descriptor has out after in: idx %d", i);
                vring->broken = true;
                return -EFAULT;
            }
            *out_num += 1;
        }
        i = desc.next;
    } while (desc.flags & VRING_DESC_F_NEXT);

    /* On success, increment avail index. */
    vring->last_avail_idx++;
    return head;
}
/* After we've used one of their buffers, we tell them about it.
 *
 * @head: descriptor index of the completed chain (as returned by vring_pop)
 * @len:  number of bytes written into the chain's input buffers
 *
 * Stolen from linux/drivers/vhost/vhost.c.
 */
void vring_push(Vring *vring, unsigned int head, int len)
{
    struct vring_used_elem *used;
    uint16_t new;

    /* Don't touch vring if a fatal error occurred */
    if (vring->broken) {
        return;
    }

    /* The virtqueue contains a ring of used buffers. Get a pointer to the
     * next entry in that used ring. */
    used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
    used->id = head;
    used->len = len;

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    new = vring->vr.used->idx = ++vring->last_used_idx;

    /* If the used index has caught up with (or passed) signalled_used,
     * the stored value can no longer be trusted for event-index
     * suppression; force the next vring_should_notify() to signal. */
    if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
        vring->signalled_used_valid = false;
    }
}