virtio.c
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu-error.h"
#include "virtio.h"

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step. We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 * In any case, we must prevent the compiler from reordering the code.
 * TODO: we likely need some rmb()/mb() as well.
 */

#define wmb() __asm__ __volatile__("": : :"memory")
typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;
struct VirtQueue
{
    VRing vring;
    target_phys_addr_t pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
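/* virtqueue_init() lays out the three vring areas at the base address the
 * guest programmed (vq->pa): the descriptor table first, the avail ring
 * immediately after it, and the used ring at the next VIRTIO_PCI_VRING_ALIGN
 * boundary.  For example, with num = 256:
 *   desc  at pa,          256 * sizeof(VRingDesc) = 4096 bytes
 *   avail at pa + 4096,   4 + 2 * 256 = 516 bytes
 *   used  at pa + 8192,   4 + 8 * 256 = 2052 bytes
 * (plus one trailing 16-bit field on each ring when event indexes are in
 * use). */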
static void virtqueue_init(VirtQueue *vq)
{
    target_phys_addr_t pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}

static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}
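/* With VIRTIO_RING_F_EVENT_IDX, two extra 16-bit fields sit just past the
 * rings: used_event occupies the slot after the last avail ring entry and is
 * read here, while avail_event occupies the slot after the last used ring
 * entry and is written by vring_avail_event() below. */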
static inline uint16_t vring_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    stw_phys(pa, val);
}
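/* Enable or disable guest->host notifications (kicks) for this queue.  With
 * VIRTIO_RING_F_EVENT_IDX the device publishes the avail index it has seen
 * via avail_event; otherwise it toggles VRING_USED_F_NO_NOTIFY in the used
 * ring flags. */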
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}
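/* Completion is a two-step protocol: virtqueue_fill() writes one used ring
 * element (buffer id and bytes written) at offset 'idx' past the current
 * used index, and virtqueue_flush() publishes 'count' such elements by
 * advancing used->idx.  virtqueue_push() is the common single-buffer case of
 * fill followed by flush. */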
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Get a pointer to the next entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    wmb();
    trace_virtqueue_flush(vq, count);
    old = vring_used_idx(vq);
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
        return max;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}
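/* Walk the avail ring without consuming anything, following indirect
 * descriptor tables where present, and report whether the pending buffers
 * already provide the requested in_bytes of device-writable space or
 * out_bytes of device-readable data. */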
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        target_phys_addr_t desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table; read the table
             * address from descriptor i before resetting i */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
                    return 1;
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
                    return 1;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }

    return 0;
}
void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
                      size_t num_sg, int is_write)
{
    unsigned int i;
    target_phys_addr_t len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}
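/* Consume the next available head: walk its descriptor chain (direct or
 * indirect), record the guest-physical address and length of each
 * device-readable (out) and device-writable (in) descriptor, and map them
 * into host iovecs.  Returns the number of descriptors collected, or 0 if
 * the queue is empty. */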
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    target_phys_addr_t desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are no input or output buffers. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    if (vdev->binding->notify) {
        vdev->binding->notify(vdev->binding_opaque, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    virtio_set_status(vdev, 0);

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
    }
}
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint8_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint16_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint32_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}
void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX)
        vdev->vq[n].vector = vector;
}
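/* Allocate the first unused slot (vring.num == 0) in the fixed per-device
 * queue array.  queue_size is the vring size the guest will see and must not
 * exceed VIRTQUEUE_MAX_SIZE. */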
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}
/* Assuming a given event_idx value from the other side, if
 * we have just incremented index from old to new_idx,
 * should we trigger an event? */
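/* Equivalently: notify iff event_idx falls in the half-open window
 * [old, new) of entries we just published, computed with 16-bit wrap-around
 * arithmetic. */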
static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
    /* Note: Xen has similar logic for notification hold-off
     * in include/xen/interface/io/ring.h with req_event and req_prod
     * corresponding to event_idx + 1 and new respectively.
     * Note also that req_event and req_prod in Xen start at 1,
     * event indexes in virtio start at 0. */
    return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
}
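/* Decide whether completed buffers on this queue should interrupt the guest:
 * always when VIRTIO_F_NOTIFY_ON_EMPTY is negotiated and the queue has just
 * drained, via the VRING_AVAIL_F_NO_INTERRUPT flag when event indexes are
 * not negotiated, and via the used_event index otherwise. */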
static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* Always notify when queue is empty (when the guest has acknowledged
     * the feature) */
    if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
         !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
        return true;
    }

    if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vring_used_idx(vq);
    return !v || vring_need_event(vring_used_event(vq), new, old);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!vring_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}
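/* Migration format: optional transport (binding) config, then status, isr,
 * queue_sel, guest_features and the device config blob, followed by the
 * number of configured queues and, for each, its size, ring address and
 * last_avail_idx plus optional per-queue transport state. */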
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    if (vdev->binding->save_config)
        vdev->binding->save_config(vdev->binding_opaque, f);

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (vdev->binding->save_queue)
            vdev->binding->save_queue(vdev->binding_opaque, i, f);
    }
}
int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features =
        vdev->binding->get_features(vdev->binding_opaque);

    if (vdev->binding->load_config) {
        ret = vdev->binding->load_config(vdev->binding_opaque, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);
    if (features & ~supported_features) {
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    if (vdev->set_features)
        vdev->set_features(vdev, features);
    vdev->guest_features = features;
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (vdev->binding->load_queue) {
            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    if (vdev->config)
        qemu_free(vdev->config);
    qemu_free(vdev->vq);
}
static void virtio_vmstate_change(void *opaque, int running, int reason)
{
    VirtIODevice *vdev = opaque;
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (vdev->binding->vmstate_change) {
        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}
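/* A typical device model allocates its state through virtio_common_init()
 * and registers its virtqueues with virtio_add_queue(); the transport then
 * attaches itself with virtio_bind_device().  Sketch (the VirtIOFoo type,
 * VIRTIO_ID_FOO and foo_* callbacks are illustrative names only):
 *
 *   VirtIOFoo *s = (VirtIOFoo *)virtio_common_init("virtio-foo",
 *                                                  VIRTIO_ID_FOO,
 *                                                  sizeof(struct foo_config),
 *                                                  sizeof(VirtIOFoo));
 *   s->vdev.get_config = foo_get_config;
 *   s->vdev.get_features = foo_get_features;
 *   s->vq = virtio_add_queue(&s->vdev, 128, foo_handle_output);
 */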
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                 size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;
    int i;

    vdev = qemu_mallocz(struct_size);

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    vdev->vm_running = vm_running;
    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);

    return vdev;
}

void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                        void *opaque)
{
    vdev->binding = binding;
    vdev->binding_opaque = opaque;
}
target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}
target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    /* avail ring entries are 16 bits wide (see VRingAvail) */
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}
target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
        virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}