virtio.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121
  1. /*
  2. * Virtio Support
  3. *
  4. * Copyright IBM, Corp. 2007
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. */
  13. #include <inttypes.h>
  14. #include "trace.h"
  15. #include "qemu/error-report.h"
  16. #include "virtio.h"
  17. #include "qemu/atomic.h"
  18. #include "virtio-bus.h"
/* Alignment, in bytes, between the consumer and producer parts of the
 * vring: virtqueue_init() rounds the start of the used ring up to this
 * boundary.  The value is the x86 page size. */
#define VIRTIO_PCI_VRING_ALIGN 4096
/* One entry of the vring descriptor table as laid out in guest memory.
 * The fields are never accessed through a host pointer in this file; they
 * are fetched one by one with the vring_desc_*() accessors. */
typedef struct VRingDesc
{
    uint64_t addr;   /* buffer address, or the indirect table address
                      * when VRING_DESC_F_INDIRECT is set */
    uint32_t len;    /* length in bytes of the buffer / indirect table */
    uint16_t flags;  /* VRING_DESC_F_* bits (NEXT, WRITE, INDIRECT) */
    uint16_t next;   /* next descriptor index, used with VRING_DESC_F_NEXT */
} VRingDesc;
  29. typedef struct VRingAvail
  30. {
  31. uint16_t flags;
  32. uint16_t idx;
  33. uint16_t ring[0];
  34. } VRingAvail;
/* One entry of the used ring as laid out in guest memory; written by
 * virtqueue_fill() through vring_used_ring_id()/vring_used_ring_len(). */
typedef struct VRingUsedElem
{
    uint32_t id;   /* head index of the completed element (elem->index) */
    uint32_t len;  /* length value passed to virtqueue_fill() */
} VRingUsedElem;
  40. typedef struct VRingUsed
  41. {
  42. uint16_t flags;
  43. uint16_t idx;
  44. VRingUsedElem ring[0];
  45. } VRingUsed;
/* Host-side bookkeeping for one vring: its size and the guest physical
 * addresses of its three parts, as computed by virtqueue_init(). */
typedef struct VRing
{
    unsigned int num;  /* number of descriptors; 0 marks an unused queue slot */
    hwaddr desc;       /* descriptor table */
    hwaddr avail;      /* avail ring */
    hwaddr used;       /* used ring */
} VRing;
/* Per-queue device state. */
struct VirtQueue
{
    VRing vring;
    /* Guest physical base address of the ring (see virtio_queue_set_addr) */
    hwaddr pa;
    /* Next avail ring index to consume; advanced by virtqueue_pop() */
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;
    /* Whether signalled_used holds a meaningful value; when false,
     * vring_notify() always asks for a notification */
    bool signalled_used_valid;
    /* Notification enabled? */
    bool notification;
    /* Position of this queue in vdev->vq[], set by virtio_init() */
    uint16_t queue_index;
    /* Elements popped but not yet flushed to the used ring */
    int inuse;
    /* Vector passed to the binding's notify callback for this queue */
    uint16_t vector;
    /* Callback run by virtio_queue_notify_vq() */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};
  72. /* virt queue functions */
  73. static void virtqueue_init(VirtQueue *vq)
  74. {
  75. hwaddr pa = vq->pa;
  76. vq->vring.desc = pa;
  77. vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
  78. vq->vring.used = vring_align(vq->vring.avail +
  79. offsetof(VRingAvail, ring[vq->vring.num]),
  80. VIRTIO_PCI_VRING_ALIGN);
  81. }
  82. static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
  83. {
  84. hwaddr pa;
  85. pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
  86. return ldq_phys(pa);
  87. }
  88. static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
  89. {
  90. hwaddr pa;
  91. pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
  92. return ldl_phys(pa);
  93. }
  94. static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
  95. {
  96. hwaddr pa;
  97. pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
  98. return lduw_phys(pa);
  99. }
  100. static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
  101. {
  102. hwaddr pa;
  103. pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
  104. return lduw_phys(pa);
  105. }
  106. static inline uint16_t vring_avail_flags(VirtQueue *vq)
  107. {
  108. hwaddr pa;
  109. pa = vq->vring.avail + offsetof(VRingAvail, flags);
  110. return lduw_phys(pa);
  111. }
  112. static inline uint16_t vring_avail_idx(VirtQueue *vq)
  113. {
  114. hwaddr pa;
  115. pa = vq->vring.avail + offsetof(VRingAvail, idx);
  116. return lduw_phys(pa);
  117. }
  118. static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
  119. {
  120. hwaddr pa;
  121. pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
  122. return lduw_phys(pa);
  123. }
  124. static inline uint16_t vring_used_event(VirtQueue *vq)
  125. {
  126. return vring_avail_ring(vq, vq->vring.num);
  127. }
  128. static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
  129. {
  130. hwaddr pa;
  131. pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
  132. stl_phys(pa, val);
  133. }
  134. static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
  135. {
  136. hwaddr pa;
  137. pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
  138. stl_phys(pa, val);
  139. }
  140. static uint16_t vring_used_idx(VirtQueue *vq)
  141. {
  142. hwaddr pa;
  143. pa = vq->vring.used + offsetof(VRingUsed, idx);
  144. return lduw_phys(pa);
  145. }
  146. static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
  147. {
  148. hwaddr pa;
  149. pa = vq->vring.used + offsetof(VRingUsed, idx);
  150. stw_phys(pa, val);
  151. }
  152. static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
  153. {
  154. hwaddr pa;
  155. pa = vq->vring.used + offsetof(VRingUsed, flags);
  156. stw_phys(pa, lduw_phys(pa) | mask);
  157. }
  158. static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
  159. {
  160. hwaddr pa;
  161. pa = vq->vring.used + offsetof(VRingUsed, flags);
  162. stw_phys(pa, lduw_phys(pa) & ~mask);
  163. }
  164. static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
  165. {
  166. hwaddr pa;
  167. if (!vq->notification) {
  168. return;
  169. }
  170. pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
  171. stw_phys(pa, val);
  172. }
  173. void virtio_queue_set_notification(VirtQueue *vq, int enable)
  174. {
  175. vq->notification = enable;
  176. if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
  177. vring_avail_event(vq, vring_avail_idx(vq));
  178. } else if (enable) {
  179. vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
  180. } else {
  181. vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
  182. }
  183. if (enable) {
  184. /* Expose avail event/used flags before caller checks the avail idx. */
  185. smp_mb();
  186. }
  187. }
  188. int virtio_queue_ready(VirtQueue *vq)
  189. {
  190. return vq->vring.avail != 0;
  191. }
  192. int virtio_queue_empty(VirtQueue *vq)
  193. {
  194. return vring_avail_idx(vq) == vq->last_avail_idx;
  195. }
  196. void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
  197. unsigned int len, unsigned int idx)
  198. {
  199. unsigned int offset;
  200. int i;
  201. trace_virtqueue_fill(vq, elem, len, idx);
  202. offset = 0;
  203. for (i = 0; i < elem->in_num; i++) {
  204. size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
  205. cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
  206. elem->in_sg[i].iov_len,
  207. 1, size);
  208. offset += size;
  209. }
  210. for (i = 0; i < elem->out_num; i++)
  211. cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
  212. elem->out_sg[i].iov_len,
  213. 0, elem->out_sg[i].iov_len);
  214. idx = (idx + vring_used_idx(vq)) % vq->vring.num;
  215. /* Get a pointer to the next entry in the used ring. */
  216. vring_used_ring_id(vq, idx, elem->index);
  217. vring_used_ring_len(vq, idx, len);
  218. }
  219. void virtqueue_flush(VirtQueue *vq, unsigned int count)
  220. {
  221. uint16_t old, new;
  222. /* Make sure buffer is written before we update index. */
  223. smp_wmb();
  224. trace_virtqueue_flush(vq, count);
  225. old = vring_used_idx(vq);
  226. new = old + count;
  227. vring_used_idx_set(vq, new);
  228. vq->inuse -= count;
  229. if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
  230. vq->signalled_used_valid = false;
  231. }
  232. void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
  233. unsigned int len)
  234. {
  235. virtqueue_fill(vq, elem, len, 0);
  236. virtqueue_flush(vq, 1);
  237. }
  238. static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
  239. {
  240. uint16_t num_heads = vring_avail_idx(vq) - idx;
  241. /* Check it isn't doing very strange things with descriptor numbers. */
  242. if (num_heads > vq->vring.num) {
  243. error_report("Guest moved used index from %u to %u",
  244. idx, vring_avail_idx(vq));
  245. exit(1);
  246. }
  247. /* On success, callers read a descriptor at vq->last_avail_idx.
  248. * Make sure descriptor read does not bypass avail index read. */
  249. if (num_heads) {
  250. smp_rmb();
  251. }
  252. return num_heads;
  253. }
  254. static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
  255. {
  256. unsigned int head;
  257. /* Grab the next descriptor number they're advertising, and increment
  258. * the index we've seen. */
  259. head = vring_avail_ring(vq, idx % vq->vring.num);
  260. /* If their number is silly, that's a fatal mistake. */
  261. if (head >= vq->vring.num) {
  262. error_report("Guest says index %u is available", head);
  263. exit(1);
  264. }
  265. return head;
  266. }
  267. static unsigned virtqueue_next_desc(hwaddr desc_pa,
  268. unsigned int i, unsigned int max)
  269. {
  270. unsigned int next;
  271. /* If this descriptor says it doesn't chain, we're done. */
  272. if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
  273. return max;
  274. /* Check they're not leading us off end of descriptors. */
  275. next = vring_desc_next(desc_pa, i);
  276. /* Make sure compiler knows to grab that: we don't want it changing! */
  277. smp_wmb();
  278. if (next >= max) {
  279. error_report("Desc next is %u", next);
  280. exit(1);
  281. }
  282. return next;
  283. }
  284. void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
  285. unsigned int *out_bytes,
  286. unsigned max_in_bytes, unsigned max_out_bytes)
  287. {
  288. unsigned int idx;
  289. unsigned int total_bufs, in_total, out_total;
  290. idx = vq->last_avail_idx;
  291. total_bufs = in_total = out_total = 0;
  292. while (virtqueue_num_heads(vq, idx)) {
  293. unsigned int max, num_bufs, indirect = 0;
  294. hwaddr desc_pa;
  295. int i;
  296. max = vq->vring.num;
  297. num_bufs = total_bufs;
  298. i = virtqueue_get_head(vq, idx++);
  299. desc_pa = vq->vring.desc;
  300. if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
  301. if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
  302. error_report("Invalid size for indirect buffer table");
  303. exit(1);
  304. }
  305. /* If we've got too many, that implies a descriptor loop. */
  306. if (num_bufs >= max) {
  307. error_report("Looped descriptor");
  308. exit(1);
  309. }
  310. /* loop over the indirect descriptor table */
  311. indirect = 1;
  312. max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
  313. num_bufs = i = 0;
  314. desc_pa = vring_desc_addr(desc_pa, i);
  315. }
  316. do {
  317. /* If we've got too many, that implies a descriptor loop. */
  318. if (++num_bufs > max) {
  319. error_report("Looped descriptor");
  320. exit(1);
  321. }
  322. if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
  323. in_total += vring_desc_len(desc_pa, i);
  324. } else {
  325. out_total += vring_desc_len(desc_pa, i);
  326. }
  327. if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
  328. goto done;
  329. }
  330. } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
  331. if (!indirect)
  332. total_bufs = num_bufs;
  333. else
  334. total_bufs++;
  335. }
  336. done:
  337. if (in_bytes) {
  338. *in_bytes = in_total;
  339. }
  340. if (out_bytes) {
  341. *out_bytes = out_total;
  342. }
  343. }
  344. int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
  345. unsigned int out_bytes)
  346. {
  347. unsigned int in_total, out_total;
  348. virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
  349. return in_bytes <= in_total && out_bytes <= out_total;
  350. }
  351. void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
  352. size_t num_sg, int is_write)
  353. {
  354. unsigned int i;
  355. hwaddr len;
  356. for (i = 0; i < num_sg; i++) {
  357. len = sg[i].iov_len;
  358. sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
  359. if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
  360. error_report("virtio: trying to map MMIO memory");
  361. exit(1);
  362. }
  363. }
  364. }
  365. int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
  366. {
  367. unsigned int i, head, max;
  368. hwaddr desc_pa = vq->vring.desc;
  369. if (!virtqueue_num_heads(vq, vq->last_avail_idx))
  370. return 0;
  371. /* When we start there are none of either input nor output. */
  372. elem->out_num = elem->in_num = 0;
  373. max = vq->vring.num;
  374. i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
  375. if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
  376. vring_avail_event(vq, vring_avail_idx(vq));
  377. }
  378. if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
  379. if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
  380. error_report("Invalid size for indirect buffer table");
  381. exit(1);
  382. }
  383. /* loop over the indirect descriptor table */
  384. max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
  385. desc_pa = vring_desc_addr(desc_pa, i);
  386. i = 0;
  387. }
  388. /* Collect all the descriptors */
  389. do {
  390. struct iovec *sg;
  391. if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
  392. if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
  393. error_report("Too many write descriptors in indirect table");
  394. exit(1);
  395. }
  396. elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
  397. sg = &elem->in_sg[elem->in_num++];
  398. } else {
  399. if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
  400. error_report("Too many read descriptors in indirect table");
  401. exit(1);
  402. }
  403. elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
  404. sg = &elem->out_sg[elem->out_num++];
  405. }
  406. sg->iov_len = vring_desc_len(desc_pa, i);
  407. /* If we've got too many, that implies a descriptor loop. */
  408. if ((elem->in_num + elem->out_num) > max) {
  409. error_report("Looped descriptor");
  410. exit(1);
  411. }
  412. } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
  413. /* Now map what we have collected */
  414. virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
  415. virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
  416. elem->index = head;
  417. vq->inuse++;
  418. trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
  419. return elem->in_num + elem->out_num;
  420. }
  421. /* virtio device */
  422. static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
  423. {
  424. if (vdev->binding->notify) {
  425. vdev->binding->notify(vdev->binding_opaque, vector);
  426. }
  427. }
  428. void virtio_update_irq(VirtIODevice *vdev)
  429. {
  430. virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
  431. }
  432. void virtio_set_status(VirtIODevice *vdev, uint8_t val)
  433. {
  434. trace_virtio_set_status(vdev, val);
  435. if (vdev->set_status) {
  436. vdev->set_status(vdev, val);
  437. }
  438. vdev->status = val;
  439. }
  440. void virtio_reset(void *opaque)
  441. {
  442. VirtIODevice *vdev = opaque;
  443. int i;
  444. virtio_set_status(vdev, 0);
  445. if (vdev->reset)
  446. vdev->reset(vdev);
  447. vdev->guest_features = 0;
  448. vdev->queue_sel = 0;
  449. vdev->status = 0;
  450. vdev->isr = 0;
  451. vdev->config_vector = VIRTIO_NO_VECTOR;
  452. virtio_notify_vector(vdev, vdev->config_vector);
  453. for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
  454. vdev->vq[i].vring.desc = 0;
  455. vdev->vq[i].vring.avail = 0;
  456. vdev->vq[i].vring.used = 0;
  457. vdev->vq[i].last_avail_idx = 0;
  458. vdev->vq[i].pa = 0;
  459. vdev->vq[i].vector = VIRTIO_NO_VECTOR;
  460. vdev->vq[i].signalled_used = 0;
  461. vdev->vq[i].signalled_used_valid = false;
  462. vdev->vq[i].notification = true;
  463. }
  464. }
  465. uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
  466. {
  467. uint8_t val;
  468. vdev->get_config(vdev, vdev->config);
  469. if (addr > (vdev->config_len - sizeof(val)))
  470. return (uint32_t)-1;
  471. val = ldub_p(vdev->config + addr);
  472. return val;
  473. }
  474. uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
  475. {
  476. uint16_t val;
  477. vdev->get_config(vdev, vdev->config);
  478. if (addr > (vdev->config_len - sizeof(val)))
  479. return (uint32_t)-1;
  480. val = lduw_p(vdev->config + addr);
  481. return val;
  482. }
  483. uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
  484. {
  485. uint32_t val;
  486. vdev->get_config(vdev, vdev->config);
  487. if (addr > (vdev->config_len - sizeof(val)))
  488. return (uint32_t)-1;
  489. val = ldl_p(vdev->config + addr);
  490. return val;
  491. }
  492. void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
  493. {
  494. uint8_t val = data;
  495. if (addr > (vdev->config_len - sizeof(val)))
  496. return;
  497. stb_p(vdev->config + addr, val);
  498. if (vdev->set_config)
  499. vdev->set_config(vdev, vdev->config);
  500. }
  501. void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
  502. {
  503. uint16_t val = data;
  504. if (addr > (vdev->config_len - sizeof(val)))
  505. return;
  506. stw_p(vdev->config + addr, val);
  507. if (vdev->set_config)
  508. vdev->set_config(vdev, vdev->config);
  509. }
  510. void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
  511. {
  512. uint32_t val = data;
  513. if (addr > (vdev->config_len - sizeof(val)))
  514. return;
  515. stl_p(vdev->config + addr, val);
  516. if (vdev->set_config)
  517. vdev->set_config(vdev, vdev->config);
  518. }
  519. void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
  520. {
  521. vdev->vq[n].pa = addr;
  522. virtqueue_init(&vdev->vq[n]);
  523. }
  524. hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
  525. {
  526. return vdev->vq[n].pa;
  527. }
  528. int virtio_queue_get_num(VirtIODevice *vdev, int n)
  529. {
  530. return vdev->vq[n].vring.num;
  531. }
  532. int virtio_queue_get_id(VirtQueue *vq)
  533. {
  534. VirtIODevice *vdev = vq->vdev;
  535. assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
  536. return vq - &vdev->vq[0];
  537. }
  538. void virtio_queue_notify_vq(VirtQueue *vq)
  539. {
  540. if (vq->vring.desc) {
  541. VirtIODevice *vdev = vq->vdev;
  542. trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
  543. vq->handle_output(vdev, vq);
  544. }
  545. }
  546. void virtio_queue_notify(VirtIODevice *vdev, int n)
  547. {
  548. virtio_queue_notify_vq(&vdev->vq[n]);
  549. }
  550. uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
  551. {
  552. return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
  553. VIRTIO_NO_VECTOR;
  554. }
  555. void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
  556. {
  557. if (n < VIRTIO_PCI_QUEUE_MAX)
  558. vdev->vq[n].vector = vector;
  559. }
  560. VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
  561. void (*handle_output)(VirtIODevice *, VirtQueue *))
  562. {
  563. int i;
  564. for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
  565. if (vdev->vq[i].vring.num == 0)
  566. break;
  567. }
  568. if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
  569. abort();
  570. vdev->vq[i].vring.num = queue_size;
  571. vdev->vq[i].handle_output = handle_output;
  572. return &vdev->vq[i];
  573. }
  574. void virtio_del_queue(VirtIODevice *vdev, int n)
  575. {
  576. if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
  577. abort();
  578. }
  579. vdev->vq[n].vring.num = 0;
  580. }
  581. void virtio_irq(VirtQueue *vq)
  582. {
  583. trace_virtio_irq(vq);
  584. vq->vdev->isr |= 0x01;
  585. virtio_notify_vector(vq->vdev, vq->vector);
  586. }
  587. /* Assuming a given event_idx value from the other size, if
  588. * we have just incremented index from old to new_idx,
  589. * should we trigger an event? */
  590. static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
  591. {
  592. /* Note: Xen has similar logic for notification hold-off
  593. * in include/xen/interface/io/ring.h with req_event and req_prod
  594. * corresponding to event_idx + 1 and new respectively.
  595. * Note also that req_event and req_prod in Xen start at 1,
  596. * event indexes in virtio start at 0. */
  597. return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
  598. }
  599. static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
  600. {
  601. uint16_t old, new;
  602. bool v;
  603. /* We need to expose used array entries before checking used event. */
  604. smp_mb();
  605. /* Always notify when queue is empty (when feature acknowledge) */
  606. if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
  607. !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
  608. return true;
  609. }
  610. if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
  611. return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
  612. }
  613. v = vq->signalled_used_valid;
  614. vq->signalled_used_valid = true;
  615. old = vq->signalled_used;
  616. new = vq->signalled_used = vring_used_idx(vq);
  617. return !v || vring_need_event(vring_used_event(vq), new, old);
  618. }
  619. void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
  620. {
  621. if (!vring_notify(vdev, vq)) {
  622. return;
  623. }
  624. trace_virtio_notify(vdev, vq);
  625. vdev->isr |= 0x01;
  626. virtio_notify_vector(vdev, vq->vector);
  627. }
  628. void virtio_notify_config(VirtIODevice *vdev)
  629. {
  630. if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
  631. return;
  632. vdev->isr |= 0x03;
  633. virtio_notify_vector(vdev, vdev->config_vector);
  634. }
  635. void virtio_save(VirtIODevice *vdev, QEMUFile *f)
  636. {
  637. int i;
  638. if (vdev->binding->save_config)
  639. vdev->binding->save_config(vdev->binding_opaque, f);
  640. qemu_put_8s(f, &vdev->status);
  641. qemu_put_8s(f, &vdev->isr);
  642. qemu_put_be16s(f, &vdev->queue_sel);
  643. qemu_put_be32s(f, &vdev->guest_features);
  644. qemu_put_be32(f, vdev->config_len);
  645. qemu_put_buffer(f, vdev->config, vdev->config_len);
  646. for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
  647. if (vdev->vq[i].vring.num == 0)
  648. break;
  649. }
  650. qemu_put_be32(f, i);
  651. for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
  652. if (vdev->vq[i].vring.num == 0)
  653. break;
  654. qemu_put_be32(f, vdev->vq[i].vring.num);
  655. qemu_put_be64(f, vdev->vq[i].pa);
  656. qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
  657. if (vdev->binding->save_queue)
  658. vdev->binding->save_queue(vdev->binding_opaque, i, f);
  659. }
  660. }
  661. int virtio_set_features(VirtIODevice *vdev, uint32_t val)
  662. {
  663. uint32_t supported_features =
  664. vdev->binding->get_features(vdev->binding_opaque);
  665. bool bad = (val & ~supported_features) != 0;
  666. val &= supported_features;
  667. if (vdev->set_features) {
  668. vdev->set_features(vdev, val);
  669. }
  670. vdev->guest_features = val;
  671. return bad ? -1 : 0;
  672. }
  673. int virtio_load(VirtIODevice *vdev, QEMUFile *f)
  674. {
  675. int num, i, ret;
  676. uint32_t features;
  677. uint32_t supported_features;
  678. if (vdev->binding->load_config) {
  679. ret = vdev->binding->load_config(vdev->binding_opaque, f);
  680. if (ret)
  681. return ret;
  682. }
  683. qemu_get_8s(f, &vdev->status);
  684. qemu_get_8s(f, &vdev->isr);
  685. qemu_get_be16s(f, &vdev->queue_sel);
  686. qemu_get_be32s(f, &features);
  687. if (virtio_set_features(vdev, features) < 0) {
  688. supported_features = vdev->binding->get_features(vdev->binding_opaque);
  689. error_report("Features 0x%x unsupported. Allowed features: 0x%x",
  690. features, supported_features);
  691. return -1;
  692. }
  693. vdev->config_len = qemu_get_be32(f);
  694. qemu_get_buffer(f, vdev->config, vdev->config_len);
  695. num = qemu_get_be32(f);
  696. for (i = 0; i < num; i++) {
  697. vdev->vq[i].vring.num = qemu_get_be32(f);
  698. vdev->vq[i].pa = qemu_get_be64(f);
  699. qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
  700. vdev->vq[i].signalled_used_valid = false;
  701. vdev->vq[i].notification = true;
  702. if (vdev->vq[i].pa) {
  703. uint16_t nheads;
  704. virtqueue_init(&vdev->vq[i]);
  705. nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
  706. /* Check it isn't doing very strange things with descriptor numbers. */
  707. if (nheads > vdev->vq[i].vring.num) {
  708. error_report("VQ %d size 0x%x Guest index 0x%x "
  709. "inconsistent with Host index 0x%x: delta 0x%x",
  710. i, vdev->vq[i].vring.num,
  711. vring_avail_idx(&vdev->vq[i]),
  712. vdev->vq[i].last_avail_idx, nheads);
  713. return -1;
  714. }
  715. } else if (vdev->vq[i].last_avail_idx) {
  716. error_report("VQ %d address 0x0 "
  717. "inconsistent with Host index 0x%x",
  718. i, vdev->vq[i].last_avail_idx);
  719. return -1;
  720. }
  721. if (vdev->binding->load_queue) {
  722. ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
  723. if (ret)
  724. return ret;
  725. }
  726. }
  727. virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
  728. return 0;
  729. }
  730. void virtio_common_cleanup(VirtIODevice *vdev)
  731. {
  732. qemu_del_vm_change_state_handler(vdev->vmstate);
  733. g_free(vdev->config);
  734. g_free(vdev->vq);
  735. }
  736. void virtio_cleanup(VirtIODevice *vdev)
  737. {
  738. virtio_common_cleanup(vdev);
  739. g_free(vdev);
  740. }
  741. static void virtio_vmstate_change(void *opaque, int running, RunState state)
  742. {
  743. VirtIODevice *vdev = opaque;
  744. bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
  745. vdev->vm_running = running;
  746. if (backend_run) {
  747. virtio_set_status(vdev, vdev->status);
  748. }
  749. if (vdev->binding->vmstate_change) {
  750. vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
  751. }
  752. if (!backend_run) {
  753. virtio_set_status(vdev, vdev->status);
  754. }
  755. }
  756. void virtio_init(VirtIODevice *vdev, const char *name,
  757. uint16_t device_id, size_t config_size)
  758. {
  759. int i;
  760. vdev->device_id = device_id;
  761. vdev->status = 0;
  762. vdev->isr = 0;
  763. vdev->queue_sel = 0;
  764. vdev->config_vector = VIRTIO_NO_VECTOR;
  765. vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
  766. vdev->vm_running = runstate_is_running();
  767. for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
  768. vdev->vq[i].vector = VIRTIO_NO_VECTOR;
  769. vdev->vq[i].vdev = vdev;
  770. vdev->vq[i].queue_index = i;
  771. }
  772. vdev->name = name;
  773. vdev->config_len = config_size;
  774. if (vdev->config_len) {
  775. vdev->config = g_malloc0(config_size);
  776. } else {
  777. vdev->config = NULL;
  778. }
  779. vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
  780. vdev);
  781. }
  782. VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
  783. size_t config_size, size_t struct_size)
  784. {
  785. VirtIODevice *vdev;
  786. vdev = g_malloc0(struct_size);
  787. virtio_init(vdev, name, device_id, config_size);
  788. return vdev;
  789. }
  790. void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
  791. DeviceState *opaque)
  792. {
  793. vdev->binding = binding;
  794. vdev->binding_opaque = opaque;
  795. }
  796. hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
  797. {
  798. return vdev->vq[n].vring.desc;
  799. }
  800. hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
  801. {
  802. return vdev->vq[n].vring.avail;
  803. }
  804. hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
  805. {
  806. return vdev->vq[n].vring.used;
  807. }
  808. hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
  809. {
  810. return vdev->vq[n].vring.desc;
  811. }
  812. hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
  813. {
  814. return sizeof(VRingDesc) * vdev->vq[n].vring.num;
  815. }
  816. hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
  817. {
  818. return offsetof(VRingAvail, ring) +
  819. sizeof(uint64_t) * vdev->vq[n].vring.num;
  820. }
  821. hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
  822. {
  823. return offsetof(VRingUsed, ring) +
  824. sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
  825. }
  826. hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
  827. {
  828. return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
  829. virtio_queue_get_used_size(vdev, n);
  830. }
  831. uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
  832. {
  833. return vdev->vq[n].last_avail_idx;
  834. }
  835. void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
  836. {
  837. vdev->vq[n].last_avail_idx = idx;
  838. }
  839. VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
  840. {
  841. return vdev->vq + n;
  842. }
  843. uint16_t virtio_get_queue_index(VirtQueue *vq)
  844. {
  845. return vq->queue_index;
  846. }
  847. static void virtio_queue_guest_notifier_read(EventNotifier *n)
  848. {
  849. VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
  850. if (event_notifier_test_and_clear(n)) {
  851. virtio_irq(vq);
  852. }
  853. }
  854. void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
  855. bool with_irqfd)
  856. {
  857. if (assign && !with_irqfd) {
  858. event_notifier_set_handler(&vq->guest_notifier,
  859. virtio_queue_guest_notifier_read);
  860. } else {
  861. event_notifier_set_handler(&vq->guest_notifier, NULL);
  862. }
  863. if (!assign) {
  864. /* Test and clear notifier before closing it,
  865. * in case poll callback didn't have time to run. */
  866. virtio_queue_guest_notifier_read(&vq->guest_notifier);
  867. }
  868. }
  869. EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
  870. {
  871. return &vq->guest_notifier;
  872. }
  873. static void virtio_queue_host_notifier_read(EventNotifier *n)
  874. {
  875. VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
  876. if (event_notifier_test_and_clear(n)) {
  877. virtio_queue_notify_vq(vq);
  878. }
  879. }
  880. void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
  881. bool set_handler)
  882. {
  883. if (assign && set_handler) {
  884. event_notifier_set_handler(&vq->host_notifier,
  885. virtio_queue_host_notifier_read);
  886. } else {
  887. event_notifier_set_handler(&vq->host_notifier, NULL);
  888. }
  889. if (!assign) {
  890. /* Test and clear notifier before after disabling event,
  891. * in case poll callback didn't have time to run. */
  892. virtio_queue_host_notifier_read(&vq->host_notifier);
  893. }
  894. }
  895. EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
  896. {
  897. return &vq->host_notifier;
  898. }
  899. static int virtio_device_init(DeviceState *qdev)
  900. {
  901. VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
  902. VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
  903. assert(k->init != NULL);
  904. if (k->init(vdev) < 0) {
  905. return -1;
  906. }
  907. virtio_bus_plug_device(vdev);
  908. return 0;
  909. }
  910. static void virtio_device_class_init(ObjectClass *klass, void *data)
  911. {
  912. /* Set the default value here. */
  913. DeviceClass *dc = DEVICE_CLASS(klass);
  914. dc->init = virtio_device_init;
  915. dc->bus_type = TYPE_VIRTIO_BUS;
  916. }
/* Type description of the abstract base type for virtio devices; it
 * derives from TYPE_DEVICE and is registered by virtio_register_types(). */
static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};
  925. static void virtio_register_types(void)
  926. {
  927. type_register_static(&virtio_device_info);
  928. }
  929. type_init(virtio_register_types)