/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "virtio.h"
#include "sysemu.h"

//#define VIRTIO_ZERO_COPY

/* from Linux's linux/virtio_pci.h */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS               18

/* An 8-bit r/o interrupt status register.  Reading the value will return the
 * current contents of the ISR and will also clear it.  This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19

#define VIRTIO_PCI_CONFIG               20

/* Virtio ABI version; if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION          0

/* How many bits to shift the physical queue address written to QUEUE_PFN.
 * 12 is historical, and due to x86 page size. */
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT     12

/* The alignment to use between consumer and producer parts of the vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN          4096

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 */
#define wmb() do { } while (0)

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;

struct VirtQueue
{
    VRing vring;
    uint32_t pfn;
    uint16_t last_avail_idx;
    int inuse;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
};

#define VIRTIO_PCI_QUEUE_MAX 16

/* virt queue functions */
#ifdef VIRTIO_ZERO_COPY
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
    target_phys_addr_t addr1;

    off = cpu_get_physical_page_desc(addr);
    if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        exit(1);
    }

    off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    for (addr1 = addr + TARGET_PAGE_SIZE;
         addr1 < TARGET_PAGE_ALIGN(addr + size);
         addr1 += TARGET_PAGE_SIZE) {
        ram_addr_t off1;

        off1 = cpu_get_physical_page_desc(addr1);
        if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            exit(1);
        }

        off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK);

        if (off1 != (off + (addr1 - addr))) {
            fprintf(stderr, "discontiguous virtio memory\n");
            exit(1);
        }
    }

    return phys_ram_base + off;
}
#endif

static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa)
{
    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}
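
/* A worked example of the layout computed above (a sketch, assuming a
 * queue of num = 256 entries whose guest-physical base pa is page-aligned):
 *
 *   desc  = pa                              256 * sizeof(VRingDesc) = 4096 B
 *   avail = pa + 4096                       4 + 256 * 2 = 516 B
 *   used  = vring_align(pa + 4096 + 516, 4096)
 *         = pa + 8192                       4 + 256 * sizeof(VRingUsedElem) B
 *
 * so the used ring always begins on its own 4096-byte boundary, as the
 * VIRTIO_PCI_VRING_ALIGN comment above requires.
 */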
static inline uint64_t vring_desc_addr(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, vring_used_idx(vq) + val);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    if (enable)
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    else
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}
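
/* A typical use of the predicates above (a hypothetical sketch; process_one
 * is not part of this file): a backend can suppress guest kicks via
 * VRING_USED_F_NO_NOTIFY while it drains the queue, then re-enable them
 * before going back to sleep.
 *
 *   virtio_queue_set_notification(vq, 0);
 *   while (!virtio_queue_empty(vq))
 *       process_one(vq);
 *   virtio_queue_set_notification(vq, 1);
 */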
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

#ifndef VIRTIO_ZERO_COPY
    for (i = 0; i < elem->out_num; i++)
        qemu_free(elem->out_sg[i].iov_base);
#endif

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

#ifdef VIRTIO_ZERO_COPY
        if (size) {
            ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base;
            ram_addr_t off;

            for (off = 0; off < size; off += TARGET_PAGE_SIZE)
                cpu_physical_memory_set_dirty(addr + off);
        }
#else
        if (size)
            cpu_physical_memory_write(elem->in_addr[i],
                                      elem->in_sg[i].iov_base,
                                      size);

        qemu_free(elem->in_sg[i].iov_base);
#endif

        offset += size;
    }

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Write the element into the next free entry of the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    /* Make sure the buffer is written before we update the index. */
    wmb();

    vring_used_idx_increment(vq, count);
    vq->inuse -= count;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}
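
/* virtqueue_fill() and virtqueue_flush() are split so that a backend can
 * complete a batch of elements and publish them with a single used-index
 * update, and hence at most one interrupt.  A hypothetical sketch (elems,
 * lens and completed are the caller's bookkeeping, not part of this file):
 *
 *   for (n = 0; n < completed; n++)
 *       virtqueue_fill(vq, &elems[n], lens[n], n);
 *   virtqueue_flush(vq, completed);
 *   virtio_notify(vdev, vq);
 *
 * virtqueue_push() above is the single-element shorthand for this pattern.
 */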
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        fprintf(stderr, "Guest moved avail index from %u to %u",
                idx, vring_avail_idx(vq));
        exit(1);
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        fprintf(stderr, "Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT))
        return vq->vring.num;

    /* Check they're not leading us off the end of the descriptors. */
    next = vring_desc_next(vq, i);
    /* Make sure the compiler knows to grab that: we don't want it changing! */
    wmb();

    if (next >= vq->vring.num) {
        fprintf(stderr, "Desc next is %u", next);
        exit(1);
    }

    return next;
}

int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int num_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    num_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        int i;

        i = virtqueue_get_head(vq, idx++);
        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > vq->vring.num) {
                fprintf(stderr, "Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(vq, i)) >= in_bytes)
                    return 1;
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(vq, i)) >= out_bytes)
                    return 1;
            }
        } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
    }

    return 0;
}

int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are neither input nor output buffers. */
    elem->out_num = elem->in_num = 0;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    do {
        struct iovec *sg;

        if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
            elem->in_addr[elem->in_num] = vring_desc_addr(vq, i);
            sg = &elem->in_sg[elem->in_num++];
        } else
            sg = &elem->out_sg[elem->out_num++];

        /* Grab the descriptor, and check it's OK. */
        sg->iov_len = vring_desc_len(vq, i);

#ifdef VIRTIO_ZERO_COPY
        sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len);
#else
        /* Cap individual scatter element size to prevent unbounded
         * allocations of memory from the guest.  Practically speaking,
         * no virtio driver will ever pass more than a page in each element.
         * We set the cap to 2MB in case for some reason a large page makes
         * its way into the sg list.  When we implement a zero copy API,
         * this limitation will disappear. */
        if (sg->iov_len > (2 << 20))
            sg->iov_len = 2 << 20;

        sg->iov_base = qemu_malloc(sg->iov_len);
        if (!(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) {
            cpu_physical_memory_read(vring_desc_addr(vq, i),
                                     sg->iov_base,
                                     sg->iov_len);
        }
#endif
        if (sg->iov_base == NULL) {
            fprintf(stderr, "Invalid mapping\n");
            exit(1);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > vq->vring.num) {
            fprintf(stderr, "Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);

    elem->index = head;

    vq->inuse++;

    return elem->in_num + elem->out_num;
}
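
/* The canonical consumer of virtqueue_pop() is a device's handle_output
 * callback.  A hypothetical echo-style handler (a sketch; process_buffers
 * is not part of this file): it pops each element, reads the guest's data
 * from out_sg[], writes results into in_sg[], and pushes the element back
 * with the number of bytes produced.
 *
 *   static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 *   {
 *       VirtQueueElement elem;
 *
 *       while (virtqueue_pop(vq, &elem)) {
 *           size_t written = process_buffers(&elem);
 *           virtqueue_push(vq, &elem, written);
 *           virtio_notify(vdev, vq);
 *       }
 *   }
 */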
/* virtio device */

static VirtIODevice *to_virtio_device(PCIDevice *pci_dev)
{
    return (VirtIODevice *)pci_dev;
}

static void virtio_update_irq(VirtIODevice *vdev)
{
    qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1);
}

static void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    virtio_update_irq(vdev);

    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pfn = 0;
    }
}

static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    ram_addr_t pa;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_GUEST_FEATURES:
        /* Guest does not negotiate properly?  We have to assume nothing. */
        if (val & (1 << VIRTIO_F_BAD_FEATURE)) {
            if (vdev->bad_features)
                val = vdev->bad_features(vdev);
            else
                val = 0;
        }
        if (vdev->set_features)
            vdev->set_features(vdev, val);
        vdev->features = val;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        pa = (ram_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
        vdev->vq[vdev->queue_sel].pfn = val;
        if (pa == 0) {
            virtio_reset(vdev);
        } else {
            virtqueue_init(&vdev->vq[vdev->queue_sel], pa);
        }
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        if (val < VIRTIO_PCI_QUEUE_MAX)
            vdev->queue_sel = val;
        break;
    case VIRTIO_PCI_QUEUE_NOTIFY:
        if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc)
            vdev->vq[val].handle_output(vdev, &vdev->vq[val]);
        break;
    case VIRTIO_PCI_STATUS:
        vdev->status = val & 0xFF;
        if (vdev->status == 0)
            virtio_reset(vdev);
        break;
    }
}

static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = to_virtio_device(opaque);
    uint32_t ret = 0xFFFFFFFF;

    addr -= vdev->addr;

    switch (addr) {
    case VIRTIO_PCI_HOST_FEATURES:
        ret = vdev->get_features(vdev);
        ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | (1 << VIRTIO_F_BAD_FEATURE);
        break;
    case VIRTIO_PCI_GUEST_FEATURES:
        ret = vdev->features;
        break;
    case VIRTIO_PCI_QUEUE_PFN:
        ret = vdev->vq[vdev->queue_sel].pfn;
        break;
    case VIRTIO_PCI_QUEUE_NUM:
        ret = vdev->vq[vdev->queue_sel].vring.num;
        break;
    case VIRTIO_PCI_QUEUE_SEL:
        ret = vdev->queue_sel;
        break;
    case VIRTIO_PCI_STATUS:
        ret = vdev->status;
        break;
    case VIRTIO_PCI_ISR:
        /* reading from the ISR also clears it. */
        ret = vdev->isr;
        vdev->isr = 0;
        virtio_update_irq(vdev);
        break;
    default:
        break;
    }

    return ret;
}

static uint32_t virtio_config_readb(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readw(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static uint32_t virtio_config_readl(void *opaque, uint32_t addr)
{
    VirtIODevice *vdev = opaque;
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint8_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint16_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data)
{
    VirtIODevice *vdev = opaque;
    uint32_t val = data;

    addr -= vdev->addr + VIRTIO_PCI_CONFIG;
    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

static void virtio_map(PCIDevice *pci_dev, int region_num,
                       uint32_t addr, uint32_t size, int type)
{
    VirtIODevice *vdev = to_virtio_device(pci_dev);
    int i;

    vdev->addr = addr;
    for (i = 0; i < 3; i++) {
        register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev);
        register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev);
    }

    if (vdev->config_len) {
        register_ioport_write(addr + 20, vdev->config_len, 1,
                              virtio_config_writeb, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 2,
                              virtio_config_writew, vdev);
        register_ioport_write(addr + 20, vdev->config_len, 4,
                              virtio_config_writel, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 1,
                             virtio_config_readb, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 2,
                             virtio_config_readw, vdev);
        register_ioport_read(addr + 20, vdev->config_len, 4,
                             virtio_config_readl, vdev);

        vdev->get_config(vdev, vdev->config);
    }
}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}
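
/* A device registers its queues once at init time; the guest then discovers
 * each queue's size through VIRTIO_PCI_QUEUE_SEL / VIRTIO_PCI_QUEUE_NUM and
 * provides the ring memory by writing VIRTIO_PCI_QUEUE_PFN.  A hypothetical
 * device front-end (s, MyDevice and my_handle_output are illustrative, not
 * part of this file) would do:
 *
 *   s->vq = virtio_add_queue(&s->vdev, 128, my_handle_output);
 */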
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* If the guest asked for no interrupts, stay silent, unless it
     * acknowledged VIRTIO_F_NOTIFY_ON_EMPTY and the queue is now empty:
     * in that case we always notify. */
    if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
        (!(vdev->features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
        return;

    vdev->isr |= 0x01;
    virtio_update_irq(vdev);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_update_irq(vdev);
}
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    pci_device_save(&vdev->pci_dev, f);

    qemu_put_be32s(f, &vdev->addr);
    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be32s(f, &vdev->vq[i].pfn);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
    }
}

void virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i;

    pci_device_load(&vdev->pci_dev, f);

    qemu_get_be32s(f, &vdev->addr);
    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &vdev->features);
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        qemu_get_be32s(f, &vdev->vq[i].pfn);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);

        if (vdev->vq[i].pfn) {
            target_phys_addr_t pa;

            pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
            virtqueue_init(&vdev->vq[i], pa);
        }
    }

    virtio_update_irq(vdev);
}
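
/* Devices built on this layer save the common virtio state through
 * virtio_save()/virtio_load() and append their own fields afterwards, in
 * the same order on both sides.  A hypothetical sketch (MyDevice and
 * my_private_field are illustrative, not part of this file):
 *
 *   static void my_save(QEMUFile *f, void *opaque)
 *   {
 *       MyDevice *s = opaque;
 *
 *       virtio_save(&s->vdev, f);
 *       qemu_put_be32(f, s->my_private_field);
 *   }
 */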
void virtio_cleanup(VirtIODevice *vdev)
{
    if (vdev->config)
        qemu_free(vdev->config);
    qemu_free(vdev->vq);
}

VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name,
                              uint16_t vendor, uint16_t device,
                              uint16_t subvendor, uint16_t subdevice,
                              uint16_t class_code, uint8_t pif,
                              size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;
    PCIDevice *pci_dev;
    uint8_t *config;
    uint32_t size;

    pci_dev = pci_register_device(bus, name, struct_size,
                                  -1, NULL, NULL);
    if (!pci_dev)
        return NULL;

    vdev = to_virtio_device(pci_dev);

    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);

    config = pci_dev->config;
    pci_config_set_vendor_id(config, vendor);
    pci_config_set_device_id(config, device);

    config[0x08] = VIRTIO_PCI_ABI_VERSION;

    config[0x09] = pif;
    pci_config_set_class(config, class_code);
    config[0x0e] = 0x00;

    config[0x2c] = subvendor & 0xFF;
    config[0x2d] = (subvendor >> 8) & 0xFF;
    config[0x2e] = subdevice & 0xFF;
    config[0x2f] = (subdevice >> 8) & 0xFF;

    config[0x3d] = 1;

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    /* Round the I/O region (20 bytes of registers plus device config)
     * up to a power of two, as PCI requires. */
    size = 20 + config_size;
    if (size & (size - 1))
        size = 1 << qemu_fls(size);

    pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                           virtio_map);

    qemu_register_reset(virtio_reset, vdev);

    return vdev;
}
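
/* Putting it together: a hypothetical device front-end allocates itself
 * through virtio_init_pci(), fills in its callbacks, and attaches its
 * queues.  The IDs below are illustrative only; real virtio devices use
 * the virtio PCI vendor ID 0x1af4 with their assigned device, subsystem
 * and class codes.  MyDevice, my_config, my_get_features and
 * my_handle_output are not part of this file:
 *
 *   MyDevice *s;
 *
 *   s = (MyDevice *)virtio_init_pci(bus, "my-virtio-dev",
 *                                   0x1af4, 0x1000,
 *                                   0x1af4, 0x0001,
 *                                   0x0200, 0x00,
 *                                   sizeof(struct my_config),
 *                                   sizeof(MyDevice));
 *   if (!s)
 *       return;
 *
 *   s->vdev.get_features = my_get_features;
 *   s->vq = virtio_add_queue(&s->vdev, 128, my_handle_output);
 */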