/*
 * Inter-VM Shared Memory PCI device.
 *
 * Author:
 *      Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based On: cirrus_vga.c
 *          Copyright (c) 2004 Fabrice Bellard
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
 *
 * and rtl8139.c
 *          Copyright (c) 2006 Igor Kovalenko
 *
 * This code is licensed under the GNU GPL v2.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "sysemu/kvm.h"
#include "migration/blocker.h"
#include "migration/vmstate.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "chardev/char-fe.h"
#include "sysemu/hostmem.h"
#include "sysemu/qtest.h"
#include "qapi/visitor.h"

#include "hw/misc/ivshmem.h"

#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM   0x1110

#define IVSHMEM_MAX_PEERS UINT16_MAX
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI     1

#define IVSHMEM_REG_BAR_SIZE 0x100

#define IVSHMEM_DEBUG 0
#define IVSHMEM_DPRINTF(fmt, ...)                       \
    do {                                                \
        if (IVSHMEM_DEBUG) {                            \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
        }                                               \
    } while (0)

#define TYPE_IVSHMEM_COMMON "ivshmem-common"
#define IVSHMEM_COMMON(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
#define IVSHMEM_PLAIN(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
#define IVSHMEM_DOORBELL(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
#define IVSHMEM(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)

typedef struct Peer {
    int nb_eventfds;
    EventNotifier *eventfds;
} Peer;

typedef struct MSIVector {
    PCIDevice *pdev;
    int virq;
    bool unmasked;
} MSIVector;

typedef struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    uint32_t features;

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* without interrupts (ivshmem-plain) */
    CharBackend server_chr;     /* with interrupts (ivshmem-doorbell) */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;
} IVShmemState;

/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8,
    DOORBELL = 12,
};
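
/*
 * Illustrative guest-side example (not part of this device model):
 * ringing peer 3's doorbell on vector 1 means writing a 32-bit value
 * to the DOORBELL register, with the destination peer ID in the high
 * 16 bits and the vector number in the low bits.  Assuming @bar0 is
 * the guest's mapping of BAR 0:
 *
 *     *(volatile uint32_t *)(bar0 + DOORBELL) = (3 << 16) | 1;
 *
 * See ivshmem_io_write() for the decoding.
 */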

static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature) {
    return (ivs->features & (1 << feature));
}

static inline bool ivshmem_is_master(IVShmemState *s)
{
    assert(s->master != ON_OFF_AUTO_AUTO);
    return s->master == ON_OFF_AUTO_ON;
}

static void ivshmem_update_irq(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t isr = s->intrstatus & s->intrmask;

    /*
     * Do nothing unless the device actually uses INTx.  Here's how
     * the device variants signal interrupts, what they put in PCI
     * config space:
     * Device variant    Interrupt  Interrupt Pin  MSI-X cap.
     * ivshmem-plain         none            0         no
     * ivshmem-doorbell     MSI-X            1        yes(1)
     * ivshmem,msi=off       INTx            1         no
     * ivshmem,msi=on       MSI-X            1(2)     yes(1)
     * (1) if guest enabled MSI-X
     * (2) the device lies
     * Leads to the condition for doing nothing:
     */
    if (ivshmem_has_feature(s, IVSHMEM_MSI)
        || !d->config[PCI_INTERRUPT_PIN]) {
        return;
    }

    /* don't print ISR resets */
    if (isr) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
                        isr ? 1 : 0, s->intrstatus, s->intrmask);
    }

    pci_set_irq(d, isr != 0);
}

static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;
    ivshmem_update_irq(s);
}

static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t ret = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
    return ret;
}

static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;
    ivshmem_update_irq(s);
}

static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t ret = s->intrstatus;

    /* reading ISR clears all interrupts */
    s->intrstatus = 0;
    ivshmem_update_irq(s);
    return ret;
}

static void ivshmem_io_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;

    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable */
            if (dest >= s->nb_peers) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                event_notifier_set(&s->peers[dest].eventfds[vector]);
            } else {
                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
                                vector, dest);
            }
            break;
        default:
            IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
    }
}

static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
                                unsigned size)
{
    IVShmemState *s = opaque;
    uint32_t ret;

    switch (addr)
    {
        case INTRMASK:
            ret = ivshmem_IntrMask_read(s);
            break;

        case INTRSTATUS:
            ret = ivshmem_IntrStatus_read(s);
            break;

        case IVPOSITION:
            ret = s->vm_id;
            break;

        default:
            IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
            ret = 0;
    }

    return ret;
}

static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
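
/*
 * Note on BAR 0: the region is IVSHMEM_REG_BAR_SIZE (0x100) bytes, but
 * only the four registers above are implemented.  The .impl constraints
 * ask QEMU's memory core to present every guest access to
 * ivshmem_io_read()/ivshmem_io_write() as a 4-byte access.
 */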

static void ivshmem_vector_notify(void *opaque)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int vector = entry - s->msi_vectors;
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];

    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_enabled(pdev)) {
            msix_notify(pdev, vector);
        }
    } else {
        ivshmem_IntrStatus_write(s, 1);
    }
}

static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
                                 MSIMessage msg)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return -EINVAL;
    }
    assert(!v->unmasked);

    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
    if (ret < 0) {
        return ret;
    }
    kvm_irqchip_commit_routes(kvm_state);

    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
    if (ret < 0) {
        return ret;
    }
    v->unmasked = true;

    return 0;
}

static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return;
    }
    assert(v->unmasked);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
    if (ret < 0) {
        error_report("remove_irqfd_notifier_gsi failed");
        return;
    }
    v->unmasked = false;
}

static void ivshmem_vector_poll(PCIDevice *dev,
                                unsigned int vector_start,
                                unsigned int vector_end)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    unsigned int vector;

    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);

    vector_end = MIN(vector_end, s->vectors);

    for (vector = vector_start; vector < vector_end; vector++) {
        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];

        if (!msix_is_masked(dev, vector)) {
            continue;
        }

        if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
}

static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
                                  int vector)
{
    int eventfd = event_notifier_get_fd(n);

    assert(!s->msi_vectors[vector].pdev);
    s->msi_vectors[vector].pdev = PCI_DEVICE(s);

    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
                        NULL, &s->msi_vectors[vector]);
}

static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_add_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_del_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void close_peer_eventfds(IVShmemState *s, int posn)
{
    int i, n;

    assert(posn >= 0 && posn < s->nb_peers);
    n = s->peers[posn].nb_eventfds;

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        memory_region_transaction_begin();
        for (i = 0; i < n; i++) {
            ivshmem_del_eventfd(s, posn, i);
        }
        memory_region_transaction_commit();
    }

    for (i = 0; i < n; i++) {
        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
    }

    g_free(s->peers[posn].eventfds);
    s->peers[posn].nb_eventfds = 0;
}

static void resize_peers(IVShmemState *s, int nb_peers)
{
    int old_nb_peers = s->nb_peers;
    int i;

    assert(nb_peers > old_nb_peers);
    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);

    s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
    s->nb_peers = nb_peers;

    for (i = old_nb_peers; i < nb_peers; i++) {
        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
        s->peers[i].nb_eventfds = 0;
    }
}

static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                     Error **errp)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
    assert(!s->msi_vectors[vector].pdev);

    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
    if (ret < 0) {
        error_setg(errp, "kvm_irqchip_add_msi_route failed");
        return;
    }

    s->msi_vectors[vector].virq = ret;
    s->msi_vectors[vector].pdev = pdev;
}

static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);
    Error *err = NULL;

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        IVSHMEM_DPRINTF("with eventfd\n");
        watch_vector_notifier(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        ivshmem_add_kvm_msi_virq(s, vector, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
            /* TODO handle error */
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}
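
/*
 * How the three cases in setup_interrupt() deliver interrupts: without
 * KVM irqfd support, QEMU itself polls the peer's eventfd
 * (watch_vector_notifier()) and injects the interrupt via msix_notify()
 * or the status register; with irqfd and MSI-X already enabled, the
 * eventfd is wired directly to the KVM irqchip; with irqfd but MSI-X
 * still disabled, the wiring is deferred to ivshmem_write_config(),
 * which reacts when the guest enables MSI-X.
 */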

static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
    Error *local_err = NULL;
    struct stat buf;
    size_t size;

    if (s->ivshmem_bar2) {
        error_setg(errp, "server sent unexpected shared memory message");
        close(fd);
        return;
    }

    if (fstat(fd, &buf) < 0) {
        error_setg_errno(errp, errno,
                         "can't determine size of shared memory sent by server");
        close(fd);
        return;
    }

    size = buf.st_size;

    /* mmap the region and map it into BAR 2 */
    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
                                   "ivshmem.bar2", size, true, fd, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    s->ivshmem_bar2 = &s->server_bar2;
}

static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
                                   Error **errp)
{
    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
    if (posn >= s->nb_peers || posn == s->vm_id) {
        error_setg(errp, "invalid peer %d", posn);
        return;
    }
    close_peer_eventfds(s, posn);
}

static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1.  Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfds received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd polling must not block */

    if (posn == s->vm_id) {
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}
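
/*
 * Dispatch summary for messages from the server, as implemented in
 * process_msg() below: msg == -1 with an fd delivers the shared memory
 * region; msg == peer ID with an fd delivers one of that peer's
 * interrupt eventfds; msg == peer ID without an fd announces that the
 * peer has gone away.
 */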

static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
{
    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);

    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid message %" PRId64, msg);
        close(fd);
        return;
    }

    if (msg == -1) {
        process_msg_shmem(s, fd, errp);
        return;
    }

    if (msg >= s->nb_peers) {
        resize_peers(s, msg + 1);
    }

    if (fd >= 0) {
        process_msg_connect(s, msg, fd, errp);
    } else {
        process_msg_disconnect(s, msg, errp);
    }
}
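
/*
 * Wire format note: every message from the server is a little-endian
 * int64 (hence the le64_to_cpu() calls below), and any accompanying
 * file descriptor arrives as ancillary data on the server's socket,
 * retrieved with qemu_chr_fe_get_msgfd().  ivshmem_read() reassembles
 * messages the chardev may deliver in pieces; ivshmem_recv_msg()
 * further down reads one message synchronously during setup.
 */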

static int ivshmem_can_receive(void *opaque)
{
    IVShmemState *s = opaque;

    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
}

static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    Error *err = NULL;
    int fd;
    int64_t msg;

    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
    s->msg_buffered_bytes += size;
    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
        return;
    }
    msg = le64_to_cpu(s->msg_buf);
    s->msg_buffered_bytes = 0;

    fd = qemu_chr_fe_get_msgfd(&s->server_chr);

    process_msg(s, msg, fd, &err);
    if (err) {
        error_report_err(err);
    }
}

static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
    int64_t msg;
    int n, ret;

    n = 0;
    do {
        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
                                   sizeof(msg) - n);
        if (ret < 0) {
            if (ret == -EINTR) {
                continue;
            }
            error_setg_errno(errp, -ret, "read from server failed");
            return INT64_MIN;
        }
        n += ret;
    } while (n < sizeof(msg));

    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
    return le64_to_cpu(msg);
}

static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
{
    Error *err = NULL;
    int64_t msg;
    int fd;

    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (msg != IVSHMEM_PROTOCOL_VERSION) {
        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
                   msg, IVSHMEM_PROTOCOL_VERSION);
        return;
    }
    if (fd != -1) {
        error_setg(errp, "server sent invalid version message");
        return;
    }

    /*
     * ivshmem-server sends the remaining initial messages in a fixed
     * order, but the device has always accepted them in any order.
     * Stay as compatible as practical, just in case people use
     * servers that behave differently.
     */

    /*
     * ivshmem_device_spec.txt has always required the ID message
     * right here, and ivshmem-server has always complied.  However,
     * older versions of the device accepted it out of order, but
     * broke when an interrupt setup message arrived before it.
     */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid ID message");
        return;
    }
    s->vm_id = msg;

    /*
     * Receive more messages until we get the shared memory.
     */
    do {
        msg = ivshmem_recv_msg(s, &fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
        process_msg(s, msg, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
    } while (msg != -1);

    /*
     * This function must either map the shared memory or fail.  The
     * loop above ensures that: it terminates normally only after it
     * successfully processed the server's shared memory message.
     * Assert that we actually mapped the shared memory:
     */
    assert(s->ivshmem_bar2);
}

/*
 * Select the MSI-X vectors used by the device.  ivshmem maps events
 * to vectors statically, so we just enable all vectors on init and
 * after reset.
 */
static void ivshmem_msix_vector_use(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(d, i);
    }
}

static void ivshmem_disable_irqfd(IVShmemState *s);

static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = IVSHMEM_COMMON(d);

    ivshmem_disable_irqfd(s);

    s->intrstatus = 0;
    s->intrmask = 0;
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
}

static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
{
    /* allocate QEMU callback data for receiving interrupts */
    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
            return -1;
        }

        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
        ivshmem_msix_vector_use(s);
    }

    return 0;
}

static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    if (s->msi_vectors[vector].pdev == NULL) {
        return;
    }

    /* the irqfd was already torn down when the vector was masked */
    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);

    s->msi_vectors[vector].pdev = NULL;
}

static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            goto undo;
        }
    }

    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
        goto undo;
    }
    return;

undo:
    while (--i >= 0) {
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}

static void ivshmem_disable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    if (!pdev->msix_vector_use_notifier) {
        return;
    }

    msix_unset_vector_notifiers(pdev);

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        /*
         * MSI-X is already disabled here so msix_unset_vector_notifiers()
         * didn't call our release notifier.  Do it now to keep our masks
         * and unmasks balanced.
         */
        if (s->msi_vectors[i].unmasked) {
            ivshmem_vector_mask(pdev, i);
        }
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}

static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
                                 uint32_t val, int len)
{
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int is_enabled, was_enabled = msix_enabled(pdev);

    pci_default_write_config(pdev, address, val, len);
    is_enabled = msix_enabled(pdev);

    if (kvm_msi_via_irqfd_enabled()) {
        if (!was_enabled && is_enabled) {
            ivshmem_enable_irqfd(s);
        } else if (was_enabled && !is_enabled) {
            ivshmem_disable_irqfd(s);
        }
    }
}

static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    Error *err = NULL;
    uint8_t *pci_conf;
    Error *local_err = NULL;

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        error_setg(errp, "ioeventfd/irqfd requires MSI");
        return;
    }

    pci_conf = dev->config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers */
    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    if (s->hostmem != NULL) {
        IVSHMEM_DPRINTF("using hostmem\n");

        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
        host_memory_backend_set_mapped(s->hostmem, true);
    } else {
        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
        assert(chr);

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                        chr->filename);

        /* we allocate enough space for 16 peers and grow as needed */
        resize_peers(s, 16);

        /*
         * Receive setup messages from the server synchronously.
         * Older versions did it asynchronously, but that creates a
         * number of entertaining race conditions.
         */
        ivshmem_recv_setup(s, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
            error_setg(errp,
                       "master must connect to the server before any peers");
            return;
        }

        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
                                 ivshmem_read, NULL, NULL, s, NULL, true);

        if (ivshmem_setup_interrupts(s, errp) < 0) {
            error_prepend(errp, "Failed to initialize interrupts: ");
            return;
        }
    }

    if (s->master == ON_OFF_AUTO_AUTO) {
        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!ivshmem_is_master(s)) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
        migrate_add_blocker(s->migration_blocker, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            error_free(s->migration_blocker);
            return;
        }
    }

    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
    pci_register_bar(PCI_DEVICE(s), 2,
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_PREFETCH |
                     PCI_BASE_ADDRESS_MEM_TYPE_64,
                     s->ivshmem_bar2);
}

static void ivshmem_exit(PCIDevice *dev)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    int i;

    if (s->migration_blocker) {
        migrate_del_blocker(s->migration_blocker);
        error_free(s->migration_blocker);
    }

    if (memory_region_is_mapped(s->ivshmem_bar2)) {
        if (!s->hostmem) {
            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
            int fd;

            if (munmap(addr, memory_region_size(s->ivshmem_bar2)) == -1) {
                error_report("Failed to munmap shared memory %s",
                             strerror(errno));
            }

            fd = memory_region_get_fd(s->ivshmem_bar2);
            close(fd);
        }

        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
    }

    if (s->hostmem) {
        host_memory_backend_set_mapped(s->hostmem, false);
    }

    if (s->peers) {
        for (i = 0; i < s->nb_peers; i++) {
            close_peer_eventfds(s, i);
        }
        g_free(s->peers);
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        msix_uninit_exclusive_bar(dev);
    }

    g_free(s->msi_vectors);
}

static int ivshmem_pre_load(void *opaque)
{
    IVShmemState *s = opaque;

    if (!ivshmem_is_master(s)) {
        error_report("'peer' devices are not migratable");
        return -EINVAL;
    }

    return 0;
}

static int ivshmem_post_load(void *opaque, int version_id)
{
    IVShmemState *s = opaque;

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }

    return 0;
}

static void ivshmem_common_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_common_realize;
    k->exit = ivshmem_exit;
    k->config_write = ivshmem_write_config;
    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
    k->device_id = PCI_DEVICE_ID_IVSHMEM;
    k->class_id = PCI_CLASS_MEMORY_RAM;
    k->revision = 1;
    dc->reset = ivshmem_reset;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "Inter-VM shared memory";
}

static const TypeInfo ivshmem_common_info = {
    .name          = TYPE_IVSHMEM_COMMON,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IVShmemState),
    .abstract      = true,
    .class_init    = ivshmem_common_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

static const VMStateDescription ivshmem_plain_vmsd = {
    .name = TYPE_IVSHMEM_PLAIN,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_plain_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
    DEFINE_PROP_END_OF_LIST(),
};
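
/*
 * A minimal command-line sketch for this variant (illustrative; the
 * backend id and path below are hypothetical):
 *
 *   -object memory-backend-file,id=hostmem,share=on,size=1M,
 *           mem-path=/dev/shm/ivshmem
 *   -device ivshmem-plain,memdev=hostmem
 */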

static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!s->hostmem) {
        error_setg(errp, "You must specify a 'memdev'");
        return;
    } else if (host_memory_backend_is_mapped(s->hostmem)) {
        char *path = object_get_canonical_path_component(OBJECT(s->hostmem));
        error_setg(errp, "can't use already busy memdev: %s", path);
        g_free(path);
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_plain_realize;
    dc->props = ivshmem_plain_properties;
    dc->vmsd = &ivshmem_plain_vmsd;
}

static const TypeInfo ivshmem_plain_info = {
    .name          = TYPE_IVSHMEM_PLAIN,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .class_init    = ivshmem_plain_class_init,
};

static const VMStateDescription ivshmem_doorbell_vmsd = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_MSIX(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_doorbell_properties[] = {
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};
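
/*
 * A minimal command-line sketch for this variant (illustrative;
 * assumes an ivshmem-server listening on the hypothetical socket
 * path below):
 *
 *   -chardev socket,id=ivshmem,path=/tmp/ivshmem_socket
 *   -device ivshmem-doorbell,chardev=ivshmem,vectors=2
 */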

static void ivshmem_doorbell_init(Object *obj)
{
    IVShmemState *s = IVSHMEM_DOORBELL(obj);

    s->features |= (1 << IVSHMEM_MSI);
}

static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
        error_setg(errp, "You must specify a 'chardev'");
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_doorbell_realize;
    dc->props = ivshmem_doorbell_properties;
    dc->vmsd = &ivshmem_doorbell_vmsd;
}

static const TypeInfo ivshmem_doorbell_info = {
    .name          = TYPE_IVSHMEM_DOORBELL,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_doorbell_init,
    .class_init    = ivshmem_doorbell_class_init,
};

static void ivshmem_register_types(void)
{
    type_register_static(&ivshmem_common_info);
    type_register_static(&ivshmem_plain_info);
    type_register_static(&ivshmem_doorbell_info);
}

type_init(ivshmem_register_types)