ivshmem.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133
  1. /*
  2. * Inter-VM Shared Memory PCI device.
  3. *
  4. * Author:
  5. * Cam Macdonell <cam@cs.ualberta.ca>
  6. *
  7. * Based On: cirrus_vga.c
  8. * Copyright (c) 2004 Fabrice Bellard
  9. * Copyright (c) 2004 Makoto Suzuki (suzu)
  10. *
  11. * and rtl8139.c
  12. * Copyright (c) 2006 Igor Kovalenko
  13. *
  14. * This code is licensed under the GNU GPL v2.
  15. *
  16. * Contributions after 2012-01-13 are licensed under the terms of the
  17. * GNU GPL, version 2 or (at your option) any later version.
  18. */
  19. #include "qemu/osdep.h"
  20. #include "qemu/units.h"
  21. #include "qapi/error.h"
  22. #include "qemu/cutils.h"
  23. #include "hw/pci/pci.h"
  24. #include "hw/qdev-properties.h"
  25. #include "hw/pci/msi.h"
  26. #include "hw/pci/msix.h"
  27. #include "sysemu/kvm.h"
  28. #include "migration/blocker.h"
  29. #include "migration/vmstate.h"
  30. #include "qemu/error-report.h"
  31. #include "qemu/event_notifier.h"
  32. #include "qemu/module.h"
  33. #include "qom/object_interfaces.h"
  34. #include "chardev/char-fe.h"
  35. #include "sysemu/hostmem.h"
  36. #include "sysemu/qtest.h"
  37. #include "qapi/visitor.h"
  38. #include "hw/misc/ivshmem.h"
/* PCI IDs: Red Hat vendor ID with the ivshmem device ID */
#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM 0x1110

/* Peer IDs travel in the upper 16 bits of a doorbell write */
#define IVSHMEM_MAX_PEERS UINT16_MAX

/* Bit numbers within IVShmemState::features */
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI 1

/* Size of BAR 0, the register BAR */
#define IVSHMEM_REG_BAR_SIZE 0x100

/* Debug tracing, compiled out unless IVSHMEM_DEBUG is non-zero */
#define IVSHMEM_DEBUG 0
#define IVSHMEM_DPRINTF(fmt, ...) \
    do { \
        if (IVSHMEM_DEBUG) { \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/* QOM type names and cast macros for the device flavours */
#define TYPE_IVSHMEM_COMMON "ivshmem-common"
#define IVSHMEM_COMMON(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
#define IVSHMEM_PLAIN(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
#define IVSHMEM_DOORBELL(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
#define IVSHMEM(obj) \
    OBJECT_CHECK(IVShmemState, (obj), TYPE_IVSHMEM)

/* Per-peer state: one eventfd per interrupt vector */
typedef struct Peer {
    int nb_eventfds;         /* number of valid entries in @eventfds */
    EventNotifier *eventfds; /* allocated to hold s->vectors entries */
} Peer;

/* Per-vector interrupt routing state */
typedef struct MSIVector {
    PCIDevice *pdev; /* non-NULL while the vector is in use */
    int virq;        /* KVM irqchip route; valid when @pdev is set */
    bool unmasked;   /* irqfd currently attached to @virq */
} MSIVector;

typedef struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/
    uint32_t features;
    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* without interrupts (ivshmem-plain) */
    CharBackend server_chr; /* with interrupts (ivshmem-doorbell) */
    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id; /* our own peer ID, assigned by the server */
    /* BARs */
    MemoryRegion ivshmem_mmio; /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2; /* used with server_chr */
    /* interrupt support */
    Peer *peers;
    int nb_peers; /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf; /* buffer for receiving server messages */
    int msg_buffered_bytes; /* #bytes in @msg_buf */
    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;
} IVShmemState;

/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8, /* read-only: this guest's own peer ID */
    DOORBELL = 12,  /* write-only: (peer ID << 16) | vector */
};
  107. static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
  108. unsigned int feature) {
  109. return (ivs->features & (1 << feature));
  110. }
  111. static inline bool ivshmem_is_master(IVShmemState *s)
  112. {
  113. assert(s->master != ON_OFF_AUTO_AUTO);
  114. return s->master == ON_OFF_AUTO_ON;
  115. }
  116. static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
  117. {
  118. IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);
  119. s->intrmask = val;
  120. }
  121. static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
  122. {
  123. uint32_t ret = s->intrmask;
  124. IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
  125. return ret;
  126. }
  127. static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
  128. {
  129. IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);
  130. s->intrstatus = val;
  131. }
  132. static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
  133. {
  134. uint32_t ret = s->intrstatus;
  135. /* reading ISR clears all interrupts */
  136. s->intrstatus = 0;
  137. return ret;
  138. }
  139. static void ivshmem_io_write(void *opaque, hwaddr addr,
  140. uint64_t val, unsigned size)
  141. {
  142. IVShmemState *s = opaque;
  143. uint16_t dest = val >> 16;
  144. uint16_t vector = val & 0xff;
  145. addr &= 0xfc;
  146. IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
  147. switch (addr)
  148. {
  149. case INTRMASK:
  150. ivshmem_IntrMask_write(s, val);
  151. break;
  152. case INTRSTATUS:
  153. ivshmem_IntrStatus_write(s, val);
  154. break;
  155. case DOORBELL:
  156. /* check that dest VM ID is reasonable */
  157. if (dest >= s->nb_peers) {
  158. IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
  159. break;
  160. }
  161. /* check doorbell range */
  162. if (vector < s->peers[dest].nb_eventfds) {
  163. IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
  164. event_notifier_set(&s->peers[dest].eventfds[vector]);
  165. } else {
  166. IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
  167. vector, dest);
  168. }
  169. break;
  170. default:
  171. IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
  172. }
  173. }
  174. static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
  175. unsigned size)
  176. {
  177. IVShmemState *s = opaque;
  178. uint32_t ret;
  179. switch (addr)
  180. {
  181. case INTRMASK:
  182. ret = ivshmem_IntrMask_read(s);
  183. break;
  184. case INTRSTATUS:
  185. ret = ivshmem_IntrStatus_read(s);
  186. break;
  187. case IVPOSITION:
  188. ret = s->vm_id;
  189. break;
  190. default:
  191. IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
  192. ret = 0;
  193. }
  194. return ret;
  195. }
/*
 * BAR 0 MMIO ops.  The .impl settings force all accesses to be performed
 * as 32-bit operations; narrower guest accesses are adapted by the core.
 */
static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
/*
 * Main-loop fd handler for one of our own eventfds: a peer (or we
 * ourselves) rang our doorbell.  Delivers the event as an MSI-X
 * interrupt, or by latching bit 0 of the status register otherwise.
 * @opaque is the MSIVector registered by watch_vector_notifier().
 */
static void ivshmem_vector_notify(void *opaque)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int vector = entry - s->msi_vectors; /* index of @entry in msi_vectors[] */
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];

    /* Nothing pending: spurious wakeup, ignore */
    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        /* Drop the event silently while the guest has MSI-X disabled */
        if (msix_enabled(pdev)) {
            msix_notify(pdev, vector);
        }
    } else {
        /* No MSI: record the event in the interrupt status register */
        ivshmem_IntrStatus_write(s, 1);
    }
}
/*
 * MSI-X unmask notifier: re-attach the KVM irqfd for @vector so the
 * kernel injects peer notifications directly, bypassing userspace.
 * Returns 0 on success, negative errno on failure.
 */
static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
                                 MSIMessage msg)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return -EINVAL;
    }
    assert(!v->unmasked);

    /* Point the existing route at the vector's current MSI message ... */
    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
    if (ret < 0) {
        return ret;
    }
    kvm_irqchip_commit_routes(kvm_state);

    /* ... then wire the eventfd to that route */
    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
    if (ret < 0) {
        return ret;
    }
    v->unmasked = true;

    return 0;
}
/*
 * MSI-X mask notifier: detach the irqfd for @vector.  Events arriving
 * while masked stay pending in the eventfd and are picked up by
 * ivshmem_vector_poll().
 */
static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return;
    }
    assert(v->unmasked);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
    if (ret < 0) {
        error_report("remove_irqfd_notifier_gsi failed");
        return;
    }
    v->unmasked = false;
}
  268. static void ivshmem_vector_poll(PCIDevice *dev,
  269. unsigned int vector_start,
  270. unsigned int vector_end)
  271. {
  272. IVShmemState *s = IVSHMEM_COMMON(dev);
  273. unsigned int vector;
  274. IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);
  275. vector_end = MIN(vector_end, s->vectors);
  276. for (vector = vector_start; vector < vector_end; vector++) {
  277. EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];
  278. if (!msix_is_masked(dev, vector)) {
  279. continue;
  280. }
  281. if (event_notifier_test_and_clear(notifier)) {
  282. msix_set_pending(dev, vector);
  283. }
  284. }
  285. }
/*
 * Userspace delivery path (no KVM irqfd): poll eventfd @n from the main
 * loop and let ivshmem_vector_notify() raise the interrupt for @vector.
 */
static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
                                  int vector)
{
    int eventfd = event_notifier_get_fd(n);

    assert(!s->msi_vectors[vector].pdev);
    /* Marks the vector in use and gives the fd handler its PCIDevice */
    s->msi_vectors[vector].pdev = PCI_DEVICE(s);

    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
                        NULL, &s->msi_vectors[vector]);
}
/*
 * Register peer @posn's eventfd @i as an ioeventfd on the doorbell
 * register: a guest write of (posn << 16) | i signals the eventfd
 * directly, without taking the MMIO write path.
 */
static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_add_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

/* Undo ivshmem_add_eventfd() for peer @posn, vector @i. */
static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_del_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}
  313. static void close_peer_eventfds(IVShmemState *s, int posn)
  314. {
  315. int i, n;
  316. assert(posn >= 0 && posn < s->nb_peers);
  317. n = s->peers[posn].nb_eventfds;
  318. if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
  319. memory_region_transaction_begin();
  320. for (i = 0; i < n; i++) {
  321. ivshmem_del_eventfd(s, posn, i);
  322. }
  323. memory_region_transaction_commit();
  324. }
  325. for (i = 0; i < n; i++) {
  326. event_notifier_cleanup(&s->peers[posn].eventfds[i]);
  327. }
  328. g_free(s->peers[posn].eventfds);
  329. s->peers[posn].nb_eventfds = 0;
  330. }
  331. static void resize_peers(IVShmemState *s, int nb_peers)
  332. {
  333. int old_nb_peers = s->nb_peers;
  334. int i;
  335. assert(nb_peers > old_nb_peers);
  336. IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);
  337. s->peers = g_realloc(s->peers, nb_peers * sizeof(Peer));
  338. s->nb_peers = nb_peers;
  339. for (i = old_nb_peers; i < nb_peers; i++) {
  340. s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
  341. s->peers[i].nb_eventfds = 0;
  342. }
  343. }
/*
 * Allocate a KVM MSI routing entry (virq) for @vector and record it in
 * s->msi_vectors[].  A non-NULL pdev marks the slot as allocated.
 * Sets @errp on failure.
 */
static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                     Error **errp)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
    assert(!s->msi_vectors[vector].pdev);

    /* Negative return is an error; otherwise it is the new virq number */
    ret = kvm_irqchip_add_msi_route(kvm_state, vector, pdev);
    if (ret < 0) {
        error_setg(errp, "kvm_irqchip_add_msi_route failed");
        return;
    }

    s->msi_vectors[vector].virq = ret;
    s->msi_vectors[vector].pdev = pdev;
}
/*
 * Hook up delivery for our own interrupt @vector, whose eventfd we just
 * received from the server.  Three cases:
 *  - no KVM irqfd available: poll the eventfd from the main loop;
 *  - irqfd and MSI-X already enabled: create the KVM route now and
 *    attach the irqfd (unless the vector is currently masked);
 *  - irqfd but MSI-X not yet enabled: defer until the guest enables
 *    MSI-X (handled in ivshmem_write_config()).
 */
static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);
    Error *err = NULL;

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        IVSHMEM_DPRINTF("with eventfd\n");
        watch_vector_notifier(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        ivshmem_add_kvm_msi_virq(s, vector, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
            /* TODO handle error */
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}
/*
 * Handle the server's shared memory message: @fd is the shm file
 * descriptor, and its size (via fstat) becomes the size of BAR 2.
 * On success s->ivshmem_bar2 points at the freshly mapped region.
 * Sets @errp on failure.
 */
static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
    Error *local_err = NULL;
    struct stat buf;
    size_t size;

    /* Only one shared memory message is valid per connection */
    if (s->ivshmem_bar2) {
        error_setg(errp, "server sent unexpected shared memory message");
        close(fd);
        return;
    }

    if (fstat(fd, &buf) < 0) {
        error_setg_errno(errp, errno,
            "can't determine size of shared memory sent by server");
        close(fd);
        return;
    }

    size = buf.st_size;

    /* mmap the region and map into the BAR2 */
    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
                                   "ivshmem.bar2", size, true, fd, &local_err);
    /*
     * NOTE(review): on this failure path @fd is not closed here — confirm
     * whether memory_region_init_ram_from_fd() takes ownership on error.
     */
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    s->ivshmem_bar2 = &s->server_bar2;
}
  413. static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
  414. Error **errp)
  415. {
  416. IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
  417. if (posn >= s->nb_peers || posn == s->vm_id) {
  418. error_setg(errp, "invalid peer %d", posn);
  419. return;
  420. }
  421. close_peer_eventfds(s, posn);
  422. }
/*
 * Handle a connect message for peer @posn carrying eventfd @fd.
 * If the peer is ourselves, also wire up interrupt delivery, and with
 * the ioeventfd feature make the eventfd reachable straight from a
 * guest doorbell write.
 */
static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1. Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfd received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */

    if (posn == s->vm_id) {
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}
  450. static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
  451. {
  452. IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
  453. if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
  454. error_setg(errp, "server sent invalid message %" PRId64, msg);
  455. close(fd);
  456. return;
  457. }
  458. if (msg == -1) {
  459. process_msg_shmem(s, fd, errp);
  460. return;
  461. }
  462. if (msg >= s->nb_peers) {
  463. resize_peers(s, msg + 1);
  464. }
  465. if (fd >= 0) {
  466. process_msg_connect(s, msg, fd, errp);
  467. } else {
  468. process_msg_disconnect(s, msg, errp);
  469. }
  470. }
  471. static int ivshmem_can_receive(void *opaque)
  472. {
  473. IVShmemState *s = opaque;
  474. assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
  475. return sizeof(s->msg_buf) - s->msg_buffered_bytes;
  476. }
/*
 * Chardev receive callback: accumulate bytes into the 8-byte
 * little-endian message buffer; once a full message is present, pick
 * up any accompanying SCM_RIGHTS fd and process it.  Errors are
 * reported but do not stop the device.
 */
static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    Error *err = NULL;
    int fd;
    int64_t msg;

    /* ivshmem_can_receive() bounds @size, so this cannot overflow */
    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
    s->msg_buffered_bytes += size;
    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
        return; /* message still incomplete, wait for more bytes */
    }
    msg = le64_to_cpu(s->msg_buf);
    s->msg_buffered_bytes = 0;

    /* fd is -1 when the message carried no file descriptor */
    fd = qemu_chr_fe_get_msgfd(&s->server_chr);

    process_msg(s, msg, fd, &err);
    if (err) {
        error_report_err(err);
    }
}
/*
 * Synchronously read one 8-byte little-endian message from the server,
 * retrying on EINTR.  Stores the accompanying fd (or -1) in *pfd.
 * On read failure sets @errp and returns INT64_MIN.
 *
 * NOTE(review): a persistent 0-byte return (server hangup) would spin
 * here forever — confirm qemu_chr_fe_read_all() cannot return 0 before
 * the requested length.
 */
static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
    int64_t msg;
    int n, ret;

    n = 0;
    do {
        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
                                   sizeof(msg) - n);
        if (ret < 0) {
            if (ret == -EINTR) {
                continue;
            }
            error_setg_errno(errp, -ret, "read from server failed");
            return INT64_MIN;
        }
        n += ret;
    } while (n < sizeof(msg));

    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
    return le64_to_cpu(msg);
}
/*
 * Perform the synchronous handshake with the ivshmem server: protocol
 * version, then our own peer ID, then peer/eventfd messages until the
 * shared memory fd (msg == -1) arrives.  Sets @errp on any protocol or
 * I/O error; on success s->vm_id is set and BAR 2 is mapped.
 */
static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
{
    Error *err = NULL;
    int64_t msg;
    int fd;

    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (msg != IVSHMEM_PROTOCOL_VERSION) {
        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
                   msg, IVSHMEM_PROTOCOL_VERSION);
        return;
    }
    /* The version message must not carry a file descriptor */
    if (fd != -1) {
        error_setg(errp, "server sent invalid version message");
        return;
    }

    /*
     * ivshmem-server sends the remaining initial messages in a fixed
     * order, but the device has always accepted them in any order.
     * Stay as compatible as practical, just in case people use
     * servers that behave differently.
     */

    /*
     * ivshmem_device_spec.txt has always required the ID message
     * right here, and ivshmem-server has always complied. However,
     * older versions of the device accepted it out of order, but
     * broke when an interrupt setup message arrived before it.
     */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid ID message");
        return;
    }
    s->vm_id = msg;

    /*
     * Receive more messages until we got shared memory.
     */
    do {
        msg = ivshmem_recv_msg(s, &fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
        process_msg(s, msg, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
    } while (msg != -1);

    /*
     * This function must either map the shared memory or fail. The
     * loop above ensures that: it terminates normally only after it
     * successfully processed the server's shared memory message.
     * Assert that actually mapped the shared memory:
     */
    assert(s->ivshmem_bar2);
}
  581. /* Select the MSI-X vectors used by device.
  582. * ivshmem maps events to vectors statically, so
  583. * we just enable all vectors on init and after reset. */
  584. static void ivshmem_msix_vector_use(IVShmemState *s)
  585. {
  586. PCIDevice *d = PCI_DEVICE(s);
  587. int i;
  588. for (i = 0; i < s->vectors; i++) {
  589. msix_vector_use(d, i);
  590. }
  591. }
static void ivshmem_disable_irqfd(IVShmemState *s);

/*
 * Device reset: tear down any irqfd wiring, clear the interrupt
 * registers and, with MSI, re-enable all vectors (reset cleared the
 * vector-use state).
 */
static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = IVSHMEM_COMMON(d);

    ivshmem_disable_irqfd(s);

    s->intrstatus = 0;
    s->intrmask = 0;
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
}
  603. static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
  604. {
  605. /* allocate QEMU callback data for receiving interrupts */
  606. s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));
  607. if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
  608. if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
  609. return -1;
  610. }
  611. IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
  612. ivshmem_msix_vector_use(s);
  613. }
  614. return 0;
  615. }
/*
 * Release the KVM MSI route for @vector, if one was allocated.
 * Clearing pdev marks the slot free for ivshmem_add_kvm_msi_virq().
 */
static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    if (s->msi_vectors[vector].pdev == NULL) {
        return;
    }

    /* it was cleaned when masked in the frontend. */
    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);

    s->msi_vectors[vector].pdev = NULL;
}
/*
 * Switch interrupt delivery to KVM irqfd: allocate an MSI route for
 * every vector the server gave us an eventfd for, then install the
 * MSI-X mask/unmask/poll notifiers (unmasking attaches the irqfds).
 * On failure, roll back the routes created so far.
 */
static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            goto undo;
        }
    }

    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
        /* i == nb_eventfds here, so the loop below undoes every route */
        goto undo;
    }
    return;

undo:
    while (--i >= 0) {
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}
/*
 * Undo ivshmem_enable_irqfd(): detach irqfds, release the KVM routes
 * and remove the MSI-X vector notifiers.  No-op when irqfd was never
 * enabled (detected via the notifier msix_set_vector_notifiers() set).
 */
static void ivshmem_disable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    if (!pdev->msix_vector_use_notifier) {
        return;
    }

    msix_unset_vector_notifiers(pdev);

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        /*
         * MSI-X is already disabled here so msix_unset_vector_notifiers()
         * didn't call our release notifier. Do it now to keep our masks and
         * unmasks balanced.
         */
        if (s->msi_vectors[i].unmasked) {
            ivshmem_vector_mask(pdev, i);
        }
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}
  671. static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
  672. uint32_t val, int len)
  673. {
  674. IVShmemState *s = IVSHMEM_COMMON(pdev);
  675. int is_enabled, was_enabled = msix_enabled(pdev);
  676. pci_default_write_config(pdev, address, val, len);
  677. is_enabled = msix_enabled(pdev);
  678. if (kvm_msi_via_irqfd_enabled()) {
  679. if (!was_enabled && is_enabled) {
  680. ivshmem_enable_irqfd(s);
  681. } else if (was_enabled && !is_enabled) {
  682. ivshmem_disable_irqfd(s);
  683. }
  684. }
  685. }
/*
 * Realize code shared by ivshmem-plain and ivshmem-doorbell: set up the
 * register BAR, obtain BAR 2's memory either from the memdev (plain) or
 * by handshaking with the ivshmem server (doorbell), resolve the master
 * role, and block migration for peer-mode devices.
 */
static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    Error *err = NULL;
    uint8_t *pci_conf;

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        error_setg(errp, "ioeventfd/irqfd requires MSI");
        return;
    }

    pci_conf = dev->config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers*/
    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    if (s->hostmem != NULL) {
        IVSHMEM_DPRINTF("using hostmem\n");

        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
        host_memory_backend_set_mapped(s->hostmem, true);
    } else {
        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
        assert(chr);

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                        chr->filename);

        /* we allocate enough space for 16 peers and grow as needed */
        resize_peers(s, 16);

        /*
         * Receive setup messages from server synchronously.
         * Older versions did it asynchronously, but that creates a
         * number of entertaining race conditions.
         */
        ivshmem_recv_setup(s, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
            error_setg(errp,
                       "master must connect to the server before any peers");
            return;
        }

        /* From here on, server messages are handled asynchronously */
        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
                                 ivshmem_read, NULL, NULL, s, NULL, true);

        if (ivshmem_setup_interrupts(s, errp) < 0) {
            error_prepend(errp, "Failed to initialize interrupts: ");
            return;
        }
    }

    if (s->master == ON_OFF_AUTO_AUTO) {
        /* By default, peer ID 0 owns the memory across migration */
        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!ivshmem_is_master(s)) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
        if (migrate_add_blocker(s->migration_blocker, errp) < 0) {
            error_free(s->migration_blocker);
            return;
        }
    }

    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
    pci_register_bar(PCI_DEVICE(s), 2,
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_PREFETCH |
                     PCI_BASE_ADDRESS_MEM_TYPE_64,
                     s->ivshmem_bar2);
}
  755. static void ivshmem_exit(PCIDevice *dev)
  756. {
  757. IVShmemState *s = IVSHMEM_COMMON(dev);
  758. int i;
  759. if (s->migration_blocker) {
  760. migrate_del_blocker(s->migration_blocker);
  761. error_free(s->migration_blocker);
  762. }
  763. if (memory_region_is_mapped(s->ivshmem_bar2)) {
  764. if (!s->hostmem) {
  765. void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
  766. int fd;
  767. if (munmap(addr, memory_region_size(s->ivshmem_bar2) == -1)) {
  768. error_report("Failed to munmap shared memory %s",
  769. strerror(errno));
  770. }
  771. fd = memory_region_get_fd(s->ivshmem_bar2);
  772. close(fd);
  773. }
  774. vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
  775. }
  776. if (s->hostmem) {
  777. host_memory_backend_set_mapped(s->hostmem, false);
  778. }
  779. if (s->peers) {
  780. for (i = 0; i < s->nb_peers; i++) {
  781. close_peer_eventfds(s, i);
  782. }
  783. g_free(s->peers);
  784. }
  785. if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
  786. msix_uninit_exclusive_bar(dev);
  787. }
  788. g_free(s->msi_vectors);
  789. }
  790. static int ivshmem_pre_load(void *opaque)
  791. {
  792. IVShmemState *s = opaque;
  793. if (!ivshmem_is_master(s)) {
  794. error_report("'peer' devices are not migratable");
  795. return -EINVAL;
  796. }
  797. return 0;
  798. }
  799. static int ivshmem_post_load(void *opaque, int version_id)
  800. {
  801. IVShmemState *s = opaque;
  802. if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
  803. ivshmem_msix_vector_use(s);
  804. }
  805. return 0;
  806. }
/* Class init shared by all ivshmem flavours: PCI identity and hooks. */
static void ivshmem_common_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_common_realize;
    k->exit = ivshmem_exit;
    k->config_write = ivshmem_write_config;
    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
    k->device_id = PCI_DEVICE_ID_IVSHMEM;
    k->class_id = PCI_CLASS_MEMORY_RAM;
    k->revision = 1;
    dc->reset = ivshmem_reset;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "Inter-VM shared memory";
}

/* Abstract base type; concrete types are the -plain/-doorbell variants */
static const TypeInfo ivshmem_common_info = {
    .name = TYPE_IVSHMEM_COMMON,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IVShmemState),
    .abstract = true,
    .class_init = ivshmem_common_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

/* Migration state for ivshmem-plain: PCI core plus interrupt registers */
static const VMStateDescription ivshmem_plain_vmsd = {
    .name = TYPE_IVSHMEM_PLAIN,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_plain_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
    DEFINE_PROP_END_OF_LIST(),
};
  852. static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
  853. {
  854. IVShmemState *s = IVSHMEM_COMMON(dev);
  855. if (!s->hostmem) {
  856. error_setg(errp, "You must specify a 'memdev'");
  857. return;
  858. } else if (host_memory_backend_is_mapped(s->hostmem)) {
  859. error_setg(errp, "can't use already busy memdev: %s",
  860. object_get_canonical_path_component(OBJECT(s->hostmem)));
  861. return;
  862. }
  863. ivshmem_common_realize(dev, errp);
  864. }
static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_plain_realize;
    device_class_set_props(dc, ivshmem_plain_properties);
    dc->vmsd = &ivshmem_plain_vmsd;
}

static const TypeInfo ivshmem_plain_info = {
    .name = TYPE_IVSHMEM_PLAIN,
    .parent = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .class_init = ivshmem_plain_class_init,
};

/* Migration state for ivshmem-doorbell: additionally carries MSI-X state */
static const VMStateDescription ivshmem_doorbell_vmsd = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_MSIX(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_doorbell_properties[] = {
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};
/* Instance init: the doorbell flavour always has the MSI feature. */
static void ivshmem_doorbell_init(Object *obj)
{
    IVShmemState *s = IVSHMEM_DOORBELL(obj);

    s->features |= (1 << IVSHMEM_MSI);
}

/*
 * Realize for ivshmem-doorbell: requires a connected server chardev,
 * then defers to the common realize code.
 */
static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
        error_setg(errp, "You must specify a 'chardev'");
        return;
    }

    ivshmem_common_realize(dev, errp);
}
static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_doorbell_realize;
    device_class_set_props(dc, ivshmem_doorbell_properties);
    dc->vmsd = &ivshmem_doorbell_vmsd;
}

static const TypeInfo ivshmem_doorbell_info = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .parent = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_doorbell_init,
    .class_init = ivshmem_doorbell_class_init,
};

/* Register the abstract base and the two concrete device types */
static void ivshmem_register_types(void)
{
    type_register_static(&ivshmem_common_info);
    type_register_static(&ivshmem_plain_info);
    type_register_static(&ivshmem_doorbell_info);
}

type_init(ivshmem_register_types)