/*
 * Inter-VM Shared Memory PCI device.
 *
 * Author:
 *      Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based On: cirrus_vga.c
 *          Copyright (c) 2004 Fabrice Bellard
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
 *
 * and rtl8139.c
 *          Copyright (c) 2006 Igor Kovalenko
 *
 * This code is licensed under the GNU GPL v2.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "sysemu/kvm.h"
#include "migration/blocker.h"
#include "migration/vmstate.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "chardev/char-fe.h"
#include "sysemu/hostmem.h"
#include "qapi/visitor.h"
#include "hw/misc/ivshmem.h"
#include "qom/object.h"

#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM   0x1110

#define IVSHMEM_MAX_PEERS       UINT16_MAX
#define IVSHMEM_IOEVENTFD       0
#define IVSHMEM_MSI             1

#define IVSHMEM_REG_BAR_SIZE    0x100

#define IVSHMEM_DEBUG 0
#define IVSHMEM_DPRINTF(fmt, ...)                       \
    do {                                                \
        if (IVSHMEM_DEBUG) {                            \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
        }                                               \
    } while (0)

#define TYPE_IVSHMEM_COMMON "ivshmem-common"
typedef struct IVShmemState IVShmemState;
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_COMMON,
                         TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_PLAIN,
                         TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_DOORBELL,
                         TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM,
                         TYPE_IVSHMEM)

typedef struct Peer {
    int nb_eventfds;
    EventNotifier *eventfds;
} Peer;

typedef struct MSIVector {
    PCIDevice *pdev;
    int virq;
    bool unmasked;
} MSIVector;

struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    uint32_t features;

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* with interrupts */
    CharBackend server_chr;     /* without interrupts */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;
};

/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8,
    DOORBELL = 12,
};
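
/*
 * A DOORBELL write packs the destination peer ID into bits 31:16 and the
 * interrupt vector into the low bits (the handler below keeps bits 7:0).
 * As an illustrative sketch only (not part of this file), a guest driver
 * with BAR 0 mapped at bar0 might ring peer 3, vector 0 like this:
 *
 *     *(volatile uint32_t *)(bar0 + DOORBELL) = (3u << 16) | 0;
 */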
static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature)
{
    return (ivs->features & (1 << feature));
}

static inline bool ivshmem_is_master(IVShmemState *s)
{
    assert(s->master != ON_OFF_AUTO_AUTO);
    return s->master == ON_OFF_AUTO_ON;
}

static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;
}

static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t ret = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
    return ret;
}

static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;
}

static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t ret = s->intrstatus;

    /* reading ISR clears all interrupts */
    s->intrstatus = 0;
    return ret;
}

static void ivshmem_io_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;

    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " HWADDR_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable */
            if (dest >= s->nb_peers) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                event_notifier_set(&s->peers[dest].eventfds[vector]);
            } else {
                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
                                vector, dest);
            }
            break;

        default:
            IVSHMEM_DPRINTF("Unhandled write " HWADDR_FMT_plx "\n", addr);
    }
}

static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
                                unsigned size)
{
    IVShmemState *s = opaque;
    uint32_t ret;

    switch (addr)
    {
        case INTRMASK:
            ret = ivshmem_IntrMask_read(s);
            break;

        case INTRSTATUS:
            ret = ivshmem_IntrStatus_read(s);
            break;

        case IVPOSITION:
            ret = s->vm_id;
            break;

        default:
            IVSHMEM_DPRINTF("why are we reading " HWADDR_FMT_plx "\n", addr);
            ret = 0;
    }

    return ret;
}

static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
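
/*
 * Note: the .impl sizes above make the memory core split or combine guest
 * accesses so the handlers always see 4-byte accesses, matching the 4-byte
 * registers declared in enum ivshmem_registers.
 */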
static void ivshmem_vector_notify(void *opaque)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int vector = entry - s->msi_vectors;
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];

    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_enabled(pdev)) {
            msix_notify(pdev, vector);
        }
    } else {
        ivshmem_IntrStatus_write(s, 1);
    }
}

static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
                                 MSIMessage msg)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return -EINVAL;
    }
    assert(!v->unmasked);

    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
    if (ret < 0) {
        return ret;
    }
    kvm_irqchip_commit_routes(kvm_state);

    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
    if (ret < 0) {
        return ret;
    }
    v->unmasked = true;
    return 0;
}

static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return;
    }
    assert(v->unmasked);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
    if (ret < 0) {
        error_report("remove_irqfd_notifier_gsi failed");
        return;
    }
    v->unmasked = false;
}

static void ivshmem_vector_poll(PCIDevice *dev,
                                unsigned int vector_start,
                                unsigned int vector_end)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    unsigned int vector;

    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);

    vector_end = MIN(vector_end, s->vectors);

    for (vector = vector_start; vector < vector_end; vector++) {
        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];

        if (!msix_is_masked(dev, vector)) {
            continue;
        }

        if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
}

static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
                                  int vector)
{
    int eventfd = event_notifier_get_fd(n);

    assert(!s->msi_vectors[vector].pdev);
    s->msi_vectors[vector].pdev = PCI_DEVICE(s);

    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
                        NULL, &s->msi_vectors[vector]);
}
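
/*
 * With the IVSHMEM_IOEVENTFD feature, a doorbell write whose 32-bit value
 * equals (posn << 16) | i is completed directly by the eventfd registered
 * below (under KVM, without bouncing through ivshmem_io_write()); writes
 * that match no registered eventfd still take the MMIO slow path.
 */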
static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_add_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_del_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void close_peer_eventfds(IVShmemState *s, int posn)
{
    int i, n;

    assert(posn >= 0 && posn < s->nb_peers);
    n = s->peers[posn].nb_eventfds;

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        memory_region_transaction_begin();
        for (i = 0; i < n; i++) {
            ivshmem_del_eventfd(s, posn, i);
        }
        memory_region_transaction_commit();
    }

    for (i = 0; i < n; i++) {
        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
    }

    g_free(s->peers[posn].eventfds);
    s->peers[posn].nb_eventfds = 0;
}

static void resize_peers(IVShmemState *s, int nb_peers)
{
    int old_nb_peers = s->nb_peers;
    int i;

    assert(nb_peers > old_nb_peers);
    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);

    s->peers = g_renew(Peer, s->peers, nb_peers);
    s->nb_peers = nb_peers;

    for (i = old_nb_peers; i < nb_peers; i++) {
        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
        s->peers[i].nb_eventfds = 0;
    }
}

static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                     Error **errp)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    KVMRouteChange c;
    int ret;

    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
    assert(!s->msi_vectors[vector].pdev);

    c = kvm_irqchip_begin_route_changes(kvm_state);
    ret = kvm_irqchip_add_msi_route(&c, vector, pdev);
    if (ret < 0) {
        error_setg(errp, "kvm_irqchip_add_msi_route failed");
        return;
    }
    kvm_irqchip_commit_route_changes(&c);

    s->msi_vectors[vector].virq = ret;
    s->msi_vectors[vector].pdev = pdev;
}

static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);
    Error *err = NULL;

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        IVSHMEM_DPRINTF("with eventfd\n");
        watch_vector_notifier(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        ivshmem_add_kvm_msi_virq(s, vector, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
            /* TODO handle error */
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}

static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
    Error *local_err = NULL;
    struct stat buf;
    size_t size;

    if (s->ivshmem_bar2) {
        error_setg(errp, "server sent unexpected shared memory message");
        close(fd);
        return;
    }

    if (fstat(fd, &buf) < 0) {
        error_setg_errno(errp, errno,
                         "can't determine size of shared memory sent by server");
        close(fd);
        return;
    }

    size = buf.st_size;

    /* mmap the region and map into the BAR2 */
    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), "ivshmem.bar2",
                                   size, RAM_SHARED, fd, 0, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    s->ivshmem_bar2 = &s->server_bar2;
}

static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
                                   Error **errp)
{
    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
    if (posn >= s->nb_peers || posn == s->vm_id) {
        error_setg(errp, "invalid peer %d", posn);
        return;
    }
    close_peer_eventfds(s, posn);
}
static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1.  Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfds received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    g_unix_set_fd_nonblocking(fd, true, NULL); /* msix/irqfd poll non block */

    if (posn == s->vm_id) {
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}
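
/*
 * Server messages are 64-bit little-endian integers, optionally carrying a
 * file descriptor via SCM_RIGHTS:
 *
 *     msg == -1, with fd     the shared memory region
 *     msg >= 0,  with fd     connect: eventfd for peer @msg's next vector
 *     msg >= 0,  no fd       disconnect of peer @msg
 */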
static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
{
    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);

    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid message %" PRId64, msg);
        close(fd);
        return;
    }

    if (msg == -1) {
        process_msg_shmem(s, fd, errp);
        return;
    }

    if (msg >= s->nb_peers) {
        resize_peers(s, msg + 1);
    }

    if (fd >= 0) {
        process_msg_connect(s, msg, fd, errp);
    } else {
        process_msg_disconnect(s, msg, errp);
    }
}

static int ivshmem_can_receive(void *opaque)
{
    IVShmemState *s = opaque;

    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
}

static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    Error *err = NULL;
    int fd;
    int64_t msg;

    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
    s->msg_buffered_bytes += size;
    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
        return;
    }
    msg = le64_to_cpu(s->msg_buf);
    s->msg_buffered_bytes = 0;

    fd = qemu_chr_fe_get_msgfd(&s->server_chr);

    process_msg(s, msg, fd, &err);
    if (err) {
        error_report_err(err);
    }
}

static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
    int64_t msg;
    int n, ret;

    n = 0;
    do {
        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
                                   sizeof(msg) - n);
        if (ret < 0) {
            if (ret == -EINTR) {
                continue;
            }
            error_setg_errno(errp, -ret, "read from server failed");
            return INT64_MIN;
        }
        n += ret;
    } while (n < sizeof(msg));

    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
    return le64_to_cpu(msg);
}
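
/*
 * ivshmem-server normally sends the protocol version, then this device's
 * own ID, then the shared memory fd (msg == -1), which terminates the loop
 * below; connect messages for peers typically arrive afterwards and are
 * handled asynchronously by ivshmem_read().  The loop nevertheless
 * tolerates connect messages arriving before the shared memory, in case a
 * server orders them differently.
 */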
static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
{
    Error *err = NULL;
    int64_t msg;
    int fd;

    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (msg != IVSHMEM_PROTOCOL_VERSION) {
        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
                   msg, IVSHMEM_PROTOCOL_VERSION);
        return;
    }
    if (fd != -1) {
        error_setg(errp, "server sent invalid version message");
        return;
    }

    /*
     * ivshmem-server sends the remaining initial messages in a fixed
     * order, but the device has always accepted them in any order.
     * Stay as compatible as practical, just in case people use
     * servers that behave differently.
     */

    /*
     * ivshmem_device_spec.txt has always required the ID message
     * right here, and ivshmem-server has always complied.  However,
     * older versions of the device accepted it out of order, but
     * broke when an interrupt setup message arrived before it.
     */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid ID message");
        return;
    }
    s->vm_id = msg;

    /*
     * Receive more messages until we have the shared memory.
     */
    do {
        msg = ivshmem_recv_msg(s, &fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
        process_msg(s, msg, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
    } while (msg != -1);

    /*
     * This function must either map the shared memory or fail.  The
     * loop above ensures that: it terminates normally only after it
     * successfully processed the server's shared memory message.
     * Assert that it actually mapped the shared memory:
     */
    assert(s->ivshmem_bar2);
}
/* Select the MSI-X vectors used by device.
 * ivshmem maps events to vectors statically, so
 * we just enable all vectors on init and after reset. */
static void ivshmem_msix_vector_use(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(d, i);
    }
}

static void ivshmem_disable_irqfd(IVShmemState *s);

static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = IVSHMEM_COMMON(d);

    ivshmem_disable_irqfd(s);

    s->intrstatus = 0;
    s->intrmask = 0;
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
}

static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
{
    /* allocate QEMU callback data for receiving interrupts */
    s->msi_vectors = g_new0(MSIVector, s->vectors);

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
            return -1;
        }

        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
        ivshmem_msix_vector_use(s);
    }

    return 0;
}

static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    if (s->msi_vectors[vector].pdev == NULL) {
        return;
    }

    /* it was cleaned when masked in the frontend. */
    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);

    s->msi_vectors[vector].pdev = NULL;
}

static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            goto undo;
        }
    }

    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
        goto undo;
    }
    return;

undo:
    while (--i >= 0) {
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}

static void ivshmem_disable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    if (!pdev->msix_vector_use_notifier) {
        return;
    }

    msix_unset_vector_notifiers(pdev);

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        /*
         * MSI-X is already disabled here so msix_unset_vector_notifiers()
         * didn't call our release notifier.  Do it now to keep our masks
         * and unmasks balanced.
         */
        if (s->msi_vectors[i].unmasked) {
            ivshmem_vector_mask(pdev, i);
        }
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}

static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
                                 uint32_t val, int len)
{
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int is_enabled, was_enabled = msix_enabled(pdev);

    pci_default_write_config(pdev, address, val, len);
    is_enabled = msix_enabled(pdev);

    if (kvm_msi_via_irqfd_enabled()) {
        if (!was_enabled && is_enabled) {
            ivshmem_enable_irqfd(s);
        } else if (was_enabled && !is_enabled) {
            ivshmem_disable_irqfd(s);
        }
    }
}

static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    Error *err = NULL;
    uint8_t *pci_conf;

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        error_setg(errp, "ioeventfd/irqfd requires MSI");
        return;
    }

    pci_conf = dev->config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers */
    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    if (s->hostmem != NULL) {
        IVSHMEM_DPRINTF("using hostmem\n");

        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
        host_memory_backend_set_mapped(s->hostmem, true);
    } else {
        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
        assert(chr);

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                        chr->filename);

        /* we allocate enough space for 16 peers and grow as needed */
        resize_peers(s, 16);

        /*
         * Receive setup messages from server synchronously.
         * Older versions did it asynchronously, but that creates a
         * number of entertaining race conditions.
         */
        ivshmem_recv_setup(s, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
            error_setg(errp,
                       "master must connect to the server before any peers");
            return;
        }

        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
                                 ivshmem_read, NULL, NULL, s, NULL, true);

        if (ivshmem_setup_interrupts(s, errp) < 0) {
            error_prepend(errp, "Failed to initialize interrupts: ");
            return;
        }
    }

    if (s->master == ON_OFF_AUTO_AUTO) {
        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!ivshmem_is_master(s)) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
        if (migrate_add_blocker(s->migration_blocker, errp) < 0) {
            error_free(s->migration_blocker);
            return;
        }
    }

    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
    pci_register_bar(PCI_DEVICE(s), 2,
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_PREFETCH |
                     PCI_BASE_ADDRESS_MEM_TYPE_64,
                     s->ivshmem_bar2);
}
static void ivshmem_exit(PCIDevice *dev)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    int i;

    if (s->migration_blocker) {
        migrate_del_blocker(s->migration_blocker);
        error_free(s->migration_blocker);
    }

    if (memory_region_is_mapped(s->ivshmem_bar2)) {
        if (!s->hostmem) {
            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
            int fd;

            if (munmap(addr, memory_region_size(s->ivshmem_bar2)) == -1) {
                error_report("Failed to munmap shared memory %s",
                             strerror(errno));
            }

            fd = memory_region_get_fd(s->ivshmem_bar2);
            close(fd);
        }

        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
    }

    if (s->hostmem) {
        host_memory_backend_set_mapped(s->hostmem, false);
    }

    if (s->peers) {
        for (i = 0; i < s->nb_peers; i++) {
            close_peer_eventfds(s, i);
        }
        g_free(s->peers);
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        msix_uninit_exclusive_bar(dev);
    }

    g_free(s->msi_vectors);
}
static int ivshmem_pre_load(void *opaque)
{
    IVShmemState *s = opaque;

    if (!ivshmem_is_master(s)) {
        error_report("'peer' devices are not migratable");
        return -EINVAL;
    }

    return 0;
}

static int ivshmem_post_load(void *opaque, int version_id)
{
    IVShmemState *s = opaque;

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
    return 0;
}

static void ivshmem_common_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_common_realize;
    k->exit = ivshmem_exit;
    k->config_write = ivshmem_write_config;
    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
    k->device_id = PCI_DEVICE_ID_IVSHMEM;
    k->class_id = PCI_CLASS_MEMORY_RAM;
    k->revision = 1;
    dc->reset = ivshmem_reset;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "Inter-VM shared memory";
}

static const TypeInfo ivshmem_common_info = {
    .name          = TYPE_IVSHMEM_COMMON,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IVShmemState),
    .abstract      = true,
    .class_init    = ivshmem_common_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

static const VMStateDescription ivshmem_plain_vmsd = {
    .name = TYPE_IVSHMEM_PLAIN,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_plain_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
    DEFINE_PROP_END_OF_LIST(),
};

static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!s->hostmem) {
        error_setg(errp, "You must specify a 'memdev'");
        return;
    } else if (host_memory_backend_is_mapped(s->hostmem)) {
        error_setg(errp, "can't use already busy memdev: %s",
                   object_get_canonical_path_component(OBJECT(s->hostmem)));
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_plain_realize;
    device_class_set_props(dc, ivshmem_plain_properties);
    dc->vmsd = &ivshmem_plain_vmsd;
}

static const TypeInfo ivshmem_plain_info = {
    .name          = TYPE_IVSHMEM_PLAIN,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .class_init    = ivshmem_plain_class_init,
};

static const VMStateDescription ivshmem_doorbell_vmsd = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_MSIX(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static Property ivshmem_doorbell_properties[] = {
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_END_OF_LIST(),
};

static void ivshmem_doorbell_init(Object *obj)
{
    IVShmemState *s = IVSHMEM_DOORBELL(obj);

    s->features |= (1 << IVSHMEM_MSI);
}

static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
        error_setg(errp, "You must specify a 'chardev'");
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_doorbell_realize;
    device_class_set_props(dc, ivshmem_doorbell_properties);
    dc->vmsd = &ivshmem_doorbell_vmsd;
}

static const TypeInfo ivshmem_doorbell_info = {
    .name          = TYPE_IVSHMEM_DOORBELL,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_doorbell_init,
    .class_init    = ivshmem_doorbell_class_init,
};

static void ivshmem_register_types(void)
{
    type_register_static(&ivshmem_common_info);
    type_register_static(&ivshmem_plain_info);
    type_register_static(&ivshmem_doorbell_info);
}

type_init(ivshmem_register_types)