ivshmem-pci.c

/*
 * Inter-VM Shared Memory PCI device.
 *
 * Author:
 *      Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based On: cirrus_vga.c
 *          Copyright (c) 2004 Fabrice Bellard
 *          Copyright (c) 2004 Makoto Suzuki (suzu)
 *
 * and rtl8139.c
 *          Copyright (c) 2006 Igor Kovalenko
 *
 * This code is licensed under the GNU GPL v2.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "qemu/cutils.h"
#include "hw/pci/pci.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "system/kvm.h"
#include "migration/blocker.h"
#include "migration/vmstate.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "chardev/char-fe.h"
#include "system/hostmem.h"
#include "qapi/visitor.h"
#include "hw/misc/ivshmem.h"
#include "qom/object.h"

#define PCI_VENDOR_ID_IVSHMEM   PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM   0x1110

#define IVSHMEM_MAX_PEERS UINT16_MAX
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI       1

#define IVSHMEM_REG_BAR_SIZE 0x100

#define IVSHMEM_DEBUG 0
#define IVSHMEM_DPRINTF(fmt, ...)                       \
    do {                                                \
        if (IVSHMEM_DEBUG) {                            \
            printf("IVSHMEM: " fmt, ## __VA_ARGS__);    \
        }                                               \
    } while (0)

#define TYPE_IVSHMEM_COMMON "ivshmem-common"
typedef struct IVShmemState IVShmemState;
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_COMMON,
                         TYPE_IVSHMEM_COMMON)

#define TYPE_IVSHMEM_PLAIN "ivshmem-plain"
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_PLAIN,
                         TYPE_IVSHMEM_PLAIN)

#define TYPE_IVSHMEM_DOORBELL "ivshmem-doorbell"
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM_DOORBELL,
                         TYPE_IVSHMEM_DOORBELL)

#define TYPE_IVSHMEM "ivshmem"
DECLARE_INSTANCE_CHECKER(IVShmemState, IVSHMEM,
                         TYPE_IVSHMEM)

typedef struct Peer {
    int nb_eventfds;
    EventNotifier *eventfds;
} Peer;

typedef struct MSIVector {
    PCIDevice *pdev;
    int virq;
    bool unmasked;
} MSIVector;

struct IVShmemState {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    uint32_t features;

    /* exactly one of these two may be set */
    HostMemoryBackend *hostmem; /* without interrupts */
    CharBackend server_chr;     /* with interrupts */

    /* registers */
    uint32_t intrmask;
    uint32_t intrstatus;
    int vm_id;

    /* BARs */
    MemoryRegion ivshmem_mmio;  /* BAR 0 (registers) */
    MemoryRegion *ivshmem_bar2; /* BAR 2 (shared memory) */
    MemoryRegion server_bar2;   /* used with server_chr */

    /* interrupt support */
    Peer *peers;
    int nb_peers;               /* space in @peers[] */
    uint32_t vectors;
    MSIVector *msi_vectors;
    uint64_t msg_buf;           /* buffer for receiving server messages */
    int msg_buffered_bytes;     /* #bytes in @msg_buf */

    /* migration stuff */
    OnOffAuto master;
    Error *migration_blocker;
};

/* registers for the Inter-VM shared memory device */
enum ivshmem_registers {
    INTRMASK = 0,
    INTRSTATUS = 4,
    IVPOSITION = 8,
    DOORBELL = 12,
};
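
/*
 * Register semantics as implemented below: IVPOSITION is the read-only
 * peer ID assigned by the server, DOORBELL is write-only, and reading
 * INTRSTATUS clears it.  INTRMASK is stored and migrated, but is not
 * otherwise consulted by this device model.
 */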
static inline uint32_t ivshmem_has_feature(IVShmemState *ivs,
                                           unsigned int feature)
{
    return (ivs->features & (1 << feature));
}

static inline bool ivshmem_is_master(IVShmemState *s)
{
    assert(s->master != ON_OFF_AUTO_AUTO);
    return s->master == ON_OFF_AUTO_ON;
}

static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;
}

static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t ret = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", ret);
    return ret;
}

static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;
}

static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
{
    uint32_t ret = s->intrstatus;

    /* reading ISR clears all interrupts */
    s->intrstatus = 0;
    return ret;
}
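
/*
 * BAR 0 register writes.  A DOORBELL write encodes its target in the
 * value: bits 31:16 select the destination peer ID and the low 16 bits
 * select the interrupt vector (further masked to 8 bits here), so
 * writing ((peer << 16) | vector) kicks that peer's eventfd for that
 * vector.
 */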
static void ivshmem_io_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;

    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " HWADDR_FMT_plx "\n", addr);
    switch (addr)
    {
        case INTRMASK:
            ivshmem_IntrMask_write(s, val);
            break;

        case INTRSTATUS:
            ivshmem_IntrStatus_write(s, val);
            break;

        case DOORBELL:
            /* check that dest VM ID is reasonable */
            if (dest >= s->nb_peers) {
                IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
                break;
            }

            /* check doorbell range */
            if (vector < s->peers[dest].nb_eventfds) {
                IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
                event_notifier_set(&s->peers[dest].eventfds[vector]);
            } else {
                IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
                                vector, dest);
            }
            break;

        default:
            IVSHMEM_DPRINTF("Unhandled write " HWADDR_FMT_plx "\n", addr);
    }
}

static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
                                unsigned size)
{
    IVShmemState *s = opaque;
    uint32_t ret;

    switch (addr)
    {
        case INTRMASK:
            ret = ivshmem_IntrMask_read(s);
            break;

        case INTRSTATUS:
            ret = ivshmem_IntrStatus_read(s);
            break;

        case IVPOSITION:
            ret = s->vm_id;
            break;

        default:
            IVSHMEM_DPRINTF("why are we reading " HWADDR_FMT_plx "\n", addr);
            ret = 0;
    }

    return ret;
}

static const MemoryRegionOps ivshmem_mmio_ops = {
    .read = ivshmem_io_read,
    .write = ivshmem_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
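
/*
 * Userspace interrupt delivery path: when one of our own eventfds is
 * kicked and KVM irqfd is not in use, this fd handler fires, drains the
 * notifier, and raises either the MSI-X vector or, without MSI, the
 * legacy status register.
 */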
static void ivshmem_vector_notify(void *opaque)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int vector = entry - s->msi_vectors;
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];

    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, vector);
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_enabled(pdev)) {
            msix_notify(pdev, vector);
        }
    } else {
        ivshmem_IntrStatus_write(s, 1);
    }
}
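
/*
 * MSI-X mask/unmask notifiers used on the KVM irqfd path: unmasking a
 * vector updates its MSI route and attaches the eventfd to the guest
 * interrupt in the kernel; masking detaches it again, so that pending
 * events are picked up by ivshmem_vector_poll() instead.
 */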
static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector,
                                 MSIMessage msg)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return -EINVAL;
    }
    assert(!v->unmasked);

    ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev);
    if (ret < 0) {
        return ret;
    }
    kvm_irqchip_commit_routes(kvm_state);

    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq);
    if (ret < 0) {
        return ret;
    }
    v->unmasked = true;

    return 0;
}

static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    MSIVector *v = &s->msi_vectors[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);
    if (!v->pdev) {
        error_report("ivshmem: vector %d route does not exist", vector);
        return;
    }
    assert(v->unmasked);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, v->virq);
    if (ret < 0) {
        error_report("remove_irqfd_notifier_gsi failed");
        return;
    }
    v->unmasked = false;
}

static void ivshmem_vector_poll(PCIDevice *dev,
                                unsigned int vector_start,
                                unsigned int vector_end)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    unsigned int vector;

    IVSHMEM_DPRINTF("vector poll %p %d-%d\n", dev, vector_start, vector_end);

    vector_end = MIN(vector_end, s->vectors);

    for (vector = vector_start; vector < vector_end; vector++) {
        EventNotifier *notifier = &s->peers[s->vm_id].eventfds[vector];

        if (!msix_is_masked(dev, vector)) {
            continue;
        }

        if (event_notifier_test_and_clear(notifier)) {
            msix_set_pending(dev, vector);
        }
    }
}

static void watch_vector_notifier(IVShmemState *s, EventNotifier *n,
                                  int vector)
{
    int eventfd = event_notifier_get_fd(n);

    assert(!s->msi_vectors[vector].pdev);
    s->msi_vectors[vector].pdev = PCI_DEVICE(s);

    qemu_set_fd_handler(eventfd, ivshmem_vector_notify,
                        NULL, &s->msi_vectors[vector]);
}
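
/*
 * Guest-to-peer fast path: register the exact 4-byte DOORBELL value
 * ((posn << 16) | vector) as a data-matching ioeventfd, so a matching
 * guest write signals the peer's eventfd without a trip through the
 * MMIO handler above.
 */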
static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_add_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void ivshmem_del_eventfd(IVShmemState *s, int posn, int i)
{
    memory_region_del_eventfd(&s->ivshmem_mmio,
                              DOORBELL,
                              4,
                              true,
                              (posn << 16) | i,
                              &s->peers[posn].eventfds[i]);
}

static void close_peer_eventfds(IVShmemState *s, int posn)
{
    int i, n;

    assert(posn >= 0 && posn < s->nb_peers);
    n = s->peers[posn].nb_eventfds;

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        memory_region_transaction_begin();
        for (i = 0; i < n; i++) {
            ivshmem_del_eventfd(s, posn, i);
        }
        memory_region_transaction_commit();
    }

    for (i = 0; i < n; i++) {
        event_notifier_cleanup(&s->peers[posn].eventfds[i]);
    }

    g_free(s->peers[posn].eventfds);
    s->peers[posn].nb_eventfds = 0;
}

static void resize_peers(IVShmemState *s, int nb_peers)
{
    int old_nb_peers = s->nb_peers;
    int i;

    assert(nb_peers > old_nb_peers);
    IVSHMEM_DPRINTF("bumping storage to %d peers\n", nb_peers);

    s->peers = g_renew(Peer, s->peers, nb_peers);
    s->nb_peers = nb_peers;

    for (i = old_nb_peers; i < nb_peers; i++) {
        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
        s->peers[i].nb_eventfds = 0;
    }
}

static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
                                     Error **errp)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    KVMRouteChange c;
    int ret;

    IVSHMEM_DPRINTF("ivshmem_add_kvm_msi_virq vector:%d\n", vector);
    assert(!s->msi_vectors[vector].pdev);

    c = kvm_irqchip_begin_route_changes(kvm_state);
    ret = kvm_irqchip_add_msi_route(&c, vector, pdev);
    if (ret < 0) {
        error_setg(errp, "kvm_irqchip_add_msi_route failed");
        return;
    }
    kvm_irqchip_commit_route_changes(&c);

    s->msi_vectors[vector].virq = ret;
    s->msi_vectors[vector].pdev = pdev;
}

static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);
    Error *err = NULL;

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        IVSHMEM_DPRINTF("with eventfd\n");
        watch_vector_notifier(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        ivshmem_add_kvm_msi_virq(s, vector, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
            /* TODO handle error */
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}
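
/*
 * Messages from the ivshmem server are 64-bit little-endian integers,
 * optionally accompanied by a file descriptor passed over the UNIX
 * socket.  A -1 message carries the shared-memory fd; a peer-ID message
 * with an fd adds one eventfd (vector) for that peer, and one without
 * an fd announces that the peer has gone away.
 */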
static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
{
    struct stat buf;
    size_t size;

    if (s->ivshmem_bar2) {
        error_setg(errp, "server sent unexpected shared memory message");
        close(fd);
        return;
    }

    if (fstat(fd, &buf) < 0) {
        error_setg_errno(errp, errno,
                         "can't determine size of shared memory sent by server");
        close(fd);
        return;
    }

    size = buf.st_size;

    /* mmap the region and map into the BAR2 */
    if (!memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
                                        "ivshmem.bar2", size, RAM_SHARED,
                                        fd, 0, errp)) {
        return;
    }

    s->ivshmem_bar2 = &s->server_bar2;
}

static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
                                   Error **errp)
{
    IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
    if (posn >= s->nb_peers || posn == s->vm_id) {
        error_setg(errp, "invalid peer %d", posn);
        return;
    }
    close_peer_eventfds(s, posn);
}

static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1.  Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfds received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    g_unix_set_fd_nonblocking(fd, true, NULL); /* msix/irqfd poll non block */

    if (posn == s->vm_id) {
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}

static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
{
    IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);

    if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid message %" PRId64, msg);
        close(fd);
        return;
    }

    if (msg == -1) {
        process_msg_shmem(s, fd, errp);
        return;
    }

    if (msg >= s->nb_peers) {
        resize_peers(s, msg + 1);
    }

    if (fd >= 0) {
        process_msg_connect(s, msg, fd, errp);
    } else {
        process_msg_disconnect(s, msg, errp);
    }
}
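
/*
 * Asynchronous chardev receive path: the server's 8-byte messages may
 * arrive in fragments, so bytes accumulate in @msg_buf until a full
 * little-endian int64 is available, which is then handed to
 * process_msg() together with any fd received alongside it.
 */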
static int ivshmem_can_receive(void *opaque)
{
    IVShmemState *s = opaque;

    assert(s->msg_buffered_bytes < sizeof(s->msg_buf));
    return sizeof(s->msg_buf) - s->msg_buffered_bytes;
}

static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    Error *err = NULL;
    int fd;
    int64_t msg;

    assert(size >= 0 && s->msg_buffered_bytes + size <= sizeof(s->msg_buf));
    memcpy((unsigned char *)&s->msg_buf + s->msg_buffered_bytes, buf, size);
    s->msg_buffered_bytes += size;
    if (s->msg_buffered_bytes < sizeof(s->msg_buf)) {
        return;
    }
    msg = le64_to_cpu(s->msg_buf);
    s->msg_buffered_bytes = 0;

    fd = qemu_chr_fe_get_msgfd(&s->server_chr);

    process_msg(s, msg, fd, &err);
    if (err) {
        error_report_err(err);
    }
}

static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
{
    int64_t msg;
    int n, ret;

    n = 0;
    do {
        ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n,
                                   sizeof(msg) - n);
        if (ret < 0) {
            if (ret == -EINTR) {
                continue;
            }
            error_setg_errno(errp, -ret, "read from server failed");
            return INT64_MIN;
        }
        n += ret;
    } while (n < sizeof(msg));

    *pfd = qemu_chr_fe_get_msgfd(&s->server_chr);
    return le64_to_cpu(msg);
}

static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
{
    Error *err = NULL;
    int64_t msg;
    int fd;

    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (msg != IVSHMEM_PROTOCOL_VERSION) {
        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
                   msg, IVSHMEM_PROTOCOL_VERSION);
        return;
    }
    if (fd != -1) {
        error_setg(errp, "server sent invalid version message");
        return;
    }

    /*
     * ivshmem-server sends the remaining initial messages in a fixed
     * order, but the device has always accepted them in any order.
     * Stay as compatible as practical, just in case people use
     * servers that behave differently.
     */

    /*
     * ivshmem_device_spec.txt has always required the ID message
     * right here, and ivshmem-server has always complied.  However,
     * older versions of the device accepted it out of order, but
     * broke when an interrupt setup message arrived before it.
     */
    msg = ivshmem_recv_msg(s, &fd, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
    if (fd != -1 || msg < 0 || msg > IVSHMEM_MAX_PEERS) {
        error_setg(errp, "server sent invalid ID message");
        return;
    }
    s->vm_id = msg;

    /*
     * Receive more messages until we have the shared memory.
     */
    do {
        msg = ivshmem_recv_msg(s, &fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
        process_msg(s, msg, fd, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }
    } while (msg != -1);

    /*
     * This function must either map the shared memory or fail.  The
     * loop above ensures that: it terminates normally only after it
     * successfully processed the server's shared memory message.
     * Assert that we actually mapped the shared memory:
     */
    assert(s->ivshmem_bar2);
}

/* Select the MSI-X vectors used by device.
 * ivshmem maps events to vectors statically, so
 * we just enable all vectors on init and after reset. */
static void ivshmem_msix_vector_use(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->vectors; i++) {
        msix_vector_use(d, i);
    }
}

static void ivshmem_disable_irqfd(IVShmemState *s);

static void ivshmem_reset(DeviceState *d)
{
    IVShmemState *s = IVSHMEM_COMMON(d);

    ivshmem_disable_irqfd(s);

    s->intrstatus = 0;
    s->intrmask = 0;
    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
}

static int ivshmem_setup_interrupts(IVShmemState *s, Error **errp)
{
    /* allocate QEMU callback data for receiving interrupts */
    s->msi_vectors = g_new0(MSIVector, s->vectors);

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1, errp)) {
            return -1;
        }

        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
        ivshmem_msix_vector_use(s);
    }

    return 0;
}

static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    if (s->msi_vectors[vector].pdev == NULL) {
        return;
    }

    /* it was cleaned when masked in the frontend. */
    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);

    s->msi_vectors[vector].pdev = NULL;
}
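
/*
 * Switching between the irqfd fast path and the userspace path follows
 * the guest's MSI-X enable bit: ivshmem_write_config() watches config
 * space writes and calls ivshmem_enable_irqfd() or
 * ivshmem_disable_irqfd() when the bit flips.
 */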
static void ivshmem_enable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        Error *err = NULL;

        ivshmem_add_kvm_msi_virq(s, i, &err);
        if (err) {
            error_report_err(err);
            goto undo;
        }
    }

    if (msix_set_vector_notifiers(pdev,
                                  ivshmem_vector_unmask,
                                  ivshmem_vector_mask,
                                  ivshmem_vector_poll)) {
        error_report("ivshmem: msix_set_vector_notifiers failed");
        goto undo;
    }
    return;

undo:
    while (--i >= 0) {
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}

static void ivshmem_disable_irqfd(IVShmemState *s)
{
    PCIDevice *pdev = PCI_DEVICE(s);
    int i;

    if (!pdev->msix_vector_use_notifier) {
        return;
    }

    msix_unset_vector_notifiers(pdev);
    for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
        /*
         * MSI-X is already disabled here so msix_unset_vector_notifiers()
         * didn't call our release notifier.  Do it now to keep our masks
         * and unmasks balanced.
         */
        if (s->msi_vectors[i].unmasked) {
            ivshmem_vector_mask(pdev, i);
        }
        ivshmem_remove_kvm_msi_virq(s, i);
    }
}

static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
                                 uint32_t val, int len)
{
    IVShmemState *s = IVSHMEM_COMMON(pdev);
    int is_enabled, was_enabled = msix_enabled(pdev);

    pci_default_write_config(pdev, address, val, len);
    is_enabled = msix_enabled(pdev);

    if (kvm_msi_via_irqfd_enabled()) {
        if (!was_enabled && is_enabled) {
            ivshmem_enable_irqfd(s);
        } else if (was_enabled && !is_enabled) {
            ivshmem_disable_irqfd(s);
        }
    }
}
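
/*
 * Common realize for both variants.  BAR 0 always holds the registers;
 * BAR 2 is either the user-supplied memory backend (ivshmem-plain) or
 * the region mapped from the fd the server sent (ivshmem-doorbell).
 */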
static void ivshmem_common_realize(PCIDevice *dev, Error **errp)
{
    ERRP_GUARD();
    IVShmemState *s = IVSHMEM_COMMON(dev);
    Error *err = NULL;
    uint8_t *pci_conf;

    /* IRQFD requires MSI */
    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD) &&
        !ivshmem_has_feature(s, IVSHMEM_MSI)) {
        error_setg(errp, "ioeventfd/irqfd requires MSI");
        return;
    }

    pci_conf = dev->config;
    pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;

    memory_region_init_io(&s->ivshmem_mmio, OBJECT(s), &ivshmem_mmio_ops, s,
                          "ivshmem-mmio", IVSHMEM_REG_BAR_SIZE);

    /* region for registers */
    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &s->ivshmem_mmio);

    if (s->hostmem != NULL) {
        IVSHMEM_DPRINTF("using hostmem\n");

        s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem);
        host_memory_backend_set_mapped(s->hostmem, true);
    } else {
        Chardev *chr = qemu_chr_fe_get_driver(&s->server_chr);
        assert(chr);

        IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
                        chr->filename);

        /* we allocate enough space for 16 peers and grow as needed */
        resize_peers(s, 16);

        /*
         * Receive setup messages from the server synchronously.
         * Older versions did it asynchronously, but that creates a
         * number of entertaining race conditions.
         */
        ivshmem_recv_setup(s, &err);
        if (err) {
            error_propagate(errp, err);
            return;
        }

        if (s->master == ON_OFF_AUTO_ON && s->vm_id != 0) {
            error_setg(errp,
                       "master must connect to the server before any peers");
            return;
        }

        qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive,
                                 ivshmem_read, NULL, NULL, s, NULL, true);

        if (ivshmem_setup_interrupts(s, errp) < 0) {
            error_prepend(errp, "Failed to initialize interrupts: ");
            return;
        }
    }

    if (s->master == ON_OFF_AUTO_AUTO) {
        s->master = s->vm_id == 0 ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }

    if (!ivshmem_is_master(s)) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when using feature 'peer mode' in device 'ivshmem'");
        if (migrate_add_blocker(&s->migration_blocker, errp) < 0) {
            return;
        }
    }

    vmstate_register_ram(s->ivshmem_bar2, DEVICE(s));
    pci_register_bar(PCI_DEVICE(s), 2,
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_PREFETCH |
                     PCI_BASE_ADDRESS_MEM_TYPE_64,
                     s->ivshmem_bar2);
}
static void ivshmem_exit(PCIDevice *dev)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    int i;

    migrate_del_blocker(&s->migration_blocker);

    if (memory_region_is_mapped(s->ivshmem_bar2)) {
        if (!s->hostmem) {
            void *addr = memory_region_get_ram_ptr(s->ivshmem_bar2);
            int fd;

            /*
             * Note the parenthesization: the -1 check applies to
             * munmap()'s return value, not to the size argument.
             */
            if (munmap(addr, memory_region_size(s->ivshmem_bar2)) == -1) {
                error_report("Failed to munmap shared memory %s",
                             strerror(errno));
            }

            fd = memory_region_get_fd(s->ivshmem_bar2);
            close(fd);
        }

        vmstate_unregister_ram(s->ivshmem_bar2, DEVICE(dev));
    }

    if (s->hostmem) {
        host_memory_backend_set_mapped(s->hostmem, false);
    }

    if (s->peers) {
        for (i = 0; i < s->nb_peers; i++) {
            close_peer_eventfds(s, i);
        }
        g_free(s->peers);
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        msix_uninit_exclusive_bar(dev);
    }

    g_free(s->msi_vectors);
}
static int ivshmem_pre_load(void *opaque)
{
    IVShmemState *s = opaque;

    if (!ivshmem_is_master(s)) {
        error_report("'peer' devices are not migratable");
        return -EINVAL;
    }

    return 0;
}

static int ivshmem_post_load(void *opaque, int version_id)
{
    IVShmemState *s = opaque;

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        ivshmem_msix_vector_use(s);
    }
    return 0;
}

static void ivshmem_common_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_common_realize;
    k->exit = ivshmem_exit;
    k->config_write = ivshmem_write_config;
    k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
    k->device_id = PCI_DEVICE_ID_IVSHMEM;
    k->class_id = PCI_CLASS_MEMORY_RAM;
    k->revision = 1;
    device_class_set_legacy_reset(dc, ivshmem_reset);
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "Inter-VM shared memory";
}

static const TypeInfo ivshmem_common_info = {
    .name          = TYPE_IVSHMEM_COMMON,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IVShmemState),
    .abstract      = true,
    .class_init    = ivshmem_common_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};

static const VMStateDescription ivshmem_plain_vmsd = {
    .name = TYPE_IVSHMEM_PLAIN,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static const Property ivshmem_plain_properties[] = {
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
    DEFINE_PROP_LINK("memdev", IVShmemState, hostmem, TYPE_MEMORY_BACKEND,
                     HostMemoryBackend *),
};

static void ivshmem_plain_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!s->hostmem) {
        error_setg(errp, "You must specify a 'memdev'");
        return;
    } else if (host_memory_backend_is_mapped(s->hostmem)) {
        error_setg(errp, "can't use already busy memdev: %s",
                   object_get_canonical_path_component(OBJECT(s->hostmem)));
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_plain_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_plain_realize;
    device_class_set_props(dc, ivshmem_plain_properties);
    dc->vmsd = &ivshmem_plain_vmsd;
}

static const TypeInfo ivshmem_plain_info = {
    .name          = TYPE_IVSHMEM_PLAIN,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .class_init    = ivshmem_plain_class_init,
};

static const VMStateDescription ivshmem_doorbell_vmsd = {
    .name = TYPE_IVSHMEM_DOORBELL,
    .version_id = 0,
    .minimum_version_id = 0,
    .pre_load = ivshmem_pre_load,
    .post_load = ivshmem_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IVShmemState),
        VMSTATE_MSIX(parent_obj, IVShmemState),
        VMSTATE_UINT32(intrstatus, IVShmemState),
        VMSTATE_UINT32(intrmask, IVShmemState),
        VMSTATE_END_OF_LIST()
    },
};

static const Property ivshmem_doorbell_properties[] = {
    DEFINE_PROP_CHR("chardev", IVShmemState, server_chr),
    DEFINE_PROP_UINT32("vectors", IVShmemState, vectors, 1),
    DEFINE_PROP_BIT("ioeventfd", IVShmemState, features, IVSHMEM_IOEVENTFD,
                    true),
    DEFINE_PROP_ON_OFF_AUTO("master", IVShmemState, master, ON_OFF_AUTO_OFF),
};

static void ivshmem_doorbell_init(Object *obj)
{
    IVShmemState *s = IVSHMEM_DOORBELL(obj);

    s->features |= (1 << IVSHMEM_MSI);
}

static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);

    if (!qemu_chr_fe_backend_connected(&s->server_chr)) {
        error_setg(errp, "You must specify a 'chardev'");
        return;
    }

    ivshmem_common_realize(dev, errp);
}

static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);

    k->realize = ivshmem_doorbell_realize;
    device_class_set_props(dc, ivshmem_doorbell_properties);
    dc->vmsd = &ivshmem_doorbell_vmsd;
}

static const TypeInfo ivshmem_doorbell_info = {
    .name          = TYPE_IVSHMEM_DOORBELL,
    .parent        = TYPE_IVSHMEM_COMMON,
    .instance_size = sizeof(IVShmemState),
    .instance_init = ivshmem_doorbell_init,
    .class_init    = ivshmem_doorbell_class_init,
};
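
/*
 * Usage sketch (illustrative command lines, not taken from this file;
 * object IDs and paths are examples):
 *
 *   ivshmem-plain, backed by a shared memory-backend object:
 *     -object memory-backend-file,id=hostmem,share=on,size=1M,mem-path=/dev/shm/ivshmem
 *     -device ivshmem-plain,memdev=hostmem
 *
 *   ivshmem-doorbell, connected to an ivshmem server socket:
 *     -chardev socket,id=ivsh,path=/tmp/ivshmem_socket
 *     -device ivshmem-doorbell,chardev=ivsh,vectors=2
 */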
static void ivshmem_register_types(void)
{
    type_register_static(&ivshmem_common_info);
    type_register_static(&ivshmem_plain_info);
    type_register_static(&ivshmem_doorbell_info);
}

type_init(ivshmem_register_types)