vfio-user-obj.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950
  1. /**
  2. * QEMU vfio-user-server server object
  3. *
  4. * Copyright © 2022 Oracle and/or its affiliates.
  5. *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  7. *
  8. * See the COPYING file in the top-level directory.
  9. *
  10. */
  11. /**
  12. * Usage: add options:
  13. * -machine x-remote,vfio-user=on,auto-shutdown=on
  14. * -device <PCI-device>,id=<pci-dev-id>
  15. * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
  16. * device=<pci-dev-id>
  17. *
  18. * Note that x-vfio-user-server object must be used with x-remote machine only.
  19. * This server could only support PCI devices for now.
  20. *
  21. * type - SocketAddress type - presently "unix" alone is supported. Required
  22. * option
  23. *
  24. * path - named unix socket, it will be created by the server. It is
  25. * a required option
  26. *
  27. * device - id of a device on the server, a required option. PCI devices
  28. * alone are supported presently.
  29. *
  30. * notes - x-vfio-user-server could block IO and monitor during the
  31. * initialization phase.
  32. */
  33. #include "qemu/osdep.h"
  34. #include "qom/object.h"
  35. #include "qom/object_interfaces.h"
  36. #include "qemu/error-report.h"
  37. #include "trace.h"
  38. #include "sysemu/runstate.h"
  39. #include "hw/boards.h"
  40. #include "hw/remote/machine.h"
  41. #include "qapi/error.h"
  42. #include "qapi/qapi-visit-sockets.h"
  43. #include "qapi/qapi-events-misc.h"
  44. #include "qemu/notify.h"
  45. #include "qemu/thread.h"
  46. #include "qemu/main-loop.h"
  47. #include "sysemu/sysemu.h"
  48. #include "libvfio-user.h"
  49. #include "hw/qdev-core.h"
  50. #include "hw/pci/pci.h"
  51. #include "qemu/timer.h"
  52. #include "exec/memory.h"
  53. #include "hw/pci/msi.h"
  54. #include "hw/pci/msix.h"
  55. #include "hw/remote/vfio-user-obj.h"
/* QOM type name for the vfio-user server object */
#define TYPE_VFU_OBJECT "x-vfio-user-server"

/* Declares VfuObject/VfuObjectClass and the VFU_OBJECT() cast helpers */
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
  58. /**
  59. * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown
  60. * is set, it aborts the machine on error. Otherwise, it logs an
  61. * error message without aborting.
  62. */
  63. #define VFU_OBJECT_ERROR(o, fmt, ...) \
  64. { \
  65. if (vfu_object_auto_shutdown()) { \
  66. error_setg(&error_abort, (fmt), ## __VA_ARGS__); \
  67. } else { \
  68. error_report((fmt), ## __VA_ARGS__); \
  69. } \
  70. } \
struct VfuObjectClass {
    ObjectClass parent_class;

    /* Number of active x-vfio-user-server devices (class-wide counter) */
    unsigned int nr_devs;
};
struct VfuObject {
    /* private */
    Object parent;

    /* Listening socket address; only SOCKET_ADDRESS_TYPE_UNIX is accepted */
    SocketAddress *socket;

    /* id of the PCI device served by this object (required property) */
    char *device;

    /* Error deferred from instance_init, surfaced by vfu_object_init_ctx() */
    Error *err;

    /* Defers context creation until machine-init-done */
    Notifier machine_done;

    /* libvfio-user context; non-NULL once the server is initialized */
    vfu_ctx_t *vfu_ctx;

    /* Resolved PCI device; a reference is held while non-NULL */
    PCIDevice *pci_dev;

    /* Blocks hot-unplug of pci_dev while this server exists */
    Error *unplug_blocker;

    /* fd polled for incoming vfio-user messages; -1 when inactive */
    int vfu_poll_fd;

    /* Saved MSI/MSI-X callbacks of pci_dev, restored on teardown */
    MSITriggerFunc *default_msi_trigger;
    MSIPrepareMessageFunc *default_msi_prepare_message;
    MSIxPrepareMessageFunc *default_msix_prepare_message;
};
  90. static void vfu_object_init_ctx(VfuObject *o, Error **errp);
  91. static bool vfu_object_auto_shutdown(void)
  92. {
  93. bool auto_shutdown = true;
  94. Error *local_err = NULL;
  95. if (!current_machine) {
  96. return auto_shutdown;
  97. }
  98. auto_shutdown = object_property_get_bool(OBJECT(current_machine),
  99. "auto-shutdown",
  100. &local_err);
  101. /*
  102. * local_err would be set if no such property exists - safe to ignore.
  103. * Unlikely scenario as auto-shutdown is always defined for
  104. * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with
  105. * TYPE_REMOTE_MACHINE
  106. */
  107. if (local_err) {
  108. auto_shutdown = true;
  109. error_free(local_err);
  110. }
  111. return auto_shutdown;
  112. }
  113. static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
  114. void *opaque, Error **errp)
  115. {
  116. VfuObject *o = VFU_OBJECT(obj);
  117. if (o->vfu_ctx) {
  118. error_setg(errp, "vfu: Unable to set socket property - server busy");
  119. return;
  120. }
  121. qapi_free_SocketAddress(o->socket);
  122. o->socket = NULL;
  123. visit_type_SocketAddress(v, name, &o->socket, errp);
  124. if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
  125. error_setg(errp, "vfu: Unsupported socket type - %s",
  126. SocketAddressType_str(o->socket->type));
  127. qapi_free_SocketAddress(o->socket);
  128. o->socket = NULL;
  129. return;
  130. }
  131. trace_vfu_prop("socket", o->socket->u.q_unix.path);
  132. vfu_object_init_ctx(o, errp);
  133. }
  134. static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
  135. {
  136. VfuObject *o = VFU_OBJECT(obj);
  137. if (o->vfu_ctx) {
  138. error_setg(errp, "vfu: Unable to set device property - server busy");
  139. return;
  140. }
  141. g_free(o->device);
  142. o->device = g_strdup(str);
  143. trace_vfu_prop("device", str);
  144. vfu_object_init_ctx(o, errp);
  145. }
/*
 * fd handler installed on vfu_poll_fd once a client is attached: drains and
 * services pending vfio-user messages via vfu_run_ctx(). On client hangup
 * it emits the VFU_CLIENT_HANGUP QAPI event and destroys this object.
 */
static void vfu_object_ctx_run(void *opaque)
{
    VfuObject *o = opaque;
    const char *vfu_id;
    char *vfu_path, *pci_dev_path;
    int ret = -1;

    while (ret != 0) {
        ret = vfu_run_ctx(o->vfu_ctx);
        if (ret < 0) {
            if (errno == EINTR) {
                /* Interrupted - retry immediately */
                continue;
            } else if (errno == ENOTCONN) {
                vfu_id = object_get_canonical_path_component(OBJECT(o));
                vfu_path = object_get_canonical_path(OBJECT(o));
                g_assert(o->pci_dev);
                pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
                /* o->device is a required property and is non-NULL here */
                g_assert(o->device);
                qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
                                                  o->device, pci_dev_path);
                /* Remove the fd handler before unparenting frees this object */
                qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
                o->vfu_poll_fd = -1;
                object_unparent(OBJECT(o));
                g_free(vfu_path);
                g_free(pci_dev_path);
                break;
            } else {
                VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
                                 o->device, strerror(errno));
                break;
            }
        }
    }
}
  180. static void vfu_object_attach_ctx(void *opaque)
  181. {
  182. VfuObject *o = opaque;
  183. GPollFD pfds[1];
  184. int ret;
  185. qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
  186. pfds[0].fd = o->vfu_poll_fd;
  187. pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
  188. retry_attach:
  189. ret = vfu_attach_ctx(o->vfu_ctx);
  190. if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
  191. /**
  192. * vfu_object_attach_ctx can block QEMU's main loop
  193. * during attach - the monitor and other IO
  194. * could be unresponsive during this time.
  195. */
  196. (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
  197. goto retry_attach;
  198. } else if (ret < 0) {
  199. VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
  200. o->device, strerror(errno));
  201. return;
  202. }
  203. o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
  204. if (o->vfu_poll_fd < 0) {
  205. VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
  206. return;
  207. }
  208. qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
  209. }
  210. static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
  211. size_t count, loff_t offset,
  212. const bool is_write)
  213. {
  214. VfuObject *o = vfu_get_private(vfu_ctx);
  215. uint32_t pci_access_width = sizeof(uint32_t);
  216. size_t bytes = count;
  217. uint32_t val = 0;
  218. char *ptr = buf;
  219. int len;
  220. /*
  221. * Writes to the BAR registers would trigger an update to the
  222. * global Memory and IO AddressSpaces. But the remote device
  223. * never uses the global AddressSpaces, therefore overlapping
  224. * memory regions are not a problem
  225. */
  226. while (bytes > 0) {
  227. len = (bytes > pci_access_width) ? pci_access_width : bytes;
  228. if (is_write) {
  229. memcpy(&val, ptr, len);
  230. pci_host_config_write_common(o->pci_dev, offset,
  231. pci_config_size(o->pci_dev),
  232. val, len);
  233. trace_vfu_cfg_write(offset, val);
  234. } else {
  235. val = pci_host_config_read_common(o->pci_dev, offset,
  236. pci_config_size(o->pci_dev), len);
  237. memcpy(ptr, &val, len);
  238. trace_vfu_cfg_read(offset, val);
  239. }
  240. offset += len;
  241. ptr += len;
  242. bytes -= len;
  243. }
  244. return count;
  245. }
/*
 * libvfio-user DMA-map callback: wraps the client-mapped buffer in a RAM
 * MemoryRegion and inserts it into the device's IOMMU address space at the
 * client-specified IOVA, so device DMA resolves to the shared mapping.
 */
static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    AddressSpace *dma_as = NULL;
    MemoryRegion *subregion = NULL;
    g_autofree char *name = NULL;
    struct iovec *iov = &info->iova;

    /* Regions without a mapped host address cannot be accessed directly */
    if (!info->vaddr) {
        return;
    }

    name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
                           (uint64_t)info->vaddr);

    subregion = g_new0(MemoryRegion, 1);

    memory_region_init_ram_ptr(subregion, NULL, name,
                               iov->iov_len, info->vaddr);

    dma_as = pci_device_iommu_address_space(o->pci_dev);

    /* iov_base is the IOVA where the client expects this memory to live */
    memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion);

    trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
}
  265. static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
  266. {
  267. VfuObject *o = vfu_get_private(vfu_ctx);
  268. AddressSpace *dma_as = NULL;
  269. MemoryRegion *mr = NULL;
  270. ram_addr_t offset;
  271. mr = memory_region_from_host(info->vaddr, &offset);
  272. if (!mr) {
  273. return;
  274. }
  275. dma_as = pci_device_iommu_address_space(o->pci_dev);
  276. memory_region_del_subregion(dma_as->root, mr);
  277. object_unparent((OBJECT(mr)));
  278. trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
  279. }
/*
 * Read from or write to a MemoryRegion on behalf of the client. Direct
 * (RAM-backed) regions are memcpy'd; MMIO regions are dispatched through
 * the region's ops in naturally-sized chunks.
 *
 * Returns 0 on success, -1 if any MMIO dispatch fails.
 */
static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
                            hwaddr size, const bool is_write)
{
    uint8_t *ptr = buf;
    bool release_lock = false;
    uint8_t *ram_ptr = NULL;
    MemTxResult result;
    int access_size;
    uint64_t val;

    if (memory_access_is_direct(mr, is_write)) {
        /**
         * Some devices expose a PCI expansion ROM, which could be buffer
         * based as compared to other regions which are primarily based on
         * MemoryRegionOps. memory_region_find() would already check
         * for buffer overflow, we don't need to repeat it here.
         */
        ram_ptr = memory_region_get_ram_ptr(mr);

        if (is_write) {
            memcpy((ram_ptr + offset), buf, size);
        } else {
            memcpy(buf, (ram_ptr + offset), size);
        }

        return 0;
    }

    while (size) {
        /**
         * The read/write logic used below is similar to the ones in
         * flatview_read/write_continue()
         */
        release_lock = prepare_mmio_access(mr);

        /* Chunk size bounded by what the region's ops can accept */
        access_size = memory_access_size(mr, size, offset);

        if (is_write) {
            val = ldn_he_p(ptr, access_size);

            result = memory_region_dispatch_write(mr, offset, val,
                                                  size_memop(access_size),
                                                  MEMTXATTRS_UNSPECIFIED);
        } else {
            result = memory_region_dispatch_read(mr, offset, &val,
                                                 size_memop(access_size),
                                                 MEMTXATTRS_UNSPECIFIED);

            stn_he_p(ptr, access_size, val);
        }

        /* Drop the BQL taken by prepare_mmio_access() before continuing */
        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        if (result != MEMTX_OK) {
            return -1;
        }

        size -= access_size;
        ptr += access_size;
        offset += access_size;
    }

    return 0;
}
/*
 * Access a PCI BAR on behalf of the client: walks the BAR's address space
 * section by section (a BAR may be backed by multiple MemoryRegions) and
 * forwards each piece to vfu_object_mr_rw().
 *
 * Returns the number of bytes successfully accessed, which may be less
 * than len if an invalid address, read-only region, or access failure is
 * encountered mid-walk.
 */
static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
                                hwaddr bar_offset, char * const buf,
                                hwaddr len, const bool is_write)
{
    MemoryRegionSection section = { 0 };
    uint8_t *ptr = (uint8_t *)buf;
    MemoryRegion *section_mr = NULL;
    uint64_t section_size;
    hwaddr section_offset;
    hwaddr size = 0;

    while (len) {
        /* memory_region_find() takes a reference on section.mr */
        section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
                                     bar_offset, len);

        if (!section.mr) {
            warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
            return size;
        }

        section_mr = section.mr;
        section_offset = section.offset_within_region;
        section_size = int128_get64(section.size);

        if (is_write && section_mr->readonly) {
            warn_report("vfu: attempting to write to readonly region in "
                        "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
                        pci_bar, bar_offset,
                        (bar_offset + section_size));
            memory_region_unref(section_mr);
            return size;
        }

        if (vfu_object_mr_rw(section_mr, ptr, section_offset,
                             section_size, is_write)) {
            warn_report("vfu: failed to %s "
                        "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
                        is_write ? "write to" : "read from", bar_offset,
                        (bar_offset + section_size), pci_bar);
            memory_region_unref(section_mr);
            return size;
        }

        size += section_size;
        bar_offset += section_size;
        ptr += section_size;
        len -= section_size;

        /* Balance the reference taken by memory_region_find() */
        memory_region_unref(section_mr);
    }

    return size;
}
  380. /**
  381. * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
  382. *
  383. * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
  384. * define vfu_object_bar2_handler
  385. */
  386. #define VFU_OBJECT_BAR_HANDLER(BAR_NO) \
  387. static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \
  388. char * const buf, size_t count, \
  389. loff_t offset, const bool is_write) \
  390. { \
  391. VfuObject *o = vfu_get_private(vfu_ctx); \
  392. PCIDevice *pci_dev = o->pci_dev; \
  393. \
  394. return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \
  395. buf, count, is_write); \
  396. } \
  397. VFU_OBJECT_BAR_HANDLER(0)
  398. VFU_OBJECT_BAR_HANDLER(1)
  399. VFU_OBJECT_BAR_HANDLER(2)
  400. VFU_OBJECT_BAR_HANDLER(3)
  401. VFU_OBJECT_BAR_HANDLER(4)
  402. VFU_OBJECT_BAR_HANDLER(5)
  403. VFU_OBJECT_BAR_HANDLER(6)
  404. static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
  405. &vfu_object_bar0_handler,
  406. &vfu_object_bar1_handler,
  407. &vfu_object_bar2_handler,
  408. &vfu_object_bar3_handler,
  409. &vfu_object_bar4_handler,
  410. &vfu_object_bar5_handler,
  411. &vfu_object_bar6_handler,
  412. };
  413. /**
  414. * vfu_object_register_bars - Identify active BAR regions of pdev and setup
  415. * callbacks to handle read/write accesses
  416. */
  417. static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
  418. {
  419. int flags = VFU_REGION_FLAG_RW;
  420. int i;
  421. for (i = 0; i < PCI_NUM_REGIONS; i++) {
  422. if (!pdev->io_regions[i].size) {
  423. continue;
  424. }
  425. if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
  426. pdev->io_regions[i].memory->readonly) {
  427. flags &= ~VFU_REGION_FLAG_WRITE;
  428. }
  429. vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
  430. (size_t)pdev->io_regions[i].size,
  431. vfu_object_bar_handlers[i],
  432. flags, NULL, 0, -1, 0);
  433. trace_vfu_bar_register(i, pdev->io_regions[i].addr,
  434. pdev->io_regions[i].size);
  435. }
  436. }
  437. static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
  438. {
  439. int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
  440. pci_dev->devfn);
  441. return pci_bdf;
  442. }
  443. static void vfu_object_set_irq(void *opaque, int pirq, int level)
  444. {
  445. PCIBus *pci_bus = opaque;
  446. PCIDevice *pci_dev = NULL;
  447. vfu_ctx_t *vfu_ctx = NULL;
  448. int pci_bus_num, devfn;
  449. if (level) {
  450. pci_bus_num = PCI_BUS_NUM(pirq);
  451. devfn = PCI_BDF_TO_DEVFN(pirq);
  452. /*
  453. * pci_find_device() performs at O(1) if the device is attached
  454. * to the root PCI bus. Whereas, if the device is attached to a
  455. * secondary PCI bus (such as when a root port is involved),
  456. * finding the parent PCI bus could take O(n)
  457. */
  458. pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);
  459. vfu_ctx = pci_dev->irq_opaque;
  460. g_assert(vfu_ctx);
  461. vfu_irq_trigger(vfu_ctx, 0);
  462. }
  463. }
  464. static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
  465. unsigned int vector)
  466. {
  467. MSIMessage msg;
  468. msg.address = 0;
  469. msg.data = vector;
  470. return msg;
  471. }
  472. static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
  473. {
  474. vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque;
  475. vfu_irq_trigger(vfu_ctx, msg.data);
  476. }
/*
 * Redirect the PCI device's MSI/MSI-X callbacks to the vfio-user versions,
 * saving the originals for vfu_object_restore_msi_cbs().
 */
static void vfu_object_setup_msi_cbs(VfuObject *o)
{
    o->default_msi_trigger = o->pci_dev->msi_trigger;
    o->default_msi_prepare_message = o->pci_dev->msi_prepare_message;
    o->default_msix_prepare_message = o->pci_dev->msix_prepare_message;

    o->pci_dev->msi_trigger = vfu_object_msi_trigger;
    o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg;
    o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg;
}
/* Undo vfu_object_setup_msi_cbs(): restore the device's saved callbacks. */
static void vfu_object_restore_msi_cbs(VfuObject *o)
{
    o->pci_dev->msi_trigger = o->default_msi_trigger;
    o->pci_dev->msi_prepare_message = o->default_msi_prepare_message;
    o->pci_dev->msix_prepare_message = o->default_msix_prepare_message;
}
  492. static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
  493. uint32_t count, bool mask)
  494. {
  495. VfuObject *o = vfu_get_private(vfu_ctx);
  496. uint32_t vector;
  497. for (vector = start; vector < count; vector++) {
  498. msix_set_mask(o->pci_dev, vector, mask);
  499. }
  500. }
  501. static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
  502. uint32_t count, bool mask)
  503. {
  504. VfuObject *o = vfu_get_private(vfu_ctx);
  505. Error *err = NULL;
  506. uint32_t vector;
  507. for (vector = start; vector < count; vector++) {
  508. msi_set_mask(o->pci_dev, vector, mask, &err);
  509. if (err) {
  510. VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
  511. error_get_pretty(err));
  512. error_free(err);
  513. err = NULL;
  514. }
  515. }
  516. }
/*
 * Advertise the device's interrupts (INTx plus MSI-X or MSI) to the
 * vfio-user client and hook the device's MSI callbacks into this server.
 *
 * Returns 0 on success, or the negative error from
 * vfu_setup_device_nr_irqs() on failure.
 */
static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
{
    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
    int ret;

    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
    if (ret < 0) {
        return ret;
    }

    /* MSI-X takes precedence over MSI; only one of the two is advertised */
    if (msix_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                       msix_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                     &vfu_msix_irq_state);
    } else if (msi_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
                                       msi_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
                                     &vfu_msi_irq_state);
    }

    if (ret < 0) {
        return ret;
    }

    vfu_object_setup_msi_cbs(o);

    /* Route the device's IRQ callbacks back to this vfu context */
    pci_dev->irq_opaque = vfu_ctx;

    return 0;
}
  543. void vfu_object_set_bus_irq(PCIBus *pci_bus)
  544. {
  545. int bus_num = pci_bus_num(pci_bus);
  546. int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);
  547. pci_bus_irqs(pci_bus, vfu_object_set_irq, pci_bus, max_bdf);
  548. pci_bus_map_irqs(pci_bus, vfu_object_map_irq);
  549. }
  550. static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
  551. {
  552. VfuObject *o = vfu_get_private(vfu_ctx);
  553. /* vfu_object_ctx_run() handles lost connection */
  554. if (type == VFU_RESET_LOST_CONN) {
  555. return 0;
  556. }
  557. device_cold_reset(DEVICE(o->pci_dev));
  558. return 0;
  559. }
  560. /*
  561. * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
  562. * properties. It also depends on devices instantiated in QEMU. These
  563. * dependencies are not available during the instance_init phase of this
  564. * object's life-cycle. As such, the server is initialized after the
  565. * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
  566. * when the machine is setup, and the dependencies are available.
  567. */
  568. static void vfu_object_machine_done(Notifier *notifier, void *data)
  569. {
  570. VfuObject *o = container_of(notifier, VfuObject, machine_done);
  571. Error *err = NULL;
  572. vfu_object_init_ctx(o, &err);
  573. if (err) {
  574. error_propagate(&error_abort, err);
  575. }
  576. }
/**
 * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
 * an unplug blocker for the associated PCI device. Setup a FD handler
 * to process incoming messages in the context's socket.
 *
 * The socket and device properties are mandatory, and this function
 * will not create the context without them - the setters for these
 * properties should call this function when the property is set. The
 * machine should also be ready when this function is invoked - it is
 * because QEMU objects are initialized before devices, and the
 * associated PCI device wouldn't be available at the object
 * initialization time. Until these conditions are satisfied, this
 * function would return early without performing any task.
 */
static void vfu_object_init_ctx(VfuObject *o, Error **errp)
{
    DeviceState *dev = NULL;
    vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
    int ret;

    /* Preconditions not met yet (or already initialized) - no-op */
    if (o->vfu_ctx || !o->socket || !o->device ||
            !phase_check(PHASE_MACHINE_READY)) {
        return;
    }

    /* Surface an error deferred from instance_init (wrong machine type) */
    if (o->err) {
        error_propagate(errp, o->err);
        o->err = NULL;
        return;
    }

    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
                                LIBVFIO_USER_FLAG_ATTACH_NB,
                                o, VFU_DEV_TYPE_PCI);
    if (o->vfu_ctx == NULL) {
        error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
        return;
    }

    dev = qdev_find_recursive(sysbus_get_default(), o->device);
    if (dev == NULL) {
        error_setg(errp, "vfu: Device %s not found", o->device);
        goto fail;
    }

    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        error_setg(errp, "vfu: %s not a PCI device", o->device);
        goto fail;
    }

    /* Hold a reference on the device for the lifetime of this server */
    o->pci_dev = PCI_DEVICE(dev);
    object_ref(OBJECT(o->pci_dev));

    if (pci_is_express(o->pci_dev)) {
        pci_type = VFU_PCI_TYPE_EXPRESS;
    }

    ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to attach PCI device %s to context - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    /* Prevent hot-unplug of the device while it is being served */
    error_setg(&o->unplug_blocker,
               "vfu: %s for %s must be deleted before unplugging",
               TYPE_VFU_OBJECT, o->device);
    qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);

    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
                           pci_config_size(o->pci_dev), &vfu_object_cfg_access,
                           VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
                           NULL, 0, -1, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to setup config space handlers for %s- %s",
                   o->device, strerror(errno));
        goto fail;
    }

    ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
                   o->device);
        goto fail;
    }

    vfu_object_register_bars(o->vfu_ctx, o->pci_dev);

    ret = vfu_object_setup_irqs(o, o->pci_dev);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup interrupts for %s",
                   o->device);
        goto fail;
    }

    ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup reset callback");
        goto fail;
    }

    ret = vfu_realize_ctx(o->vfu_ctx);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to realize device %s- %s",
                   o->device, strerror(errno));
        goto fail;
    }

    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
    if (o->vfu_poll_fd < 0) {
        error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
        goto fail;
    }

    /* Wait for a client to connect; attach handler swaps in ctx_run later */
    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);

    return;

fail:
    /* Unwind partial setup; leaves the object reusable (vfu_ctx == NULL) */
    vfu_destroy_ctx(o->vfu_ctx);
    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }
    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }
    o->vfu_ctx = NULL;
}
/*
 * instance_init: count the instance, validate the machine type, and defer
 * server creation to machine-init-done if devices aren't realized yet.
 * A wrong machine type is recorded in o->err and reported later by
 * vfu_object_init_ctx() (errors cannot be reported from instance_init).
 */
static void vfu_object_init(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs++;

    if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
        error_setg(&o->err, "vfu: %s only compatible with %s machine",
                   TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
        return;
    }

    if (!phase_check(PHASE_MACHINE_READY)) {
        o->machine_done.notify = vfu_object_machine_done;
        qemu_add_machine_init_done_notifier(&o->machine_done);
    }

    o->vfu_poll_fd = -1;
}
/*
 * instance_finalize: tear down the server - remove the fd handler, destroy
 * the vfu context, release the PCI device (unplug blocker, MSI callbacks,
 * reference), and, when this was the last instance and auto-shutdown is
 * set, request VM shutdown.
 */
static void vfu_object_finalize(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs--;

    qapi_free_SocketAddress(o->socket);

    o->socket = NULL;

    /* Stop polling before the context (and its fd) is destroyed */
    if (o->vfu_poll_fd != -1) {
        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
        o->vfu_poll_fd = -1;
    }

    if (o->vfu_ctx) {
        vfu_destroy_ctx(o->vfu_ctx);
        o->vfu_ctx = NULL;
    }

    g_free(o->device);

    o->device = NULL;

    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }

    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }

    /* Last server gone: shut the remote VM down if the machine allows it */
    if (!k->nr_devs && vfu_object_auto_shutdown()) {
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
    }

    /* Notifier may still be registered if the machine never reached ready */
    if (o->machine_done.notify) {
        qemu_remove_machine_init_done_notifier(&o->machine_done);
        o->machine_done.notify = NULL;
    }
}
/*
 * class_init: register the required "socket" and "device" properties and
 * reset the class-wide instance counter.
 */
static void vfu_object_class_init(ObjectClass *klass, void *data)
{
    VfuObjectClass *k = VFU_OBJECT_CLASS(klass);

    k->nr_devs = 0;

    /* "socket" uses a visitor-based setter; there is no getter */
    object_class_property_add(klass, "socket", "SocketAddress", NULL,
                              vfu_object_set_socket, NULL, NULL);
    object_class_property_set_description(klass, "socket",
                                          "SocketAddress "
                                          "(ex: type=unix,path=/tmp/sock). "
                                          "Only UNIX is presently supported");
    object_class_property_add_str(klass, "device", NULL,
                                  vfu_object_set_device);
    object_class_property_set_description(klass, "device",
                                          "device ID - only PCI devices "
                                          "are presently supported");
}
/* QOM type registration info: user-creatable x-vfio-user-server object */
static const TypeInfo vfu_object_info = {
    .name = TYPE_VFU_OBJECT,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(VfuObject),
    .instance_init = vfu_object_init,
    .instance_finalize = vfu_object_finalize,
    .class_size = sizeof(VfuObjectClass),
    .class_init = vfu_object_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};
/* Register the x-vfio-user-server type with the QOM type system */
static void vfu_register_types(void)
{
    type_register_static(&vfu_object_info);
}
  778. type_init(vfu_register_types);