/* vfio-user-obj.c */
  1. /**
  2. * QEMU vfio-user-server server object
  3. *
  4. * Copyright © 2022 Oracle and/or its affiliates.
  5. *
  6. * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
  7. *
  8. * See the COPYING file in the top-level directory.
  9. *
  10. */
  11. /**
  12. * Usage: add options:
  13. * -machine x-remote,vfio-user=on,auto-shutdown=on
  14. * -device <PCI-device>,id=<pci-dev-id>
  15. * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
  16. * device=<pci-dev-id>
  17. *
  18. * Note that x-vfio-user-server object must be used with x-remote machine only.
  19. * This server could only support PCI devices for now.
  20. *
  21. * type - SocketAddress type - presently "unix" alone is supported. Required
  22. * option
  23. *
  24. * path - named unix socket, it will be created by the server. It is
  25. * a required option
  26. *
  27. * device - id of a device on the server, a required option. PCI devices
  28. * alone are supported presently.
  29. *
  30. * notes - x-vfio-user-server could block IO and monitor during the
  31. * initialization phase.
  32. *
  33. * When x-remote machine has the auto-shutdown property
  34. * enabled (default), x-vfio-user-server terminates after the last
  35. * client disconnects. Otherwise, it will continue running until
  36. * explicitly killed.
  37. */
  38. #include "qemu/osdep.h"
  39. #include "qom/object.h"
  40. #include "qom/object_interfaces.h"
  41. #include "qemu/error-report.h"
  42. #include "trace.h"
  43. #include "system/runstate.h"
  44. #include "hw/boards.h"
  45. #include "hw/remote/machine.h"
  46. #include "qapi/error.h"
  47. #include "qapi/qapi-visit-sockets.h"
  48. #include "qapi/qapi-events-misc.h"
  49. #include "qemu/notify.h"
  50. #include "qemu/thread.h"
  51. #include "qemu/main-loop.h"
  52. #include "system/system.h"
  53. #include "libvfio-user.h"
  54. #include "hw/qdev-core.h"
  55. #include "hw/pci/pci.h"
  56. #include "qemu/timer.h"
  57. #include "exec/memory.h"
  58. #include "hw/pci/msi.h"
  59. #include "hw/pci/msix.h"
  60. #include "hw/remote/vfio-user-obj.h"
  61. #define TYPE_VFU_OBJECT "x-vfio-user-server"
  62. OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
  63. /**
  64. * VFU_OBJECT_ERROR - reports an error message.
  65. *
  66. * If auto_shutdown is set, it aborts the machine on error. Otherwise,
  67. * it logs an error message without aborting. auto_shutdown is disabled
  68. * when the server serves clients from multiple VMs; as such, an error
  69. * from one VM shouldn't be able to disrupt other VM's services.
  70. */
  71. #define VFU_OBJECT_ERROR(o, fmt, ...) \
  72. { \
  73. if (vfu_object_auto_shutdown()) { \
  74. error_setg(&error_abort, (fmt), ## __VA_ARGS__); \
  75. } else { \
  76. error_report((fmt), ## __VA_ARGS__); \
  77. } \
  78. } \
struct VfuObjectClass {
    ObjectClass parent_class;

    /*
     * Number of VfuObject instances alive; incremented in instance_init,
     * decremented in instance_finalize. When it drops to zero and
     * auto-shutdown is enabled, the server process shuts down.
     */
    unsigned int nr_devs;
};

struct VfuObject {
    /* private */
    Object parent;

    /* "socket" property - UNIX socket address the server listens on */
    SocketAddress *socket;

    /* "device" property - QEMU device id of the exported PCI device */
    char *device;

    /*
     * Error recorded during instance_init (e.g. wrong machine type);
     * reported later from vfu_object_init_ctx() when a property is set.
     */
    Error *err;

    /* Defers context creation until machine init is done */
    Notifier machine_done;

    /* libvfio-user context serving this device */
    vfu_ctx_t *vfu_ctx;

    /* PCI device resolved from the "device" property */
    PCIDevice *pci_dev;

    /* Blocks hot-unplug of pci_dev while this server exists */
    Error *unplug_blocker;

    /* fd watched by the main loop for vfio-user messages; -1 when unused */
    int vfu_poll_fd;

    /* Original MSI/MSI-X callbacks of pci_dev, restored on teardown */
    MSITriggerFunc *default_msi_trigger;
    MSIPrepareMessageFunc *default_msi_prepare_message;
    MSIxPrepareMessageFunc *default_msix_prepare_message;
};

static void vfu_object_init_ctx(VfuObject *o, Error **errp);
  99. static bool vfu_object_auto_shutdown(void)
  100. {
  101. bool auto_shutdown = true;
  102. Error *local_err = NULL;
  103. if (!current_machine) {
  104. return auto_shutdown;
  105. }
  106. auto_shutdown = object_property_get_bool(OBJECT(current_machine),
  107. "auto-shutdown",
  108. &local_err);
  109. /*
  110. * local_err would be set if no such property exists - safe to ignore.
  111. * Unlikely scenario as auto-shutdown is always defined for
  112. * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with
  113. * TYPE_REMOTE_MACHINE
  114. */
  115. if (local_err) {
  116. auto_shutdown = true;
  117. error_free(local_err);
  118. }
  119. return auto_shutdown;
  120. }
  121. static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
  122. void *opaque, Error **errp)
  123. {
  124. VfuObject *o = VFU_OBJECT(obj);
  125. if (o->vfu_ctx) {
  126. error_setg(errp, "vfu: Unable to set socket property - server busy");
  127. return;
  128. }
  129. qapi_free_SocketAddress(o->socket);
  130. o->socket = NULL;
  131. visit_type_SocketAddress(v, name, &o->socket, errp);
  132. if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
  133. error_setg(errp, "vfu: Unsupported socket type - %s",
  134. SocketAddressType_str(o->socket->type));
  135. qapi_free_SocketAddress(o->socket);
  136. o->socket = NULL;
  137. return;
  138. }
  139. trace_vfu_prop("socket", o->socket->u.q_unix.path);
  140. vfu_object_init_ctx(o, errp);
  141. }
  142. static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
  143. {
  144. VfuObject *o = VFU_OBJECT(obj);
  145. if (o->vfu_ctx) {
  146. error_setg(errp, "vfu: Unable to set device property - server busy");
  147. return;
  148. }
  149. g_free(o->device);
  150. o->device = g_strdup(str);
  151. trace_vfu_prop("device", str);
  152. vfu_object_init_ctx(o, errp);
  153. }
/*
 * Main-loop fd handler: drain pending vfio-user messages for this
 * context. On ENOTCONN (client hangup) it emits the VFU_CLIENT_HANGUP
 * QAPI event and unparents itself - note object_unparent() may finalize
 * 'o', so no member of 'o' is touched after that call.
 */
static void vfu_object_ctx_run(void *opaque)
{
    VfuObject *o = opaque;
    const char *vfu_id;
    char *vfu_path, *pci_dev_path;
    int ret = -1;

    while (ret != 0) {
        ret = vfu_run_ctx(o->vfu_ctx);
        if (ret < 0) {
            if (errno == EINTR) {
                /* Interrupted - retry the run loop */
                continue;
            } else if (errno == ENOTCONN) {
                /* Client disconnected: notify management and self-destruct */
                vfu_id = object_get_canonical_path_component(OBJECT(o));
                vfu_path = object_get_canonical_path(OBJECT(o));
                g_assert(o->pci_dev);
                pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
                /* o->device is a required property and is non-NULL here */
                g_assert(o->device);
                qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
                                                  o->device, pci_dev_path);
                /* Stop watching the poll fd before tearing down */
                qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
                o->vfu_poll_fd = -1;
                object_unparent(OBJECT(o));
                g_free(vfu_path);
                g_free(pci_dev_path);
                break;
            } else {
                VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
                                 o->device, strerror(errno));
                break;
            }
        }
    }
}
  188. static void vfu_object_attach_ctx(void *opaque)
  189. {
  190. VfuObject *o = opaque;
  191. GPollFD pfds[1];
  192. int ret;
  193. qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
  194. pfds[0].fd = o->vfu_poll_fd;
  195. pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
  196. retry_attach:
  197. ret = vfu_attach_ctx(o->vfu_ctx);
  198. if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
  199. /**
  200. * vfu_object_attach_ctx can block QEMU's main loop
  201. * during attach - the monitor and other IO
  202. * could be unresponsive during this time.
  203. */
  204. (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
  205. goto retry_attach;
  206. } else if (ret < 0) {
  207. VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
  208. o->device, strerror(errno));
  209. return;
  210. }
  211. o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
  212. if (o->vfu_poll_fd < 0) {
  213. VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
  214. return;
  215. }
  216. qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
  217. }
/*
 * Region callback for the PCI config space: service a client read or
 * write of 'count' bytes at 'offset', chunked into at-most-32-bit
 * accesses. Data in 'buf' is little-endian, per PCI convention.
 * Returns the number of bytes handled (always 'count').
 */
static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
                                     size_t count, loff_t offset,
                                     const bool is_write)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t pci_access_width = sizeof(uint32_t);
    size_t bytes = count;
    uint32_t val = 0;
    char *ptr = buf;
    int len;

    /*
     * Writes to the BAR registers would trigger an update to the
     * global Memory and IO AddressSpaces. But the remote device
     * never uses the global AddressSpaces, therefore overlapping
     * memory regions are not a problem
     */
    while (bytes > 0) {
        /* Clamp each access to a 32-bit config cycle */
        len = (bytes > pci_access_width) ? pci_access_width : bytes;
        if (is_write) {
            val = ldn_le_p(ptr, len);
            pci_host_config_write_common(o->pci_dev, offset,
                                         pci_config_size(o->pci_dev),
                                         val, len);
            trace_vfu_cfg_write(offset, val);
        } else {
            val = pci_host_config_read_common(o->pci_dev, offset,
                                              pci_config_size(o->pci_dev), len);
            stn_le_p(ptr, len, val);
            trace_vfu_cfg_read(offset, val);
        }
        offset += len;
        ptr += len;
        bytes -= len;
    }

    return count;
}
/*
 * libvfio-user DMA-map callback: expose a client-mapped region to the
 * device by inserting a RAM MemoryRegion (backed by the client's mmap'd
 * vaddr) into the device's DMA address space at the guest IOVA.
 * Regions without a vaddr (not mappable by the server) are skipped.
 */
static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    AddressSpace *dma_as = NULL;
    MemoryRegion *subregion = NULL;
    g_autofree char *name = NULL;
    struct iovec *iov = &info->iova;

    if (!info->vaddr) {
        return;
    }

    /* Name the region after the device and the host mapping address */
    name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
                           (uint64_t)info->vaddr);

    /* Freed via object_unparent() in dma_unregister() */
    subregion = g_new0(MemoryRegion, 1);

    memory_region_init_ram_ptr(subregion, NULL, name,
                               iov->iov_len, info->vaddr);

    dma_as = pci_device_iommu_address_space(o->pci_dev);

    memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion);

    trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
}
  273. static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
  274. {
  275. VfuObject *o = vfu_get_private(vfu_ctx);
  276. AddressSpace *dma_as = NULL;
  277. MemoryRegion *mr = NULL;
  278. ram_addr_t offset;
  279. mr = memory_region_from_host(info->vaddr, &offset);
  280. if (!mr) {
  281. return;
  282. }
  283. dma_as = pci_device_iommu_address_space(o->pci_dev);
  284. memory_region_del_subregion(dma_as->root, mr);
  285. object_unparent((OBJECT(mr)));
  286. trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
  287. }
/*
 * Read from or write to a MemoryRegion. Direct (RAM/ROM-backed) regions
 * are served with memcpy; MMIO regions are dispatched access-by-access,
 * taking the BQL around each device access. Returns 0 on success, -1 on
 * a failed MMIO transaction.
 */
static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
                            hwaddr size, const bool is_write)
{
    uint8_t *ptr = buf;
    bool release_lock = false;
    uint8_t *ram_ptr = NULL;
    MemTxResult result;
    int access_size;
    uint64_t val;

    if (memory_access_is_direct(mr, is_write)) {
        /**
         * Some devices expose a PCI expansion ROM, which could be buffer
         * based as compared to other regions which are primarily based on
         * MemoryRegionOps. memory_region_find() would already check
         * for buffer overflow, we don't need to repeat it here.
         */
        ram_ptr = memory_region_get_ram_ptr(mr);

        if (is_write) {
            memcpy((ram_ptr + offset), buf, size);
        } else {
            memcpy(buf, (ram_ptr + offset), size);
        }

        return 0;
    }

    while (size) {
        /**
         * The read/write logic used below is similar to the ones in
         * flatview_read/write_continue()
         */
        release_lock = prepare_mmio_access(mr);

        /* Largest access the region supports for the remaining span */
        access_size = memory_access_size(mr, size, offset);

        if (is_write) {
            /* Host-endian load; the device op handles its own endianness */
            val = ldn_he_p(ptr, access_size);

            result = memory_region_dispatch_write(mr, offset, val,
                                                  size_memop(access_size),
                                                  MEMTXATTRS_UNSPECIFIED);
        } else {
            result = memory_region_dispatch_read(mr, offset, &val,
                                                 size_memop(access_size),
                                                 MEMTXATTRS_UNSPECIFIED);

            stn_he_p(ptr, access_size, val);
        }

        /* Drop the BQL between accesses if prepare_mmio_access took it */
        if (release_lock) {
            bql_unlock();
            release_lock = false;
        }

        if (result != MEMTX_OK) {
            return -1;
        }

        size -= access_size;
        ptr += access_size;
        offset += access_size;
    }

    return 0;
}
/*
 * Service a client access to a BAR: walk 'len' bytes starting at
 * 'bar_offset', resolving each span to a MemoryRegionSection and
 * delegating to vfu_object_mr_rw(). Write attempts to read-only
 * sections and lookup/dispatch failures terminate the walk early.
 * Returns the number of bytes successfully transferred. Each
 * memory_region_find() reference is dropped on every exit path.
 */
static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
                                hwaddr bar_offset, char * const buf,
                                hwaddr len, const bool is_write)
{
    MemoryRegionSection section = { 0 };
    uint8_t *ptr = (uint8_t *)buf;
    MemoryRegion *section_mr = NULL;
    uint64_t section_size;
    hwaddr section_offset;
    hwaddr size = 0;

    while (len) {
        /* Takes a reference on section.mr - released below */
        section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
                                     bar_offset, len);

        if (!section.mr) {
            warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
            return size;
        }

        section_mr = section.mr;
        section_offset = section.offset_within_region;
        section_size = int128_get64(section.size);

        if (is_write && section_mr->readonly) {
            warn_report("vfu: attempting to write to readonly region in "
                        "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
                        pci_bar, bar_offset,
                        (bar_offset + section_size));
            memory_region_unref(section_mr);
            return size;
        }

        if (vfu_object_mr_rw(section_mr, ptr, section_offset,
                             section_size, is_write)) {
            warn_report("vfu: failed to %s "
                        "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
                        is_write ? "write to" : "read from", bar_offset,
                        (bar_offset + section_size), pci_bar);
            memory_region_unref(section_mr);
            return size;
        }

        size += section_size;
        bar_offset += section_size;
        ptr += section_size;
        len -= section_size;

        memory_region_unref(section_mr);
    }

    return size;
}
/**
 * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
 *
 * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
 * define vfu_object_bar2_handler
 *
 * Each generated handler has the vfu_region_access_cb_t signature and
 * simply forwards to vfu_object_bar_rw() for its fixed BAR number.
 */
#define VFU_OBJECT_BAR_HANDLER(BAR_NO)                                    \
    static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx,   \
                                        char * const buf, size_t count,   \
                                        loff_t offset, const bool is_write) \
    {                                                                     \
        VfuObject *o = vfu_get_private(vfu_ctx);                          \
        PCIDevice *pci_dev = o->pci_dev;                                  \
                                                                          \
        return vfu_object_bar_rw(pci_dev, BAR_NO, offset,                 \
                                 buf, count, is_write);                   \
    }                                                                     \

VFU_OBJECT_BAR_HANDLER(0)
VFU_OBJECT_BAR_HANDLER(1)
VFU_OBJECT_BAR_HANDLER(2)
VFU_OBJECT_BAR_HANDLER(3)
VFU_OBJECT_BAR_HANDLER(4)
VFU_OBJECT_BAR_HANDLER(5)
VFU_OBJECT_BAR_HANDLER(6)

/* Per-BAR dispatch table, indexed by PCI region number (6 is the ROM) */
static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
    &vfu_object_bar0_handler,
    &vfu_object_bar1_handler,
    &vfu_object_bar2_handler,
    &vfu_object_bar3_handler,
    &vfu_object_bar4_handler,
    &vfu_object_bar5_handler,
    &vfu_object_bar6_handler,
};
  421. /**
  422. * vfu_object_register_bars - Identify active BAR regions of pdev and setup
  423. * callbacks to handle read/write accesses
  424. */
  425. static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
  426. {
  427. int flags = VFU_REGION_FLAG_RW;
  428. int i;
  429. for (i = 0; i < PCI_NUM_REGIONS; i++) {
  430. if (!pdev->io_regions[i].size) {
  431. continue;
  432. }
  433. if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
  434. pdev->io_regions[i].memory->readonly) {
  435. flags &= ~VFU_REGION_FLAG_WRITE;
  436. }
  437. vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
  438. (size_t)pdev->io_regions[i].size,
  439. vfu_object_bar_handlers[i],
  440. flags, NULL, 0, -1, 0);
  441. trace_vfu_bar_register(i, pdev->io_regions[i].addr,
  442. pdev->io_regions[i].size);
  443. }
  444. }
  445. static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
  446. {
  447. int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
  448. pci_dev->devfn);
  449. return pci_bdf;
  450. }
  451. static void vfu_object_set_irq(void *opaque, int pirq, int level)
  452. {
  453. PCIBus *pci_bus = opaque;
  454. PCIDevice *pci_dev = NULL;
  455. vfu_ctx_t *vfu_ctx = NULL;
  456. int pci_bus_num, devfn;
  457. if (level) {
  458. pci_bus_num = PCI_BUS_NUM(pirq);
  459. devfn = PCI_BDF_TO_DEVFN(pirq);
  460. /*
  461. * pci_find_device() performs at O(1) if the device is attached
  462. * to the root PCI bus. Whereas, if the device is attached to a
  463. * secondary PCI bus (such as when a root port is involved),
  464. * finding the parent PCI bus could take O(n)
  465. */
  466. pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);
  467. vfu_ctx = pci_dev->irq_opaque;
  468. g_assert(vfu_ctx);
  469. vfu_irq_trigger(vfu_ctx, 0);
  470. }
  471. }
  472. static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
  473. unsigned int vector)
  474. {
  475. MSIMessage msg;
  476. msg.address = 0;
  477. msg.data = vector;
  478. return msg;
  479. }
  480. static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
  481. {
  482. vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque;
  483. vfu_irq_trigger(vfu_ctx, msg.data);
  484. }
  485. static void vfu_object_setup_msi_cbs(VfuObject *o)
  486. {
  487. o->default_msi_trigger = o->pci_dev->msi_trigger;
  488. o->default_msi_prepare_message = o->pci_dev->msi_prepare_message;
  489. o->default_msix_prepare_message = o->pci_dev->msix_prepare_message;
  490. o->pci_dev->msi_trigger = vfu_object_msi_trigger;
  491. o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg;
  492. o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg;
  493. }
  494. static void vfu_object_restore_msi_cbs(VfuObject *o)
  495. {
  496. o->pci_dev->msi_trigger = o->default_msi_trigger;
  497. o->pci_dev->msi_prepare_message = o->default_msi_prepare_message;
  498. o->pci_dev->msix_prepare_message = o->default_msix_prepare_message;
  499. }
  500. static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
  501. uint32_t count, bool mask)
  502. {
  503. VfuObject *o = vfu_get_private(vfu_ctx);
  504. uint32_t vector;
  505. for (vector = start; vector < count; vector++) {
  506. msix_set_mask(o->pci_dev, vector, mask);
  507. }
  508. }
  509. static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
  510. uint32_t count, bool mask)
  511. {
  512. VfuObject *o = vfu_get_private(vfu_ctx);
  513. Error *err = NULL;
  514. uint32_t vector;
  515. for (vector = start; vector < count; vector++) {
  516. msi_set_mask(o->pci_dev, vector, mask, &err);
  517. if (err) {
  518. VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
  519. error_get_pretty(err));
  520. error_free(err);
  521. err = NULL;
  522. }
  523. }
  524. }
/*
 * Configure interrupt plumbing for the context: one INTx IRQ always,
 * plus MSI-X or MSI (MSI-X preferred, mutually exclusive here) with the
 * matching mask-state callback. Finally swaps in the vfio-user MSI
 * callbacks and points the device's irq_opaque at the context.
 * Returns 0 on success or the negative libvfio-user error.
 */
static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
{
    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
    int ret;

    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
    if (ret < 0) {
        return ret;
    }

    if (msix_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                       msix_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                     &vfu_msix_irq_state);
    } else if (msi_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
                                       msi_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
                                     &vfu_msi_irq_state);
    }

    /* ret still holds the MSI/MSI-X setup result (or 0 from INTX) */
    if (ret < 0) {
        return ret;
    }

    vfu_object_setup_msi_cbs(o);

    pci_dev->irq_opaque = vfu_ctx;

    return 0;
}
  551. void vfu_object_set_bus_irq(PCIBus *pci_bus)
  552. {
  553. int bus_num = pci_bus_num(pci_bus);
  554. int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);
  555. pci_bus_irqs(pci_bus, vfu_object_set_irq, pci_bus, max_bdf);
  556. pci_bus_map_irqs(pci_bus, vfu_object_map_irq);
  557. }
  558. static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
  559. {
  560. VfuObject *o = vfu_get_private(vfu_ctx);
  561. /* vfu_object_ctx_run() handles lost connection */
  562. if (type == VFU_RESET_LOST_CONN) {
  563. return 0;
  564. }
  565. device_cold_reset(DEVICE(o->pci_dev));
  566. return 0;
  567. }
  568. /*
  569. * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
  570. * properties. It also depends on devices instantiated in QEMU. These
  571. * dependencies are not available during the instance_init phase of this
  572. * object's life-cycle. As such, the server is initialized after the
  573. * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
  574. * when the machine is setup, and the dependencies are available.
  575. */
/*
 * machine-init-done notifier: retry context creation now that devices
 * exist. Any failure here is fatal - there is no caller to hand the
 * error to, so it is propagated into error_abort.
 */
static void vfu_object_machine_done(Notifier *notifier, void *data)
{
    VfuObject *o = container_of(notifier, VfuObject, machine_done);
    Error *err = NULL;

    vfu_object_init_ctx(o, &err);

    if (err) {
        error_propagate(&error_abort, err);
    }
}
  585. /**
  586. * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
  587. * an unplug blocker for the associated PCI device. Setup a FD handler
  588. * to process incoming messages in the context's socket.
  589. *
  590. * The socket and device properties are mandatory, and this function
  591. * will not create the context without them - the setters for these
  592. * properties should call this function when the property is set. The
  593. * machine should also be ready when this function is invoked - it is
  594. * because QEMU objects are initialized before devices, and the
  595. * associated PCI device wouldn't be available at the object
  596. * initialization time. Until these conditions are satisfied, this
  597. * function would return early without performing any task.
  598. */
static void vfu_object_init_ctx(VfuObject *o, Error **errp)
{
    DeviceState *dev = NULL;
    vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
    int ret;

    /*
     * No-op until both required properties are set and the machine is
     * ready; also no-op if the context already exists.
     */
    if (o->vfu_ctx || !o->socket || !o->device ||
            !phase_check(PHASE_MACHINE_READY)) {
        return;
    }

    /* Report an error deferred from instance_init (wrong machine type) */
    if (o->err) {
        error_propagate(errp, o->err);
        o->err = NULL;
        return;
    }

    /* Non-blocking attach: the client is accepted via the poll fd below */
    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
                                LIBVFIO_USER_FLAG_ATTACH_NB,
                                o, VFU_DEV_TYPE_PCI);
    if (o->vfu_ctx == NULL) {
        error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
        return;
    }

    dev = qdev_find_recursive(sysbus_get_default(), o->device);
    if (dev == NULL) {
        error_setg(errp, "vfu: Device %s not found", o->device);
        goto fail;
    }

    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        error_setg(errp, "vfu: %s not a PCI device", o->device);
        goto fail;
    }

    /* Hold a reference for the lifetime of the server; dropped on teardown */
    o->pci_dev = PCI_DEVICE(dev);
    object_ref(OBJECT(o->pci_dev));

    if (pci_is_express(o->pci_dev)) {
        pci_type = VFU_PCI_TYPE_EXPRESS;
    }

    ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to attach PCI device %s to context - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    /* Keep the exported device from being hot-unplugged under the server */
    error_setg(&o->unplug_blocker,
               "vfu: %s for %s must be deleted before unplugging",
               TYPE_VFU_OBJECT, o->device);
    qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);

    /* ALWAYS_CB: even accesses servable from shadow config hit our handler */
    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
                           pci_config_size(o->pci_dev), &vfu_object_cfg_access,
                           VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
                           NULL, 0, -1, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to setup config space handlers for %s- %s",
                   o->device, strerror(errno));
        goto fail;
    }

    ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
                   o->device);
        goto fail;
    }

    vfu_object_register_bars(o->vfu_ctx, o->pci_dev);

    ret = vfu_object_setup_irqs(o, o->pci_dev);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup interrupts for %s",
                   o->device);
        goto fail;
    }

    ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup reset callback");
        goto fail;
    }

    ret = vfu_realize_ctx(o->vfu_ctx);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to realize device %s- %s",
                   o->device, strerror(errno));
        goto fail;
    }

    /* Watch the context's fd; first event is the client connecting */
    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
    if (o->vfu_poll_fd < 0) {
        error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
        goto fail;
    }

    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);

    return;

fail:
    /* Unwind everything acquired above, in reverse order */
    vfu_destroy_ctx(o->vfu_ctx);
    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }
    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }
    o->vfu_ctx = NULL;
}
  701. static void vfu_object_init(Object *obj)
  702. {
  703. VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
  704. VfuObject *o = VFU_OBJECT(obj);
  705. k->nr_devs++;
  706. if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
  707. error_setg(&o->err, "vfu: %s only compatible with %s machine",
  708. TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
  709. return;
  710. }
  711. if (!phase_check(PHASE_MACHINE_READY)) {
  712. o->machine_done.notify = vfu_object_machine_done;
  713. qemu_add_machine_init_done_notifier(&o->machine_done);
  714. }
  715. o->vfu_poll_fd = -1;
  716. }
/*
 * instance_finalize: tear down everything vfu_object_init_ctx() set up,
 * release the PCI device reference, and - if this was the last instance
 * and auto-shutdown is enabled - request process shutdown.
 */
static void vfu_object_finalize(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs--;

    qapi_free_SocketAddress(o->socket);

    o->socket = NULL;

    /* Stop main-loop polling before destroying the context */
    if (o->vfu_poll_fd != -1) {
        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
        o->vfu_poll_fd = -1;
    }

    if (o->vfu_ctx) {
        vfu_destroy_ctx(o->vfu_ctx);
        o->vfu_ctx = NULL;
    }

    g_free(o->device);

    o->device = NULL;

    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }

    /* Restore original MSI callbacks and drop our device reference */
    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }

    /* Last server gone - terminate the process if auto-shutdown is on */
    if (!k->nr_devs && vfu_object_auto_shutdown()) {
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
    }

    if (o->machine_done.notify) {
        qemu_remove_machine_init_done_notifier(&o->machine_done);
        o->machine_done.notify = NULL;
    }
}
/*
 * class_init: register the two required object properties. Both setters
 * call vfu_object_init_ctx(), so the server starts as soon as the last
 * of {socket, device} is set on a ready machine.
 */
static void vfu_object_class_init(ObjectClass *klass, void *data)
{
    VfuObjectClass *k = VFU_OBJECT_CLASS(klass);

    k->nr_devs = 0;

    object_class_property_add(klass, "socket", "SocketAddress", NULL,
                              vfu_object_set_socket, NULL, NULL);
    object_class_property_set_description(klass, "socket",
                                          "SocketAddress "
                                          "(ex: type=unix,path=/tmp/sock). "
                                          "Only UNIX is presently supported");
    object_class_property_add_str(klass, "device", NULL,
                                  vfu_object_set_device);
    object_class_property_set_description(klass, "device",
                                          "device ID - only PCI devices "
                                          "are presently supported");
}
/* QOM type registration for the user-creatable x-vfio-user-server object */
static const TypeInfo vfu_object_info = {
    .name = TYPE_VFU_OBJECT,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(VfuObject),
    .instance_init = vfu_object_init,
    .instance_finalize = vfu_object_finalize,
    .class_size = sizeof(VfuObjectClass),
    .class_init = vfu_object_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};
/* Register the type with QOM at module-load time */
static void vfu_register_types(void)
{
    type_register_static(&vfu_object_info);
}

type_init(vfu_register_types);