/*
 * Copyright (c) 2007, Neocleus Corporation.
 * Copyright (c) 2007, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Alex Novik <alex@neocleus.com>
 * Allen Kay <allen.m.kay@intel.com>
 * Guy Zana <guy@neocleus.com>
 *
 * This file implements direct PCI assignment to a HVM guest
 */

/*
 * Interrupt Disable policy:
 *
 * INTx interrupt:
 *   Initialize(register_real_device)
 *     Map INTx(xc_physdev_map_pirq):
 *       <fail>
 *         - Set real Interrupt Disable bit to '1'.
 *         - Set machine_irq and assigned_device->machine_irq to '0'.
 *         * Don't bind INTx.
 *
 *     Bind INTx(xc_domain_bind_pt_pci_irq):
 *       <fail>
 *         - Set real Interrupt Disable bit to '1'.
 *         - Unmap INTx.
 *         - Decrement xen_pt_mapped_machine_irq[machine_irq]
 *         - Set assigned_device->machine_irq to '0'.
 *
 *   Write to Interrupt Disable bit by guest software(xen_pt_cmd_reg_write)
 *     Write '0'
 *       - Set real bit to '0' if assigned_device->machine_irq isn't '0'.
 *
 *     Write '1'
 *       - Set real bit to '1'.
 *
 * MSI interrupt:
 *   Initialize MSI register(xen_pt_msi_setup, xen_pt_msi_update)
 *   Bind MSI(xc_domain_update_msi_irq)
 *     <fail>
 *       - Unmap MSI.
 *       - Set dev->msi->pirq to '-1'.
 *
 * MSI-X interrupt:
 *   Initialize MSI-X register(xen_pt_msix_update_one)
 *   Bind MSI-X(xc_domain_update_msi_irq)
 *     <fail>
 *       - Unmap MSI-X.
 *       - Set entry->pirq to '-1'.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include <sys/ioctl.h>

#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
#include "xen_pt.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen-legacy-backend.h"
#include "qemu/range.h"

static bool has_igd_gfx_passthru;

bool xen_igd_gfx_pt_enabled(void)
{
    return has_igd_gfx_passthru;
}

void xen_igd_gfx_pt_set(bool value, Error **errp)
{
    has_igd_gfx_passthru = value;
}

#define XEN_PT_NR_IRQS (256)
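/*
 * Per machine IRQ, the number of passthrough devices that currently have
 * that IRQ mapped to a pirq; the pirq is only unmapped again once the
 * count drops back to zero (see xen_pt_destroy()).
 */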
static uint8_t xen_pt_mapped_machine_irq[XEN_PT_NR_IRQS] = {0};

void xen_pt_log(const PCIDevice *d, const char *f, ...)
{
    va_list ap;

    va_start(ap, f);
    if (d) {
        fprintf(stderr, "[%02x:%02x.%d] ", pci_dev_bus_num(d),
                PCI_SLOT(d->devfn), PCI_FUNC(d->devfn));
    }
    vfprintf(stderr, f, ap);
    va_end(ap);
}

/* Config Space */

static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int len)
{
    /* check offset range */
    if (addr > 0xFF) {
        XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. "
                   "(addr: 0x%02x, len: %d)\n", addr, len);
        return -1;
    }

    /* check read size */
    if ((len != 1) && (len != 2) && (len != 4)) {
        XEN_PT_ERR(d, "Failed to access register with invalid access length. "
                   "(addr: 0x%02x, len: %d)\n", addr, len);
        return -1;
    }

    /* check offset alignment */
    if (addr & (len - 1)) {
        XEN_PT_ERR(d, "Failed to access register with invalid access size "
                   "alignment. (addr: 0x%02x, len: %d)\n", addr, len);
        return -1;
    }

    return 0;
}
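/*
 * Map a config-space offset to a BAR index: PCI_ROM_ADDRESS yields
 * PCI_ROM_SLOT, other offsets are taken relative to PCI_BASE_ADDRESS_0,
 * and anything out of range yields -1.
 */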
int xen_pt_bar_offset_to_index(uint32_t offset)
{
    int index = 0;

    /* check Exp ROM BAR */
    if (offset == PCI_ROM_ADDRESS) {
        return PCI_ROM_SLOT;
    }

    /* calculate BAR index */
    index = (offset - PCI_BASE_ADDRESS_0) >> 2;
    if (index >= PCI_NUM_REGIONS) {
        return -1;
    }

    return index;
}

static uint32_t xen_pt_pci_read_config(PCIDevice *d, uint32_t addr, int len)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    uint32_t val = 0;
    XenPTRegGroup *reg_grp_entry = NULL;
    XenPTReg *reg_entry = NULL;
    int rc = 0;
    int emul_len = 0;
    uint32_t find_addr = addr;

    if (xen_pt_pci_config_access_check(d, addr, len)) {
        goto exit;
    }

    /* find register group entry */
    reg_grp_entry = xen_pt_find_reg_grp(s, addr);
    if (reg_grp_entry) {
        /* check 0-Hardwired register group */
        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
            /* no need to emulate, just return 0 */
            val = 0;
            goto exit;
        }
    }

    /* read I/O device register value */
    rc = xen_host_pci_get_block(&s->real_device, addr, (uint8_t *)&val, len);
    if (rc < 0) {
        XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
        memset(&val, 0xff, len);
    }

    /* just return the I/O device register value for
     * passthrough type register group */
    if (reg_grp_entry == NULL) {
        goto exit;
    }

    /* adjust the read value to appropriate CFC-CFF window */
    val <<= (addr & 3) << 3;
    emul_len = len;

    /* loop around the guest requested size */
    while (emul_len > 0) {
        /* find register entry to be emulated */
        reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
        if (reg_entry) {
            XenPTRegInfo *reg = reg_entry->reg;
            uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
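            /*
             * Build a mask of 0xff per byte still being emulated, then
             * shift it up to this register's byte position within the
             * 32-bit config window.
             */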
            uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
            uint8_t *ptr_val = NULL;

            valid_mask <<= (find_addr - real_offset) << 3;
            ptr_val = (uint8_t *)&val + (real_offset & 3);

            /* do emulation based on register size */
            switch (reg->size) {
            case 1:
                if (reg->u.b.read) {
                    rc = reg->u.b.read(s, reg_entry, ptr_val, valid_mask);
                }
                break;
            case 2:
                if (reg->u.w.read) {
                    rc = reg->u.w.read(s, reg_entry,
                                       (uint16_t *)ptr_val, valid_mask);
                }
                break;
            case 4:
                if (reg->u.dw.read) {
                    rc = reg->u.dw.read(s, reg_entry,
                                        (uint32_t *)ptr_val, valid_mask);
                }
                break;
            }

            if (rc < 0) {
                xen_shutdown_fatal_error("Internal error: Invalid read "
                                         "emulation. (%s, rc: %d)\n",
                                         __func__, rc);
                return 0;
            }

            /* calculate next address to find */
            emul_len -= reg->size;
            if (emul_len > 0) {
                find_addr = real_offset + reg->size;
            }
        } else {
            /* nothing to do with passthrough type register,
             * continue to find next byte */
            emul_len--;
            find_addr++;
        }
    }

    /* need to shift back before returning them to pci bus emulator */
    val >>= ((addr & 3) << 3);

exit:
    XEN_PT_LOG_CONFIG(d, addr, val, len);
    return val;
}

static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr,
                                    uint32_t val, int len)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    int index = 0;
    XenPTRegGroup *reg_grp_entry = NULL;
    int rc = 0;
    uint32_t read_val = 0, wb_mask;
    int emul_len = 0;
    XenPTReg *reg_entry = NULL;
    uint32_t find_addr = addr;
    XenPTRegInfo *reg = NULL;
    bool wp_flag = false;

    if (xen_pt_pci_config_access_check(d, addr, len)) {
        return;
    }

    XEN_PT_LOG_CONFIG(d, addr, val, len);

    /* check unused BAR register */
    index = xen_pt_bar_offset_to_index(addr);
    if ((index >= 0) && (val != 0)) {
        uint32_t chk = val;

        if (index == PCI_ROM_SLOT) {
            chk |= (uint32_t)~PCI_ROM_ADDRESS_MASK;
        }

        if ((chk != XEN_PT_BAR_ALLF) &&
            (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) {
            XEN_PT_WARN(d, "Guest attempt to set address to unused "
                        "Base Address Register. (addr: 0x%02x, len: %d)\n",
                        addr, len);
        }
    }

    /* find register group entry */
    reg_grp_entry = xen_pt_find_reg_grp(s, addr);
    if (reg_grp_entry) {
        /* check 0-Hardwired register group */
        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
            /* ignore silently */
            XEN_PT_WARN(d, "Access to 0-Hardwired register. "
                        "(addr: 0x%02x, len: %d)\n", addr, len);
            return;
        }
    }
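    /*
     * Read the current value from the real device so emulated writes can
     * merge with it; wb_mask marks, with 0xff per byte, which bytes of the
     * guest's write remain candidates for write-back to the real device.
     */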
    rc = xen_host_pci_get_block(&s->real_device, addr,
                                (uint8_t *)&read_val, len);
    if (rc < 0) {
        XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc);
        memset(&read_val, 0xff, len);
        wb_mask = 0;
    } else {
        wb_mask = 0xFFFFFFFF >> ((4 - len) << 3);
    }

    /* pass directly to the real device for passthrough type register group */
    if (reg_grp_entry == NULL) {
        if (!s->permissive) {
            wb_mask = 0;
            wp_flag = true;
        }
        goto out;
    }

    memory_region_transaction_begin();
    pci_default_write_config(d, addr, val, len);

    /* adjust the read and write value to appropriate CFC-CFF window */
    read_val <<= (addr & 3) << 3;
    val <<= (addr & 3) << 3;
    emul_len = len;

    /* loop around the guest requested size */
    while (emul_len > 0) {
        /* find register entry to be emulated */
        reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr);
        if (reg_entry) {
            reg = reg_entry->reg;
            uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
            uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
            uint8_t *ptr_val = NULL;
            uint32_t wp_mask = reg->emu_mask | reg->ro_mask;

            valid_mask <<= (find_addr - real_offset) << 3;
            ptr_val = (uint8_t *)&val + (real_offset & 3);
            if (!s->permissive) {
                wp_mask |= reg->res_mask;
            }
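            /*
             * If every bit of this register is write-protected (emulated,
             * read-only or, outside permissive mode, reserved), drop its
             * bytes from the write-back mask so they never reach the
             * real device.
             */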
            if (wp_mask == (0xFFFFFFFF >> ((4 - reg->size) << 3))) {
                wb_mask &= ~((wp_mask >> ((find_addr - real_offset) << 3))
                             << ((len - emul_len) << 3));
            }

            /* do emulation based on register size */
            switch (reg->size) {
            case 1:
                if (reg->u.b.write) {
                    rc = reg->u.b.write(s, reg_entry, ptr_val,
                                        read_val >> ((real_offset & 3) << 3),
                                        valid_mask);
                }
                break;
            case 2:
                if (reg->u.w.write) {
                    rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val,
                                        (read_val >> ((real_offset & 3) << 3)),
                                        valid_mask);
                }
                break;
            case 4:
                if (reg->u.dw.write) {
                    rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val,
                                         (read_val >> ((real_offset & 3) << 3)),
                                         valid_mask);
                }
                break;
            }

            if (rc < 0) {
                xen_shutdown_fatal_error("Internal error: Invalid write"
                                         " emulation. (%s, rc: %d)\n",
                                         __func__, rc);
                return;
            }

            /* calculate next address to find */
            emul_len -= reg->size;
            if (emul_len > 0) {
                find_addr = real_offset + reg->size;
            }
        } else {
            /* nothing to do with passthrough type register,
             * continue to find next byte */
            if (!s->permissive) {
                wb_mask &= ~(0xff << ((len - emul_len) << 3));
                /* Unused BARs will make it here, but we don't want to issue
                 * warnings for writes to them (bogus writes get dealt with
                 * above).
                 */
                if (index < 0) {
                    wp_flag = true;
                }
            }
            emul_len--;
            find_addr++;
        }
    }

    /* need to shift back before passing them to xen_host_pci_set_block. */
    val >>= (addr & 3) << 3;

    memory_region_transaction_commit();

out:
    if (wp_flag && !s->permissive_warned) {
        s->permissive_warned = true;
        xen_pt_log(d, "Write-back to unknown field 0x%02x (partially) inhibited (0x%0*x)\n",
                   addr, len * 2, wb_mask);
        xen_pt_log(d, "If the device doesn't work, try enabling permissive mode\n");
        xen_pt_log(d, "(unsafe) and if it helps report the problem to xen-devel\n");
    }
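    /*
     * Walk wb_mask for runs of contiguous writable bytes and forward each
     * run to the real device with a single xen_host_pci_set_block() call.
     */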
    for (index = 0; wb_mask; index += len) {
        /* unknown regs are passed through */
        while (!(wb_mask & 0xff)) {
            index++;
            wb_mask >>= 8;
        }
        len = 0;
        do {
            len++;
            wb_mask >>= 8;
        } while (wb_mask & 0xff);
        rc = xen_host_pci_set_block(&s->real_device, addr + index,
                                    (uint8_t *)&val + index, len);

        if (rc < 0) {
            XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc);
        }
    }
}
/* register regions */

static uint64_t xen_pt_bar_read(void *o, hwaddr addr,
                                unsigned size)
{
    PCIDevice *d = o;

    /* if this function is called, that probably means that there is a
     * misconfiguration of the IOMMU. */
    XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"HWADDR_FMT_plx"\n",
               addr);
    return 0;
}

static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val,
                             unsigned size)
{
    PCIDevice *d = o;

    /* Same comment as xen_pt_bar_read function */
    XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"HWADDR_FMT_plx"\n",
               addr);
}

static const MemoryRegionOps ops = {
    .endianness = DEVICE_NATIVE_ENDIAN,
    .read = xen_pt_bar_read,
    .write = xen_pt_bar_write,
};

static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd)
{
    int i = 0;
    XenHostPCIDevice *d = &s->real_device;

    /* Register PIO/MMIO BARs */
    for (i = 0; i < PCI_ROM_SLOT; i++) {
        XenHostPCIIORegion *r = &d->io_regions[i];
        uint8_t type;

        if (r->base_addr == 0 || r->size == 0) {
            continue;
        }

        s->bases[i].access.u = r->base_addr;

        if (r->type & XEN_HOST_PCI_REGION_TYPE_IO) {
            type = PCI_BASE_ADDRESS_SPACE_IO;
            *cmd |= PCI_COMMAND_IO;
        } else {
            type = PCI_BASE_ADDRESS_SPACE_MEMORY;
            if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) {
                type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
            }
            if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) {
                type |= PCI_BASE_ADDRESS_MEM_TYPE_64;
            }
            *cmd |= PCI_COMMAND_MEMORY;
        }

        memory_region_init_io(&s->bar[i], OBJECT(s), &ops, &s->dev,
                              "xen-pci-pt-bar", r->size);
        pci_register_bar(&s->dev, i, type, &s->bar[i]);

        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64
                   " base_addr=0x%08"PRIx64" type: 0x%x)\n",
                   i, r->size, r->base_addr, type);
    }

    /* Register expansion ROM address */
    if (d->rom.base_addr && d->rom.size) {
        uint32_t bar_data = 0;

        /* Re-set BAR reported by OS, otherwise ROM can't be read. */
        if (xen_host_pci_get_long(d, PCI_ROM_ADDRESS, &bar_data)) {
            return 0;
        }
        if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) {
            bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK;
            xen_host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data);
        }

        s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr;

        memory_region_init_io(&s->rom, OBJECT(s), &ops, &s->dev,
                              "xen-pci-pt-rom", d->rom.size);
        pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH,
                         &s->rom);

        XEN_PT_LOG(&s->dev, "Expansion ROM registered (size=0x%08"PRIx64
                   " base_addr=0x%08"PRIx64")\n",
                   d->rom.size, d->rom.base_addr);
    }

    xen_pt_register_vga_regions(d);
    return 0;
}

/* region mapping */

static int xen_pt_bar_from_region(XenPCIPassthroughState *s, MemoryRegion *mr)
{
    int i = 0;

    for (i = 0; i < PCI_NUM_REGIONS - 1; i++) {
        if (mr == &s->bar[i]) {
            return i;
        }
    }
    if (mr == &s->rom) {
        return PCI_ROM_SLOT;
    }
    return -1;
}

/*
 * This function checks if an io_region overlaps an io_region from another
 * device. The io_region to check is provided with (addr, size and type).
 * A callback can be provided and will be called for every region that is
 * overlapped.
 * The return value indicates if the region is overlapped.
 */
struct CheckBarArgs {
    XenPCIPassthroughState *s;
    pcibus_t addr;
    pcibus_t size;
    uint8_t type;
    bool rc;
};

static void xen_pt_check_bar_overlap(PCIBus *bus, PCIDevice *d, void *opaque)
{
    struct CheckBarArgs *arg = opaque;
    XenPCIPassthroughState *s = arg->s;
    uint8_t type = arg->type;
    int i;

    if (d->devfn == s->dev.devfn) {
        return;
    }

    /* xxx: This ignores bridges. */
    for (i = 0; i < PCI_NUM_REGIONS; i++) {
        const PCIIORegion *r = &d->io_regions[i];

        if (!r->size) {
            continue;
        }
        if ((type & PCI_BASE_ADDRESS_SPACE_IO)
            != (r->type & PCI_BASE_ADDRESS_SPACE_IO)) {
            continue;
        }

        if (ranges_overlap(arg->addr, arg->size, r->addr, r->size)) {
            XEN_PT_WARN(&s->dev,
                        "Overlapped to device [%02x:%02x.%d] Region: %i"
                        " (addr: 0x%"FMT_PCIBUS", len: 0x%"FMT_PCIBUS")\n",
                        pci_bus_num(bus), PCI_SLOT(d->devfn),
                        PCI_FUNC(d->devfn), i, r->addr, r->size);
            arg->rc = true;
        }
    }
}

static void xen_pt_region_update(XenPCIPassthroughState *s,
                                 MemoryRegionSection *sec, bool adding)
{
    PCIDevice *d = &s->dev;
    MemoryRegion *mr = sec->mr;
    int bar = -1;
    int rc;
    int op = adding ? DPCI_ADD_MAPPING : DPCI_REMOVE_MAPPING;
    struct CheckBarArgs args = {
        .s = s,
        .addr = sec->offset_within_address_space,
        .size = int128_get64(sec->size),
        .rc = false,
    };

    bar = xen_pt_bar_from_region(s, mr);
    if (bar == -1 && (!s->msix || &s->msix->mmio != mr)) {
        return;
    }
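    /*
     * The MSI-X table MMIO is not mapped straight through to the device:
     * QEMU keeps intercepting it, so only the new base address is recorded
     * and xen_pt_msix_update_remap() refreshes any already-mapped entries.
     */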
    if (s->msix && &s->msix->mmio == mr) {
        if (adding) {
            s->msix->mmio_base_addr = sec->offset_within_address_space;
            rc = xen_pt_msix_update_remap(s, s->msix->bar_index);
        }
        return;
    }

    args.type = d->io_regions[bar].type;
    pci_for_each_device_under_bus(pci_get_bus(d),
                                  xen_pt_check_bar_overlap, &args);
    if (args.rc) {
        XEN_PT_WARN(d, "Region: %d (addr: 0x%"FMT_PCIBUS
                    ", len: 0x%"FMT_PCIBUS") is overlapped.\n",
                    bar, sec->offset_within_address_space,
                    int128_get64(sec->size));
    }

    if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) {
        uint32_t guest_port = sec->offset_within_address_space;
        uint32_t machine_port = s->bases[bar].access.pio_base;
        uint32_t size = int128_get64(sec->size);

        rc = xc_domain_ioport_mapping(xen_xc, xen_domid,
                                      guest_port, machine_port, size,
                                      op);
        if (rc) {
            XEN_PT_ERR(d, "%s ioport mapping failed! (err: %i)\n",
                       adding ? "create new" : "remove old", errno);
        }
    } else {
        pcibus_t guest_addr = sec->offset_within_address_space;
        pcibus_t machine_addr = s->bases[bar].access.maddr
            + sec->offset_within_region;
        pcibus_t size = int128_get64(sec->size);
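        /*
         * xc_domain_memory_mapping() works on page frame numbers; the
         * addresses and the size are rounded up to whole pages here.
         */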
        rc = xc_domain_memory_mapping(xen_xc, xen_domid,
                                      XEN_PFN(guest_addr + XC_PAGE_SIZE - 1),
                                      XEN_PFN(machine_addr + XC_PAGE_SIZE - 1),
                                      XEN_PFN(size + XC_PAGE_SIZE - 1),
                                      op);
        if (rc) {
            XEN_PT_ERR(d, "%s mem mapping failed! (err: %i)\n",
                       adding ? "create new" : "remove old", errno);
        }
    }
}

static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             memory_listener);

    memory_region_ref(sec->mr);
    xen_pt_region_update(s, sec, true);
}

static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             memory_listener);

    xen_pt_region_update(s, sec, false);
    memory_region_unref(sec->mr);
}

static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             io_listener);

    memory_region_ref(sec->mr);
    xen_pt_region_update(s, sec, true);
}

static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec)
{
    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                             io_listener);

    xen_pt_region_update(s, sec, false);
    memory_region_unref(sec->mr);
}
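/*
 * Both listeners funnel into xen_pt_region_update(): whenever the guest
 * reprograms a BAR, the resulting region del/add pair tears down the old
 * Xen ioport/memory mapping and installs the new one.
 */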
static const MemoryListener xen_pt_memory_listener = {
    .name = "xen-pt-mem",
    .region_add = xen_pt_region_add,
    .region_del = xen_pt_region_del,
    .priority = 10,
};

static const MemoryListener xen_pt_io_listener = {
    .name = "xen-pt-io",
    .region_add = xen_pt_io_region_add,
    .region_del = xen_pt_io_region_del,
    .priority = 10,
};
/* destroy: tear down IRQ bindings, MSI/MSI-X state, emulated config
 * registers and memory listeners of a passthrough device. */
static void xen_pt_destroy(PCIDevice *d)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    XenHostPCIDevice *host_dev = &s->real_device;
    uint8_t machine_irq = s->machine_irq;
    uint8_t intx;
    int rc;

    if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) {
        intx = xen_pt_pci_intx(s);
        rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq,
                                     PT_IRQ_TYPE_PCI,
                                     pci_dev_bus_num(d),
                                     PCI_SLOT(s->dev.devfn),
                                     intx,
                                     0 /* isa_irq */);
        if (rc < 0) {
            XEN_PT_ERR(d, "unbinding of interrupt INT%c failed."
                       " (machine irq: %i, err: %d)"
                       " But bravely continuing on..\n",
                       'a' + intx, machine_irq, errno);
        }
    }

    /* N.B. xen_pt_config_delete takes care of freeing them. */
    if (s->msi) {
        xen_pt_msi_disable(s);
    }
    if (s->msix) {
        xen_pt_msix_disable(s);
    }

    if (machine_irq) {
        xen_pt_mapped_machine_irq[machine_irq]--;

        if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
            rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq);

            if (rc < 0) {
                XEN_PT_ERR(d, "unmapping of interrupt %i failed. (err: %d)"
                           " But bravely continuing on..\n",
                           machine_irq, errno);
            }
        }
        s->machine_irq = 0;
    }

    /* delete all emulated config registers */
    xen_pt_config_delete(s);

    xen_pt_unregister_vga_regions(host_dev);

    if (s->listener_set) {
        memory_listener_unregister(&s->memory_listener);
        memory_listener_unregister(&s->io_listener);
        s->listener_set = false;
    }
    if (!xen_host_pci_device_closed(&s->real_device)) {
        xen_host_pci_device_put(&s->real_device);
    }
}

/* init */

static void xen_pt_realize(PCIDevice *d, Error **errp)
{
    ERRP_GUARD();
    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
    int i, rc = 0;
    uint8_t machine_irq = 0, scratch;
    uint16_t cmd = 0;
    int pirq = XEN_PT_UNASSIGNED_PIRQ;

    /* register real device */
    XEN_PT_LOG(d, "Assigning real physical device %02x:%02x.%d"
               " to devfn 0x%x\n",
               s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function,
               s->dev.devfn);

    s->is_virtfn = s->real_device.is_virtfn;
    if (s->is_virtfn) {
        XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n",
                   s->real_device.domain, s->real_device.bus,
                   s->real_device.dev, s->real_device.func);
    }

    /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
    memset(d->config, 0, PCI_CONFIG_SPACE_SIZE);

    s->memory_listener = xen_pt_memory_listener;
    s->io_listener = xen_pt_io_listener;

    /* Setup VGA bios for passthrough GFX */
    if ((s->real_device.domain == XEN_PCI_IGD_DOMAIN) &&
        (s->real_device.bus == XEN_PCI_IGD_BUS) &&
        (s->real_device.dev == XEN_PCI_IGD_DEV) &&
        (s->real_device.func == XEN_PCI_IGD_FN)) {
        if (!is_igd_vga_passthrough(&s->real_device)) {
            error_setg(errp, "Need to enable igd-passthru if you're trying"
                       " to pass through IGD GFX");
            xen_host_pci_device_put(&s->real_device);
            return;
        }

        xen_pt_setup_vga(s, &s->real_device, errp);
        if (*errp) {
            error_append_hint(errp, "Setup VGA BIOS of passthrough"
                              " GFX failed");
            xen_host_pci_device_put(&s->real_device);
            return;
        }

        /* Register ISA bridge for passthrough GFX. */
        xen_igd_passthrough_isa_bridge_create(s, &s->real_device);
    }

    /* Handle real device's MMIO/PIO BARs */
    xen_pt_register_regions(s, &cmd);

    /* reinitialize each config register to be emulated */
    xen_pt_config_init(s, errp);
    if (*errp) {
        error_append_hint(errp, "PCI Config space initialisation failed");
        rc = -1;
        goto err_out;
    }

    /* Bind interrupt */
    rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch);
    if (rc) {
        error_setg_errno(errp, errno, "Failed to read PCI_INTERRUPT_PIN");
        goto err_out;
    }
    if (!scratch) {
        XEN_PT_LOG(d, "no pin interrupt\n");
        goto out;
    }

    machine_irq = s->real_device.irq;
    if (machine_irq == 0) {
        XEN_PT_LOG(d, "machine irq is 0\n");
        cmd |= PCI_COMMAND_INTX_DISABLE;
        goto out;
    }
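    /*
     * Ask Xen to map the machine IRQ to a pirq the guest can use; on
     * failure the device is kept usable with INTx disabled.
     */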
    rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
    if (rc < 0) {
        XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n",
                   machine_irq, pirq, errno);

        /* Disable PCI intx assertion (turn on bit10 of devctl) */
        cmd |= PCI_COMMAND_INTX_DISABLE;
        machine_irq = 0;
        s->machine_irq = 0;
    } else {
        machine_irq = pirq;
        s->machine_irq = pirq;
        xen_pt_mapped_machine_irq[machine_irq]++;
    }

    /* bind machine_irq to device */
    if (machine_irq != 0) {
        uint8_t e_intx = xen_pt_pci_intx(s);

        rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq,
                                       pci_dev_bus_num(d),
                                       PCI_SLOT(d->devfn),
                                       e_intx);
        if (rc < 0) {
            XEN_PT_ERR(d, "Binding of interrupt %i failed! (err: %d)\n",
                       e_intx, errno);

            /* Disable PCI intx assertion (turn on bit10 of devctl) */
            cmd |= PCI_COMMAND_INTX_DISABLE;
            xen_pt_mapped_machine_irq[machine_irq]--;

            if (xen_pt_mapped_machine_irq[machine_irq] == 0) {
                if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq)) {
                    XEN_PT_ERR(d, "Unmapping of machine interrupt %i failed!"
                               " (err: %d)\n", machine_irq, errno);
                }
            }
            s->machine_irq = 0;
        }
    }

out:
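    /*
     * cmd has accumulated PCI_COMMAND bits along the way (IO/MEMORY
     * enables from BAR registration, INTX_DISABLE on IRQ setup failure);
     * merge them into the real device's command register in one go.
     */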
    if (cmd) {
        uint16_t val;

        rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val);
        if (rc) {
            error_setg_errno(errp, errno, "Failed to read PCI_COMMAND");
            goto err_out;
        } else {
            val |= cmd;
            rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val);
            if (rc) {
                error_setg_errno(errp, errno, "Failed to write PCI_COMMAND"
                                 " val = 0x%x", val);
                goto err_out;
            }
        }
    }

    memory_listener_register(&s->memory_listener, &address_space_memory);
    memory_listener_register(&s->io_listener, &address_space_io);
    s->listener_set = true;

    XEN_PT_LOG(d,
               "Real physical device %02x:%02x.%d registered successfully\n",
               s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function);

    return;

err_out:
    for (i = 0; i < PCI_ROM_SLOT; i++) {
        object_unparent(OBJECT(&s->bar[i]));
    }
    object_unparent(OBJECT(&s->rom));

    xen_pt_destroy(d);
    assert(rc);
}

static void xen_pt_unregister_device(PCIDevice *d)
{
    xen_pt_destroy(d);
}

static Property xen_pci_passthrough_properties[] = {
    DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr),
    DEFINE_PROP_BOOL("permissive", XenPCIPassthroughState, permissive, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void xen_pci_passthrough_instance_init(Object *obj)
{
    /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
     * line, therefore, no need to wait to realize like other devices */
    PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}

void xen_igd_reserve_slot(PCIBus *pci_bus)
{
    if (!xen_igd_gfx_pt_enabled()) {
        return;
    }

    XEN_PT_LOG(0, "Reserving PCI slot 2 for IGD\n");
    pci_bus->slot_reserved_mask |= XEN_PCI_IGD_SLOT_MASK;
}
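/*
 * Wraps the qdev realize path: open the host device first, and if slot 2
 * was reserved for IGD, release the reservation only when the device
 * being realized really is the Intel IGD.
 */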
static void xen_igd_clear_slot(DeviceState *qdev, Error **errp)
{
    ERRP_GUARD();
    PCIDevice *pci_dev = (PCIDevice *)qdev;
    XenPCIPassthroughState *s = XEN_PT_DEVICE(pci_dev);
    XenPTDeviceClass *xpdc = XEN_PT_DEVICE_GET_CLASS(s);
    PCIBus *pci_bus = pci_get_bus(pci_dev);

    xen_host_pci_device_get(&s->real_device,
                            s->hostaddr.domain, s->hostaddr.bus,
                            s->hostaddr.slot, s->hostaddr.function,
                            errp);
    if (*errp) {
        error_append_hint(errp, "Failed to \"open\" the real pci device");
        return;
    }

    if (!(pci_bus->slot_reserved_mask & XEN_PCI_IGD_SLOT_MASK)) {
        xpdc->pci_qdev_realize(qdev, errp);
        return;
    }

    if (is_igd_vga_passthrough(&s->real_device) &&
        s->real_device.domain == XEN_PCI_IGD_DOMAIN &&
        s->real_device.bus == XEN_PCI_IGD_BUS &&
        s->real_device.dev == XEN_PCI_IGD_DEV &&
        s->real_device.func == XEN_PCI_IGD_FN &&
        s->real_device.vendor_id == PCI_VENDOR_ID_INTEL) {
        pci_bus->slot_reserved_mask &= ~XEN_PCI_IGD_SLOT_MASK;
        XEN_PT_LOG(pci_dev, "Intel IGD found, using slot 2\n");
    }
    xpdc->pci_qdev_realize(qdev, errp);
}

static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
    XenPTDeviceClass *xpdc = XEN_PT_DEVICE_CLASS(klass);

    xpdc->pci_qdev_realize = dc->realize;
    dc->realize = xen_igd_clear_slot;
    k->realize = xen_pt_realize;
    k->exit = xen_pt_unregister_device;
    k->config_read = xen_pt_pci_read_config;
    k->config_write = xen_pt_pci_write_config;
    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    dc->desc = "Assign a host PCI device with Xen";
    device_class_set_props(dc, xen_pci_passthrough_properties);
}

static void xen_pci_passthrough_finalize(Object *obj)
{
    XenPCIPassthroughState *s = XEN_PT_DEVICE(obj);

    xen_pt_msix_delete(s);
}

static const TypeInfo xen_pci_passthrough_info = {
    .name = TYPE_XEN_PT_DEVICE,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(XenPCIPassthroughState),
    .instance_finalize = xen_pci_passthrough_finalize,
    .class_init = xen_pci_passthrough_class_init,
    .class_size = sizeof(XenPTDeviceClass),
    .instance_init = xen_pci_passthrough_instance_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { INTERFACE_PCIE_DEVICE },
        { },
    },
};

static void xen_pci_passthrough_register_types(void)
{
    type_register_static(&xen_pci_passthrough_info);
}

type_init(xen_pci_passthrough_register_types)