/* pci-quirks.c */
  1. /*
  2. * device quirks for PCI devices
  3. *
  4. * Copyright Red Hat, Inc. 2012-2015
  5. *
  6. * Authors:
  7. * Alex Williamson <alex.williamson@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "exec/memop.h"
  14. #include "qemu/units.h"
  15. #include "qemu/error-report.h"
  16. #include "qemu/main-loop.h"
  17. #include "qemu/module.h"
  18. #include "qemu/range.h"
  19. #include "qapi/error.h"
  20. #include "qapi/visitor.h"
  21. #include <sys/ioctl.h>
  22. #include "hw/hw.h"
  23. #include "hw/nvram/fw_cfg.h"
  24. #include "hw/qdev-properties.h"
  25. #include "pci.h"
  26. #include "trace.h"
  27. /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
  28. static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
  29. {
  30. return (vendor == PCI_ANY_ID || vendor == vdev->vendor_id) &&
  31. (device == PCI_ANY_ID || device == vdev->device_id);
  32. }
  33. static bool vfio_is_vga(VFIOPCIDevice *vdev)
  34. {
  35. PCIDevice *pdev = &vdev->pdev;
  36. uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
  37. return class == PCI_CLASS_DISPLAY_VGA;
  38. }
  39. /*
  40. * List of device ids/vendor ids for which to disable
  41. * option rom loading. This avoids the guest hangs during rom
  42. * execution as noticed with the BCM 57810 card for lack of a
 * better way to handle such issues.
  44. * The user can still override by specifying a romfile or
  45. * rombar=1.
  46. * Please see https://bugs.launchpad.net/qemu/+bug/1284874
  47. * for an analysis of the 57810 card hang. When adding
  48. * a new vendor id/device id combination below, please also add
  49. * your card/environment details and information that could
  50. * help in debugging to the bug tracking this issue
  51. */
/* Vendor/device pairs whose option ROM is suppressed; see comment above. */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */
};
  58. bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
  59. {
  60. int i;
  61. for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
  62. if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
  63. trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
  64. romblacklist[i].vendor,
  65. romblacklist[i].device);
  66. return true;
  67. }
  68. }
  69. return false;
  70. }
  71. /*
  72. * Device specific region quirks (mostly backdoors to PCI config space)
  73. */
  74. /*
  75. * The generic window quirks operate on an address and data register,
  76. * vfio_generic_window_address_quirk handles the address register and
  77. * vfio_generic_window_data_quirk handles the data register. These ops
  78. * pass reads and writes through to hardware until a value matching the
  79. * stored address match/mask is written. When this occurs, the data
  80. * register access emulated PCI config space for the device rather than
  81. * passing through accesses. This enables devices where PCI config space
  82. * is accessible behind a window register to maintain the virtualization
  83. * provided through vfio.
  84. */
/* One address pattern that opens the config-space window. */
typedef struct VFIOConfigWindowMatch {
    uint32_t match; /* value the (masked-out) address bits must equal */
    uint32_t mask;  /* bits of the written value carrying the offset */
} VFIOConfigWindowMatch;

/* State shared between the address and data register handlers. */
typedef struct VFIOConfigWindowQuirk {
    struct VFIOPCIDevice *vdev;
    uint32_t address_val;    /* config offset captured on a match */
    uint32_t address_offset; /* BAR offset of the address register */
    uint32_t data_offset;    /* BAR offset of the data register */
    bool window_enabled;     /* last address write matched an entry */
    uint8_t bar;             /* BAR index hosting the window */
    MemoryRegion *addr_mem;
    MemoryRegion *data_mem;
    uint32_t nr_matches;
    VFIOConfigWindowMatch matches[]; /* flexible array, one per pattern */
} VFIOConfigWindowQuirk;
  101. static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
  102. hwaddr addr,
  103. unsigned size)
  104. {
  105. VFIOConfigWindowQuirk *window = opaque;
  106. VFIOPCIDevice *vdev = window->vdev;
  107. return vfio_region_read(&vdev->bars[window->bar].region,
  108. addr + window->address_offset, size);
  109. }
  110. static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
  111. uint64_t data,
  112. unsigned size)
  113. {
  114. VFIOConfigWindowQuirk *window = opaque;
  115. VFIOPCIDevice *vdev = window->vdev;
  116. int i;
  117. window->window_enabled = false;
  118. vfio_region_write(&vdev->bars[window->bar].region,
  119. addr + window->address_offset, data, size);
  120. for (i = 0; i < window->nr_matches; i++) {
  121. if ((data & ~window->matches[i].mask) == window->matches[i].match) {
  122. window->window_enabled = true;
  123. window->address_val = data & window->matches[i].mask;
  124. trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
  125. memory_region_name(window->addr_mem), data);
  126. break;
  127. }
  128. }
  129. }
/* MemoryRegionOps for the window's address register. */
static const MemoryRegionOps vfio_generic_window_address_quirk = {
    .read = vfio_generic_window_quirk_address_read,
    .write = vfio_generic_window_quirk_address_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
  135. static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
  136. hwaddr addr, unsigned size)
  137. {
  138. VFIOConfigWindowQuirk *window = opaque;
  139. VFIOPCIDevice *vdev = window->vdev;
  140. uint64_t data;
  141. /* Always read data reg, discard if window enabled */
  142. data = vfio_region_read(&vdev->bars[window->bar].region,
  143. addr + window->data_offset, size);
  144. if (window->window_enabled) {
  145. data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
  146. trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
  147. memory_region_name(window->data_mem), data);
  148. }
  149. return data;
  150. }
  151. static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
  152. uint64_t data, unsigned size)
  153. {
  154. VFIOConfigWindowQuirk *window = opaque;
  155. VFIOPCIDevice *vdev = window->vdev;
  156. if (window->window_enabled) {
  157. vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
  158. trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
  159. memory_region_name(window->data_mem), data);
  160. return;
  161. }
  162. vfio_region_write(&vdev->bars[window->bar].region,
  163. addr + window->data_offset, data, size);
  164. }
/* MemoryRegionOps for the window's data register. */
static const MemoryRegionOps vfio_generic_window_data_quirk = {
    .read = vfio_generic_window_quirk_data_read,
    .write = vfio_generic_window_quirk_data_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
  170. /*
  171. * The generic mirror quirk handles devices which expose PCI config space
  172. * through a region within a BAR. When enabled, reads and writes are
  173. * redirected through to emulated PCI config space. XXX if PCI config space
  174. * used memory regions, this could just be an alias.
  175. */
/* State for a BAR region that mirrors PCI config space. */
typedef struct VFIOConfigMirrorQuirk {
    struct VFIOPCIDevice *vdev;
    uint32_t offset;   /* start of the mirror within the BAR */
    uint8_t bar;       /* BAR index containing the mirror */
    MemoryRegion *mem;
    uint8_t data[];    /* scratch space for quirk-specific use */
} VFIOConfigMirrorQuirk;
  183. static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
  184. hwaddr addr, unsigned size)
  185. {
  186. VFIOConfigMirrorQuirk *mirror = opaque;
  187. VFIOPCIDevice *vdev = mirror->vdev;
  188. uint64_t data;
  189. /* Read and discard in case the hardware cares */
  190. (void)vfio_region_read(&vdev->bars[mirror->bar].region,
  191. addr + mirror->offset, size);
  192. data = vfio_pci_read_config(&vdev->pdev, addr, size);
  193. trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
  194. memory_region_name(mirror->mem),
  195. addr, data);
  196. return data;
  197. }
  198. static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
  199. uint64_t data, unsigned size)
  200. {
  201. VFIOConfigMirrorQuirk *mirror = opaque;
  202. VFIOPCIDevice *vdev = mirror->vdev;
  203. vfio_pci_write_config(&vdev->pdev, addr, data, size);
  204. trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
  205. memory_region_name(mirror->mem),
  206. addr, data);
  207. }
/* MemoryRegionOps for the generic config-space mirror. */
static const MemoryRegionOps vfio_generic_mirror_quirk = {
    .read = vfio_generic_quirk_mirror_read,
    .write = vfio_generic_quirk_mirror_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
  213. /* Is range1 fully contained within range2? */
  214. static bool vfio_range_contained(uint64_t first1, uint64_t len1,
  215. uint64_t first2, uint64_t len2) {
  216. return (first1 >= first2 && first1 + len1 <= first2 + len2);
  217. }
  218. #define PCI_VENDOR_ID_ATI 0x1002
  219. /*
  220. * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
  221. * through VGA register 0x3c3. On newer cards, the I/O port BAR is always
  222. * BAR4 (older cards like the X550 used BAR1, but we don't care to support
  223. * those). Note that on bare metal, a read of 0x3c3 doesn't always return the
  224. * I/O port BAR address. Originally this was coded to return the virtual BAR
  225. * address only if the physical register read returns the actual BAR address,
  226. * but users have reported greater success if we return the virtual address
  227. * unconditionally.
  228. */
  229. static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
  230. hwaddr addr, unsigned size)
  231. {
  232. VFIOPCIDevice *vdev = opaque;
  233. uint64_t data = vfio_pci_read_config(&vdev->pdev,
  234. PCI_BASE_ADDRESS_4 + 1, size);
  235. trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
  236. return data;
  237. }
  238. static const MemoryRegionOps vfio_ati_3c3_quirk = {
  239. .read = vfio_ati_3c3_quirk_read,
  240. .endianness = DEVICE_LITTLE_ENDIAN,
  241. };
  242. static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
  243. {
  244. VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
  245. QLIST_INIT(&quirk->ioeventfds);
  246. quirk->mem = g_new0(MemoryRegion, nr_mem);
  247. quirk->nr_mem = nr_mem;
  248. return quirk;
  249. }
/*
 * Tear down one ioeventfd: unlink it from its quirk list, drop the KVM
 * eventfd binding, detach whichever backend (kernel vfio offload or QEMU
 * userspace handler) was servicing it, and free the structure.
 */
static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
{
    QLIST_REMOVE(ioeventfd, next);
    memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
                              true, ioeventfd->data, &ioeventfd->e);

    if (ioeventfd->vfio) {
        struct vfio_device_ioeventfd vfio_ioeventfd;

        /* fd = -1 asks the kernel to remove the existing vfio ioeventfd */
        vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
        vfio_ioeventfd.flags = ioeventfd->size;
        vfio_ioeventfd.data = ioeventfd->data;
        vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
                                ioeventfd->region_addr;
        vfio_ioeventfd.fd = -1;

        if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd)) {
            error_report("Failed to remove vfio ioeventfd for %s+0x%"
                         HWADDR_PRIx"[%d]:0x%"PRIx64" (%m)",
                         memory_region_name(ioeventfd->mr), ioeventfd->addr,
                         ioeventfd->size, ioeventfd->data);
        }
    } else {
        /* Userspace fallback: just unregister our fd handler */
        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
                            NULL, NULL, NULL);
    }

    event_notifier_cleanup(&ioeventfd->e);
    trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
                              (uint64_t)ioeventfd->addr, ioeventfd->size,
                              ioeventfd->data);
    g_free(ioeventfd);
}
/* Remove only the dynamically-created ioeventfds attached to a quirk. */
static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
{
    VFIOIOEventFD *ioeventfd, *tmp;

    /* _SAFE variant required: vfio_ioeventfd_exit() unlinks and frees */
    QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
        if (ioeventfd->dynamic) {
            vfio_ioeventfd_exit(vdev, ioeventfd);
        }
    }
}
  288. static void vfio_ioeventfd_handler(void *opaque)
  289. {
  290. VFIOIOEventFD *ioeventfd = opaque;
  291. if (event_notifier_test_and_clear(&ioeventfd->e)) {
  292. vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
  293. ioeventfd->data, ioeventfd->size);
  294. trace_vfio_ioeventfd_handler(memory_region_name(ioeventfd->mr),
  295. (uint64_t)ioeventfd->addr, ioeventfd->size,
  296. ioeventfd->data);
  297. }
  298. }
/*
 * Create an ioeventfd for a (MemoryRegion, addr, size, data) write match.
 * The write is preferentially offloaded to the kernel vfio driver via
 * VFIO_DEVICE_IOEVENTFD; if that fails (or is disabled) the eventfd is
 * serviced by vfio_ioeventfd_handler() in QEMU userspace instead.
 * Returns NULL when KVM ioeventfds are disabled or notifier init fails.
 */
static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
                                          MemoryRegion *mr, hwaddr addr,
                                          unsigned size, uint64_t data,
                                          VFIORegion *region,
                                          hwaddr region_addr, bool dynamic)
{
    VFIOIOEventFD *ioeventfd;

    if (vdev->no_kvm_ioeventfd) {
        return NULL;
    }

    ioeventfd = g_malloc0(sizeof(*ioeventfd));

    if (event_notifier_init(&ioeventfd->e, 0)) {
        g_free(ioeventfd);
        return NULL;
    }

    /*
     * MemoryRegion and relative offset, plus additional ioeventfd setup
     * parameters for configuring and later tearing down KVM ioeventfd.
     */
    ioeventfd->mr = mr;
    ioeventfd->addr = addr;
    ioeventfd->size = size;
    ioeventfd->data = data;
    ioeventfd->dynamic = dynamic;
    /*
     * VFIORegion and relative offset for implementing the userspace
     * handler.  data & size fields shared for both uses.
     */
    ioeventfd->region = region;
    ioeventfd->region_addr = region_addr;

    if (!vdev->no_vfio_ioeventfd) {
        struct vfio_device_ioeventfd vfio_ioeventfd;

        vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
        vfio_ioeventfd.flags = ioeventfd->size;
        vfio_ioeventfd.data = ioeventfd->data;
        vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
                                ioeventfd->region_addr;
        vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);

        /* Records whether the kernel accepted the offload (0 on success) */
        ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
                                 VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
    }

    if (!ioeventfd->vfio) {
        /* Kernel offload unavailable: handle the eventfd in userspace */
        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
                            vfio_ioeventfd_handler, NULL, ioeventfd);
    }

    memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
                              true, ioeventfd->data, &ioeventfd->e);
    trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
                              size, data, ioeventfd->vfio);

    return ioeventfd;
}
/*
 * Install the 0x3c3 quirk on ATI VGA devices with an I/O port BAR4,
 * overlaying one byte at offset 3 of the 0x3c0 VGA register window.
 */
static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
{
    VFIOQuirk *quirk;

    /*
     * As long as the BAR is >= 256 bytes it will be aligned such that the
     * lower byte is always zero.  Filter out anything else, if it exists.
     */
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
        !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
        return;
    }

    quirk = vfio_quirk_alloc(1);

    memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
                          "vfio-ati-3c3-quirk", 1);
    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                3 /* offset 3 bytes from 0x3c0 */, quirk->mem);

    QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
                      quirk, next);

    trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
}
  370. /*
  371. * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
  372. * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access
  373. * the MMIO space directly, but a window to this space is provided through
  374. * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the
  375. * data register. When the address is programmed to a range of 0x4000-0x4fff
  376. * PCI configuration space is available. Experimentation seems to indicate
  377. * that read-only may be provided by hardware.
  378. */
/*
 * Install the BAR4 address/data window quirk on ATI VGA devices: offset 0
 * is the address register, offset 4 the data register, and addresses in
 * 0x4000..0x4fff (masked by config_size - 1) open the config-space window.
 */
static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *quirk;
    VFIOConfigWindowQuirk *window;

    /* This window doesn't seem to be used except by legacy VGA code */
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
        !vdev->vga || nr != 4) {
        return;
    }

    quirk = vfio_quirk_alloc(2);
    /* One trailing match entry in the flexible array */
    window = quirk->data = g_malloc0(sizeof(*window) +
                                     sizeof(VFIOConfigWindowMatch));
    window->vdev = vdev;
    window->address_offset = 0;
    window->data_offset = 4;
    window->nr_matches = 1;
    window->matches[0].match = 0x4000;
    window->matches[0].mask = vdev->config_size - 1;
    window->bar = nr;
    window->addr_mem = &quirk->mem[0];
    window->data_mem = &quirk->mem[1];

    memory_region_init_io(window->addr_mem, OBJECT(vdev),
                          &vfio_generic_window_address_quirk, window,
                          "vfio-ati-bar4-window-address-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        window->address_offset,
                                        window->addr_mem, 1);

    memory_region_init_io(window->data_mem, OBJECT(vdev),
                          &vfio_generic_window_data_quirk, window,
                          "vfio-ati-bar4-window-data-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        window->data_offset,
                                        window->data_mem, 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
}
  415. /*
  416. * Trap the BAR2 MMIO mirror to config space as well.
  417. */
/*
 * Trap the config-space mirror at BAR2 offset 0x4000 on newer ATI VGA
 * devices, overlaying a PCI_CONFIG_SPACE_SIZE-byte mirror region.
 */
static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *quirk;
    VFIOConfigMirrorQuirk *mirror;

    /* Only enable on newer devices where BAR2 is 64bit */
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
        !vdev->vga || nr != 2 || !vdev->bars[2].mem64) {
        return;
    }

    quirk = vfio_quirk_alloc(1);
    mirror = quirk->data = g_malloc0(sizeof(*mirror));
    mirror->mem = quirk->mem;
    mirror->vdev = vdev;
    mirror->offset = 0x4000;
    mirror->bar = nr;

    memory_region_init_io(mirror->mem, OBJECT(vdev),
                          &vfio_generic_mirror_quirk, mirror,
                          "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        mirror->offset, mirror->mem, 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
}
  441. /*
  442. * Older ATI/AMD cards like the X550 have a similar window to that above.
  443. * I/O port BAR1 provides a window to a mirror of PCI config space located
  444. * in BAR2 at offset 0xf00. We don't care to support such older cards, but
  445. * note it for future reference.
  446. */
  447. /*
  448. * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
  450. * https://github.com/pathscale/envytools/tree/master/hwdocs
  451. *
  452. * The first quirk is actually not documented in envytools and is found
  453. * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an
  454. * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access
  455. * the mirror of PCI config space found at BAR0 offset 0x1800. The access
  456. * sequence first writes 0x338 to I/O port 0x3d4. The target offset is
  457. * then written to 0x3d0. Finally 0x538 is written for a read and 0x738
  458. * is written for a write to 0x3d4. The BAR0 offset is then accessible
  459. * through 0x3d0. This quirk doesn't seem to be necessary on newer cards
  460. * that use the I/O port BAR5 window but it doesn't hurt to leave it.
  461. */
/* Steps of the NVIDIA 0x3d4/0x3d0 backdoor access sequence */
typedef enum {NONE = 0, SELECT, WINDOW, READ, WRITE} VFIONvidia3d0State;

/* Printable names, indexed by VFIONvidia3d0State, for tracing */
static const char *nv3d0_states[] = { "NONE", "SELECT",
                                      "WINDOW", "READ", "WRITE" };

typedef struct VFIONvidia3d0Quirk {
    VFIOPCIDevice *vdev;
    VFIONvidia3d0State state; /* current step in the access sequence */
    uint32_t offset;          /* BAR0 offset latched during SELECT */
} VFIONvidia3d0Quirk;
  470. static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
  471. hwaddr addr, unsigned size)
  472. {
  473. VFIONvidia3d0Quirk *quirk = opaque;
  474. VFIOPCIDevice *vdev = quirk->vdev;
  475. quirk->state = NONE;
  476. return vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
  477. addr + 0x14, size);
  478. }
/*
 * Writes to 0x3d4 drive the backdoor state machine: 0x338 starts a
 * sequence (SELECT), then after the offset is programmed via 0x3d0
 * (WINDOW), 0x538 arms a config read and 0x738 arms a config write.
 * Any unrecognized value or out-of-order step resets to NONE.  The write
 * is always forwarded to the real VGA register.
 */
static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
                                        uint64_t data, unsigned size)
{
    VFIONvidia3d0Quirk *quirk = opaque;
    VFIOPCIDevice *vdev = quirk->vdev;
    VFIONvidia3d0State old_state = quirk->state;

    quirk->state = NONE;

    switch (data) {
    case 0x338:
        if (old_state == NONE) {
            quirk->state = SELECT;
            trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
                                              nv3d0_states[quirk->state]);
        }
        break;
    case 0x538:
        if (old_state == WINDOW) {
            quirk->state = READ;
            trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
                                              nv3d0_states[quirk->state]);
        }
        break;
    case 0x738:
        if (old_state == WINDOW) {
            quirk->state = WRITE;
            trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
                                              nv3d0_states[quirk->state]);
        }
        break;
    }

    vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                   addr + 0x14, data, size);
}

/* MemoryRegionOps trapping VGA register 0x3d4 */
static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
    .read = vfio_nvidia_3d4_quirk_read,
    .write = vfio_nvidia_3d4_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
/*
 * Reads of 0x3d0: the hardware read always occurs, but when a READ step
 * is armed and the latched offset falls in the 0x1800 config mirror, the
 * result is replaced with the emulated config-space value.
 */
static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
                                           hwaddr addr, unsigned size)
{
    VFIONvidia3d0Quirk *quirk = opaque;
    VFIOPCIDevice *vdev = quirk->vdev;
    VFIONvidia3d0State old_state = quirk->state;
    uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                                  addr + 0x10, size);

    quirk->state = NONE;

    if (old_state == READ &&
        (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
        uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);

        data = vfio_pci_read_config(&vdev->pdev, offset, size);
        trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
                                         offset, size, data);
    }

    return data;
}
/*
 * Writes to 0x3d0: after SELECT this latches the target offset and enters
 * WINDOW; after WRITE, offsets inside the 0x1800 config mirror are
 * redirected into emulated config space (consuming the write).  All other
 * writes pass through to the real VGA register.
 */
static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
                                        uint64_t data, unsigned size)
{
    VFIONvidia3d0Quirk *quirk = opaque;
    VFIOPCIDevice *vdev = quirk->vdev;
    VFIONvidia3d0State old_state = quirk->state;

    quirk->state = NONE;

    if (old_state == SELECT) {
        quirk->offset = (uint32_t)data;
        quirk->state = WINDOW;
        trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
                                          nv3d0_states[quirk->state]);
    } else if (old_state == WRITE) {
        if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
            uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);

            vfio_pci_write_config(&vdev->pdev, offset, data, size);
            trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
                                              offset, data, size);
            return;
        }
    }

    vfio_vga_write(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
                   addr + 0x10, data, size);
}

/* MemoryRegionOps trapping VGA register 0x3d0 */
static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
    .read = vfio_nvidia_3d0_quirk_read,
    .write = vfio_nvidia_3d0_quirk_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
/*
 * Install the NVIDIA 0x3d4/0x3d0 backdoor quirk, overlaying two 2-byte
 * regions on the 0x3c0-relative VGA window.  Skipped when GeForce quirks
 * are disabled or BAR1 is absent.
 */
static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
{
    VFIOQuirk *quirk;
    VFIONvidia3d0Quirk *data;

    if (vdev->no_geforce_quirks ||
        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
        !vdev->bars[1].region.size) {
        return;
    }

    quirk = vfio_quirk_alloc(2);
    quirk->data = data = g_malloc0(sizeof(*data));
    data->vdev = vdev;

    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
                          data, "vfio-nvidia-3d4-quirk", 2);
    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);

    memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
                          data, "vfio-nvidia-3d0-quirk", 2);
    memory_region_add_subregion(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem,
                                0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);

    QLIST_INSERT_HEAD(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks,
                      quirk, next);

    trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
}
/*
 * The second quirk is documented in envytools.  The I/O port BAR5 is just
 * a set of address/data ports to the MMIO BARs.  The BAR we care about is
 * again BAR0.  This backdoor is apparently a bit newer than the one above,
 * so we need to trap not only the 256 bytes @0x1800, but all of PCI config
 * space; the extended space is made available at the 4k window @0x88000.
 */
/* State shared by the four BAR5 quirk memory regions of one device */
typedef struct VFIONvidiaBAR5Quirk {
    uint32_t master;          /* last value written through BAR5 offset 0x0 */
    uint32_t enable;          /* last value written through BAR5 offset 0x4 */
    MemoryRegion *addr_mem;   /* window address register quirk region */
    MemoryRegion *data_mem;   /* window data register quirk region */
    bool enabled;             /* are the window quirk regions active? */
    VFIOConfigWindowQuirk window; /* last for match data */
} VFIONvidiaBAR5Quirk;
  603. static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
  604. {
  605. VFIOPCIDevice *vdev = bar5->window.vdev;
  606. if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
  607. return;
  608. }
  609. bar5->enabled = !bar5->enabled;
  610. trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
  611. bar5->enabled ? "Enable" : "Disable");
  612. memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
  613. memory_region_set_enabled(bar5->data_mem, bar5->enabled);
  614. }
  615. static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
  616. hwaddr addr, unsigned size)
  617. {
  618. VFIONvidiaBAR5Quirk *bar5 = opaque;
  619. VFIOPCIDevice *vdev = bar5->window.vdev;
  620. return vfio_region_read(&vdev->bars[5].region, addr, size);
  621. }
  622. static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
  623. uint64_t data, unsigned size)
  624. {
  625. VFIONvidiaBAR5Quirk *bar5 = opaque;
  626. VFIOPCIDevice *vdev = bar5->window.vdev;
  627. vfio_region_write(&vdev->bars[5].region, addr, data, size);
  628. bar5->master = data;
  629. vfio_nvidia_bar5_enable(bar5);
  630. }
/* BAR5 offset 0x0 (master): pass-through reads, tracked writes */
static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
    .read = vfio_nvidia_bar5_quirk_master_read,
    .write = vfio_nvidia_bar5_quirk_master_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
  636. static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
  637. hwaddr addr, unsigned size)
  638. {
  639. VFIONvidiaBAR5Quirk *bar5 = opaque;
  640. VFIOPCIDevice *vdev = bar5->window.vdev;
  641. return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
  642. }
  643. static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
  644. uint64_t data, unsigned size)
  645. {
  646. VFIONvidiaBAR5Quirk *bar5 = opaque;
  647. VFIOPCIDevice *vdev = bar5->window.vdev;
  648. vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
  649. bar5->enable = data;
  650. vfio_nvidia_bar5_enable(bar5);
  651. }
/* BAR5 offset 0x4 (enable): pass-through reads, tracked writes */
static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
    .read = vfio_nvidia_bar5_quirk_enable_read,
    .write = vfio_nvidia_bar5_quirk_enable_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
/*
 * Install the BAR5 address/data backdoor quirk: two generic config-window
 * regions (address @0x8, data @0xc) that match the 0x1800 and 0x88000
 * config mirrors, plus two tracking regions over the master (0x0) and
 * enable (0x4) registers that gate the window regions on and off.
 */
static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *quirk;
    VFIONvidiaBAR5Quirk *bar5;
    VFIOConfigWindowQuirk *window;

    /* Requires an NVIDIA VGA device with an I/O port BAR5 */
    if (vdev->no_geforce_quirks ||
        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
        !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
        return;
    }

    quirk = vfio_quirk_alloc(4);
    /* Extra space holds the two window match entries filled in below */
    bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
                                   (sizeof(VFIOConfigWindowMatch) * 2));
    window = &bar5->window;

    window->vdev = vdev;
    window->address_offset = 0x8;
    window->data_offset = 0xc;
    window->nr_matches = 2;
    /* 256-byte config mirror @0x1800 and full config space @0x88000 */
    window->matches[0].match = 0x1800;
    window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
    window->matches[1].match = 0x88000;
    window->matches[1].mask = vdev->config_size - 1;
    window->bar = nr;
    window->addr_mem = bar5->addr_mem = &quirk->mem[0];
    window->data_mem = bar5->data_mem = &quirk->mem[1];

    memory_region_init_io(window->addr_mem, OBJECT(vdev),
                          &vfio_generic_window_address_quirk, window,
                          "vfio-nvidia-bar5-window-address-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        window->address_offset,
                                        window->addr_mem, 1);
    /* Disabled until the master/enable registers switch the backdoor on */
    memory_region_set_enabled(window->addr_mem, false);

    memory_region_init_io(window->data_mem, OBJECT(vdev),
                          &vfio_generic_window_data_quirk, window,
                          "vfio-nvidia-bar5-window-data-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        window->data_offset,
                                        window->data_mem, 1);
    memory_region_set_enabled(window->data_mem, false);

    /* Track writes to the master (0x0) and enable (0x4) registers */
    memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
                          &vfio_nvidia_bar5_quirk_master, bar5,
                          "vfio-nvidia-bar5-master-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        0, &quirk->mem[2], 1);

    memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
                          &vfio_nvidia_bar5_quirk_enable, bar5,
                          "vfio-nvidia-bar5-enable-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        4, &quirk->mem[3], 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
}
/* Tracks the most recent mirror write, used to detect repeated patterns */
typedef struct LastDataSet {
    VFIOQuirk *quirk;  /* owning quirk, receives dynamically added eventfds */
    hwaddr addr;       /* address of the last write */
    uint64_t data;     /* data of the last write */
    unsigned size;     /* size of the last write */
    int hits;          /* consecutive identical writes seen */
    int added;         /* ioeventfds added so far (may overshoot by one) */
} LastDataSet;

#define MAX_DYN_IOEVENTFD 10   /* cap on dynamically added ioeventfds */
#define HITS_FOR_IOEVENTFD 10  /* identical writes before adding one */
  719. /*
  720. * Finally, BAR0 itself. We want to redirect any accesses to either
  721. * 0x1800 or 0x88000 through the PCI config space access functions.
  722. */
/*
 * Write handler for the BAR0 config-space mirrors.  Performs the normal
 * mirrored config write, forwards MSI-ACK writes to real hardware, and
 * dynamically adds ioeventfds for hot repeated write patterns.
 */
static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
                                           uint64_t data, unsigned size)
{
    VFIOConfigMirrorQuirk *mirror = opaque;
    VFIOPCIDevice *vdev = mirror->vdev;
    PCIDevice *pdev = &vdev->pdev;
    LastDataSet *last = (LastDataSet *)&mirror->data;

    vfio_generic_quirk_mirror_write(opaque, addr, data, size);

    /*
     * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
     * MSI capability ID register.  Both the ID and next register are
     * read-only, so we allow writes covering either of those to real hw.
     */
    if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
        vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
        vfio_region_write(&vdev->bars[mirror->bar].region,
                          addr + mirror->offset, data, size);
        trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
    }

    /*
     * Automatically add an ioeventfd to handle any repeated write with the
     * same data and size above the standard PCI config space header.  This
     * is primarily expected to accelerate the MSI-ACK behavior, such as
     * noted above.  Current hardware/drivers should trigger an ioeventfd at
     * config offset 0x704 (region offset 0x88704), with data 0x0, size 4.
     *
     * The criteria of 10 successive hits is arbitrary but reliably adds the
     * MSI-ACK region.  Note that as some writes are bypassed via the
     * ioeventfd, the remaining ones have a greater chance of being seen
     * successively.  To avoid the pathological case of burning up all of
     * QEMU's open file handles, arbitrarily limit this algorithm from
     * adding no more than 10 ioeventfds, print an error if we would have
     * added an 11th, and then stop counting.
     */
    if (!vdev->no_kvm_ioeventfd &&
        addr >= PCI_STD_HEADER_SIZEOF && last->added <= MAX_DYN_IOEVENTFD) {
        if (addr != last->addr || data != last->data || size != last->size) {
            /* Different pattern: restart the hit counter */
            last->addr = addr;
            last->data = data;
            last->size = size;
            last->hits = 1;
        } else if (++last->hits >= HITS_FOR_IOEVENTFD) {
            if (last->added < MAX_DYN_IOEVENTFD) {
                VFIOIOEventFD *ioeventfd;
                ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size,
                                        data, &vdev->bars[mirror->bar].region,
                                        mirror->offset + addr, true);
                if (ioeventfd) {
                    VFIOQuirk *quirk = last->quirk;

                    QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
                    last->added++;
                }
            } else {
                /* Bump once past the limit so the warning fires only once */
                last->added++;
                warn_report("NVIDIA ioeventfd queue full for %s, unable to "
                            "accelerate 0x%"HWADDR_PRIx", data 0x%"PRIx64", "
                            "size %u", vdev->vbasedev.name, addr, data, size);
            }
        }
    }
}
/* BAR0 mirror ops: generic mirrored reads, write hook for MSI-ACK/eventfds */
static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
    .read = vfio_generic_quirk_mirror_read,
    .write = vfio_nvidia_quirk_mirror_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
  789. static void vfio_nvidia_bar0_quirk_reset(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
  790. {
  791. VFIOConfigMirrorQuirk *mirror = quirk->data;
  792. LastDataSet *last = (LastDataSet *)&mirror->data;
  793. last->addr = last->data = last->size = last->hits = last->added = 0;
  794. vfio_drop_dynamic_eventfds(vdev, quirk);
  795. }
/*
 * Install the BAR0 config-space mirror quirks: a full config space mirror
 * @0x88000 and, when legacy VGA is in use, a 256-byte mirror @0x1800.
 */
static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *quirk;
    VFIOConfigMirrorQuirk *mirror;
    LastDataSet *last;

    /* NVIDIA VGA-class devices only, and only BAR0 */
    if (vdev->no_geforce_quirks ||
        !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
        !vfio_is_vga(vdev) || nr != 0) {
        return;
    }

    /* Full config space mirror @0x88000 */
    quirk = vfio_quirk_alloc(1);
    quirk->reset = vfio_nvidia_bar0_quirk_reset;
    mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
    mirror->mem = quirk->mem;
    mirror->vdev = vdev;
    mirror->offset = 0x88000;
    mirror->bar = nr;
    last = (LastDataSet *)&mirror->data;
    last->quirk = quirk;

    memory_region_init_io(mirror->mem, OBJECT(vdev),
                          &vfio_nvidia_mirror_quirk, mirror,
                          "vfio-nvidia-bar0-88000-mirror-quirk",
                          vdev->config_size);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        mirror->offset, mirror->mem, 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    /* The 0x1800 offset mirror only seems to get used by legacy VGA */
    if (vdev->vga) {
        quirk = vfio_quirk_alloc(1);
        quirk->reset = vfio_nvidia_bar0_quirk_reset;
        mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
        mirror->mem = quirk->mem;
        mirror->vdev = vdev;
        mirror->offset = 0x1800;
        mirror->bar = nr;
        last = (LastDataSet *)&mirror->data;
        last->quirk = quirk;

        memory_region_init_io(mirror->mem, OBJECT(vdev),
                              &vfio_nvidia_mirror_quirk, mirror,
                              "vfio-nvidia-bar0-1800-mirror-quirk",
                              PCI_CONFIG_SPACE_SIZE);
        memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                            mirror->offset, mirror->mem, 1);

        QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
    }

    trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
}
  843. /*
  844. * TODO - Some Nvidia devices provide config access to their companion HDA
  845. * device and even to their parent bridge via these config space mirrors.
  846. * Add quirks for those regions.
  847. */
  848. #define PCI_VENDOR_ID_REALTEK 0x10ec
  849. /*
  850. * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2
  851. * offset 0x70 there is a dword data register, offset 0x74 is a dword address
  852. * register. According to the Linux r8169 driver, the MSI-X table is addressed
  853. * when the "type" portion of the address register is set to 0x1. This appears
  854. * to be bits 16:30. Bit 31 is both a write indicator and some sort of
  855. * "address latched" indicator. Bits 12:15 are a mask field, which we can
  856. * ignore because the MSI-X table should always be accessed as a dword (full
  857. * mask). Bits 0:11 is offset within the type.
  858. *
  859. * Example trace:
  860. *
  861. * Read from MSI-X table offset 0
  862. * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
  863. * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
  864. * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
  865. *
  866. * Write 0xfee00000 to MSI-X table offset 0
  867. * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
  868. * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
  869. * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
  870. */
/* Per-device state for the RTL8168 BAR2 MSI-X table backdoor quirk */
typedef struct VFIOrtl8168Quirk {
    VFIOPCIDevice *vdev;
    uint32_t addr;   /* last address-register value that targeted the table */
    uint32_t data;   /* last value written to the data register */
    bool enabled;    /* address register currently targets the MSI-X table */
} VFIOrtl8168Quirk;
  877. static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
  878. hwaddr addr, unsigned size)
  879. {
  880. VFIOrtl8168Quirk *rtl = opaque;
  881. VFIOPCIDevice *vdev = rtl->vdev;
  882. uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
  883. if (rtl->enabled) {
  884. data = rtl->addr ^ 0x80000000U; /* latch/complete */
  885. trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
  886. }
  887. return data;
  888. }
/*
 * Write of the address register (BAR2 + 0x74).  Accesses with "type" 0x1
 * target the MSI-X table: the address is latched, and when bit 31 (write
 * indicator) is set the previously stored data value is redirected into the
 * emulated guest MSI-X table rather than hardware.  All other writes pass
 * through.
 */
static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
                                             uint64_t data, unsigned size)
{
    VFIOrtl8168Quirk *rtl = opaque;
    VFIOPCIDevice *vdev = rtl->vdev;

    rtl->enabled = false;

    if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
        rtl->enabled = true;
        rtl->addr = (uint32_t)data;

        if (data & 0x80000000U) { /* Do write */
            if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
                hwaddr offset = data & 0xfff;
                uint64_t val = rtl->data;

                trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
                                                    (uint16_t)offset, val);

                /* Write to the proper guest MSI-X table instead */
                memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
                                             offset, val,
                                             size_memop(size) | MO_LE,
                                             MEMTXATTRS_UNSPECIFIED);
            }
            return; /* Do not write guest MSI-X data to hardware */
        }
    }

    vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
}
/* Address register ops; dword-only, matching how the backdoor is used */
static const MemoryRegionOps vfio_rtl_address_quirk = {
    .read = vfio_rtl8168_quirk_address_read,
    .write = vfio_rtl8168_quirk_address_write,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
        .unaligned = false,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
  925. static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
  926. hwaddr addr, unsigned size)
  927. {
  928. VFIOrtl8168Quirk *rtl = opaque;
  929. VFIOPCIDevice *vdev = rtl->vdev;
  930. uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
  931. if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
  932. hwaddr offset = rtl->addr & 0xfff;
  933. memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
  934. &data, size_memop(size) | MO_LE,
  935. MEMTXATTRS_UNSPECIFIED);
  936. trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
  937. }
  938. return data;
  939. }
  940. static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
  941. uint64_t data, unsigned size)
  942. {
  943. VFIOrtl8168Quirk *rtl = opaque;
  944. VFIOPCIDevice *vdev = rtl->vdev;
  945. rtl->data = (uint32_t)data;
  946. vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
  947. }
/* Data register ops; dword-only, matching how the backdoor is used */
static const MemoryRegionOps vfio_rtl_data_quirk = {
    .read = vfio_rtl8168_quirk_data_read,
    .write = vfio_rtl8168_quirk_data_write,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
        .unaligned = false,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
/*
 * Install the RTL8168 BAR2 quirk: trap the address (0x74) and data (0x70)
 * backdoor registers so guest MSI-X table accesses are redirected.
 */
static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
{
    VFIOQuirk *quirk;
    VFIOrtl8168Quirk *rtl;

    /* Only for RTL8168 devices, and only on BAR2 */
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
        return;
    }

    quirk = vfio_quirk_alloc(2);
    quirk->data = rtl = g_malloc0(sizeof(*rtl));
    rtl->vdev = vdev;

    /* Address register @0x74 */
    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
                          &vfio_rtl_address_quirk, rtl,
                          "vfio-rtl8168-window-address-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        0x74, &quirk->mem[0], 1);

    /* Data register @0x70 */
    memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
                          &vfio_rtl_data_quirk, rtl,
                          "vfio-rtl8168-window-data-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        0x70, &quirk->mem[1], 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
}
  981. /*
  982. * Intel IGD support
  983. *
  984. * Obviously IGD is not a discrete device, this is evidenced not only by it
  985. * being integrated into the CPU, but by the various chipset and BIOS
  986. * dependencies that it brings along with it. Intel is trying to move away
  987. * from this and Broadwell and newer devices can run in what Intel calls
  988. * "Universal Pass-Through" mode, or UPT. Theoretically in UPT mode, nothing
  989. * more is required beyond assigning the IGD device to a VM. There are
  990. * however support limitations to this mode. It only supports IGD as a
  991. * secondary graphics device in the VM and it doesn't officially support any
  992. * physical outputs.
  993. *
  994. * The code here attempts to enable what we'll call legacy mode assignment,
  995. * IGD retains most of the capabilities we expect for it to have on bare
  996. * metal. To enable this mode, the IGD device must be assigned to the VM
  997. * at PCI address 00:02.0, it must have a ROM, it very likely needs VGA
  998. * support, we must have VM BIOS support for reserving and populating some
  999. * of the required tables, and we need to tweak the chipset with revisions
  1000. * and IDs and an LPC/ISA bridge device. The intention is to make all of
  1001. * this happen automatically by installing the device at the correct VM PCI
  1002. * bus address. If any of the conditions are not met, we cross our fingers
  1003. * and hope the user knows better.
  1004. *
  1005. * NB - It is possible to enable physical outputs in UPT mode by supplying
  1006. * an OpRegion table. We don't do this by default because the guest driver
  1007. * behaves differently if an OpRegion is provided and no monitor is attached
  1008. * vs no OpRegion and a monitor being attached or not. Effectively, if a
  1009. * headless setup is desired, the OpRegion gets in the way of that.
  1010. */
  1011. /*
  1012. * This presumes the device is already known to be an Intel VGA device, so we
  1013. * take liberties in which device ID bits match which generation. This should
  1014. * not be taken as an indication that all the devices are supported, or even
  1015. * supportable, some of them don't even support VT-d.
  1016. * See linux:include/drm/i915_pciids.h for IDs.
  1017. */
  1018. static int igd_gen(VFIOPCIDevice *vdev)
  1019. {
  1020. if ((vdev->device_id & 0xfff) == 0xa84) {
  1021. return 8; /* Broxton */
  1022. }
  1023. switch (vdev->device_id & 0xff00) {
  1024. /* Old, untested, unavailable, unknown */
  1025. case 0x0000:
  1026. case 0x2500:
  1027. case 0x2700:
  1028. case 0x2900:
  1029. case 0x2a00:
  1030. case 0x2e00:
  1031. case 0x3500:
  1032. case 0xa000:
  1033. return -1;
  1034. /* SandyBridge, IvyBridge, ValleyView, Haswell */
  1035. case 0x0100:
  1036. case 0x0400:
  1037. case 0x0a00:
  1038. case 0x0c00:
  1039. case 0x0d00:
  1040. case 0x0f00:
  1041. return 6;
  1042. /* BroadWell, CherryView, SkyLake, KabyLake */
  1043. case 0x1600:
  1044. case 0x1900:
  1045. case 0x2200:
  1046. case 0x5900:
  1047. return 8;
  1048. }
  1049. return 8; /* Assume newer is compatible */
  1050. }
/* Per-device state for the IGD BAR4 index/data quirk */
typedef struct VFIOIGDQuirk {
    struct VFIOPCIDevice *vdev;
    uint32_t index; /* latched BAR4 index register value, ~0 when invalid */
    uint32_t bdsm;  /* BDSM reference used to rebase GTT PTE writes;
                     * initialized outside this view — see quirk setup */
} VFIOIGDQuirk;

#define IGD_GMCH 0x50 /* Graphics Control Register */
#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
#define IGD_ASLS 0xfc /* ASL Storage Register */
  1059. /*
  1060. * The OpRegion includes the Video BIOS Table, which seems important for
  1061. * telling the driver what sort of outputs it has. Without this, the device
  1062. * may work in the guest, but we may not get output. This also requires BIOS
  1063. * support to reserve and populate a section of guest memory sufficient for
  1064. * the table and to write the base address of that memory to the ASLS register
  1065. * of the IGD device.
  1066. */
/*
 * Copy the host OpRegion from the vfio device region into a private buffer
 * and expose it through fw_cfg for the VM firmware.  Returns 0 on success,
 * -EINVAL (with errp set) if the region cannot be read.
 */
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
                               struct vfio_region_info *info, Error **errp)
{
    int ret;

    vdev->igd_opregion = g_malloc0(info->size);
    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
                info->size, info->offset);
    if (ret != info->size) {
        /* Short read or error; release the buffer and report failure */
        error_setg(errp, "failed to read IGD OpRegion");
        g_free(vdev->igd_opregion);
        vdev->igd_opregion = NULL;
        return -EINVAL;
    }

    /*
     * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to
     * allocate 32bit reserved memory for, copy these contents into, and write
     * the reserved memory base address to the device ASLS register at 0xFC.
     * Alignment of this reserved region seems flexible, but using a 4k page
     * alignment seems to work well.  This interface assumes a single IGD
     * device, which may be at VM address 00:02.0 in legacy mode or another
     * address in UPT mode.
     *
     * NB, there may be future use cases discovered where the VM should have
     * direct interaction with the host OpRegion, in which case the write to
     * the ASLS register would trigger MemoryRegion setup to enable that.
     */
    fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
                    vdev->igd_opregion, info->size);

    trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);

    /* Emulate ASLS as zero and fully writable; the guest BIOS fills it in */
    pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
    pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
    pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);

    return 0;
}
  1101. /*
  1102. * The rather short list of registers that we copy from the host devices.
  1103. * The LPC/ISA bridge values are definitely needed to support the vBIOS, the
  1104. * host bridge values may or may not be needed depending on the guest OS.
  1105. * Since we're only munging revision and subsystem values on the host bridge,
  1106. * we don't require our own device. The LPC/ISA bridge needs to be our very
  1107. * own though.
  1108. */
/* Offset/length pair of a config register copied from a host device */
typedef struct {
    uint8_t offset;
    uint8_t len;
} IGDHostInfo;

/* Registers mirrored from the host's host bridge (00:00.0) */
static const IGDHostInfo igd_host_bridge_infos[] = {
    {PCI_REVISION_ID,         2},
    {PCI_SUBSYSTEM_VENDOR_ID, 2},
    {PCI_SUBSYSTEM_ID,        2},
};

/* Registers mirrored from the host's LPC/ISA bridge (00:1f.0) */
static const IGDHostInfo igd_lpc_bridge_infos[] = {
    {PCI_VENDOR_ID,           2},
    {PCI_DEVICE_ID,           2},
    {PCI_REVISION_ID,         2},
    {PCI_SUBSYSTEM_VENDOR_ID, 2},
    {PCI_SUBSYSTEM_ID,        2},
};
  1125. static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev,
  1126. struct vfio_region_info *info,
  1127. const IGDHostInfo *list, int len)
  1128. {
  1129. int i, ret;
  1130. for (i = 0; i < len; i++) {
  1131. ret = pread(vdev->vbasedev.fd, pdev->config + list[i].offset,
  1132. list[i].len, info->offset + list[i].offset);
  1133. if (ret != list[i].len) {
  1134. error_report("IGD copy failed: %m");
  1135. return -errno;
  1136. }
  1137. }
  1138. return 0;
  1139. }
  1140. /*
  1141. * Stuff a few values into the host bridge.
  1142. */
  1143. static int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,
  1144. struct vfio_region_info *info)
  1145. {
  1146. PCIBus *bus;
  1147. PCIDevice *host_bridge;
  1148. int ret;
  1149. bus = pci_device_root_bus(&vdev->pdev);
  1150. host_bridge = pci_find_device(bus, 0, PCI_DEVFN(0, 0));
  1151. if (!host_bridge) {
  1152. error_report("Can't find host bridge");
  1153. return -ENODEV;
  1154. }
  1155. ret = vfio_pci_igd_copy(vdev, host_bridge, info, igd_host_bridge_infos,
  1156. ARRAY_SIZE(igd_host_bridge_infos));
  1157. if (!ret) {
  1158. trace_vfio_pci_igd_host_bridge_enabled(vdev->vbasedev.name);
  1159. }
  1160. return ret;
  1161. }
  1162. /*
  1163. * IGD LPC/ISA bridge support code. The vBIOS needs this, but we can't write
  1164. * arbitrary values into just any bridge, so we must create our own. We try
  1165. * to handle if the user has created it for us, which they might want to do
  1166. * to enable multifunction so we don't occupy the whole PCI slot.
  1167. */
  1168. static void vfio_pci_igd_lpc_bridge_realize(PCIDevice *pdev, Error **errp)
  1169. {
  1170. if (pdev->devfn != PCI_DEVFN(0x1f, 0)) {
  1171. error_setg(errp, "VFIO dummy ISA/LPC bridge must have address 1f.0");
  1172. }
  1173. }
  1174. static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data)
  1175. {
  1176. DeviceClass *dc = DEVICE_CLASS(klass);
  1177. PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
  1178. set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
  1179. dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment";
  1180. dc->hotpluggable = false;
  1181. k->realize = vfio_pci_igd_lpc_bridge_realize;
  1182. k->class_id = PCI_CLASS_BRIDGE_ISA;
  1183. }
/* QOM type description for the dummy LPC/ISA bridge (conventional PCI) */
static TypeInfo vfio_pci_igd_lpc_bridge_info = {
    .name = "vfio-pci-igd-lpc-bridge",
    .parent = TYPE_PCI_DEVICE,
    .class_init = vfio_pci_igd_lpc_bridge_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { },
    },
};
/* Register the dummy LPC bridge QOM type at startup */
static void vfio_pci_igd_register_types(void)
{
    type_register_static(&vfio_pci_igd_lpc_bridge_info);
}

type_init(vfio_pci_igd_register_types)
  1198. static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
  1199. struct vfio_region_info *info)
  1200. {
  1201. PCIDevice *lpc_bridge;
  1202. int ret;
  1203. lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
  1204. 0, PCI_DEVFN(0x1f, 0));
  1205. if (!lpc_bridge) {
  1206. lpc_bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev),
  1207. PCI_DEVFN(0x1f, 0), "vfio-pci-igd-lpc-bridge");
  1208. }
  1209. ret = vfio_pci_igd_copy(vdev, lpc_bridge, info, igd_lpc_bridge_infos,
  1210. ARRAY_SIZE(igd_lpc_bridge_infos));
  1211. if (!ret) {
  1212. trace_vfio_pci_igd_lpc_bridge_enabled(vdev->vbasedev.name);
  1213. }
  1214. return ret;
  1215. }
  1216. /*
  1217. * IGD Gen8 and newer support up to 8MB for the GTT and use a 64bit PTE
  1218. * entry, older IGDs use 2MB and 32bit. Each PTE maps a 4k page. Therefore
  1219. * we either have 2M/4k * 4 = 2k or 8M/4k * 8 = 16k as the maximum iobar index
  1220. * for programming the GTT.
  1221. *
  1222. * See linux:include/drm/i915_drm.h for shift and mask values.
  1223. */
  1224. static int vfio_igd_gtt_max(VFIOPCIDevice *vdev)
  1225. {
  1226. uint32_t gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
  1227. int ggms, gen = igd_gen(vdev);
  1228. gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, sizeof(gmch));
  1229. ggms = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
  1230. if (gen > 6) {
  1231. ggms = 1 << ggms;
  1232. }
  1233. ggms *= MiB;
  1234. return (ggms / (4 * KiB)) * (gen < 8 ? 4 : 8);
  1235. }
  1236. /*
  1237. * The IGD ROM will make use of stolen memory (GGMS) for support of VESA modes.
  1238. * Somehow the host stolen memory range is used for this, but how the ROM gets
  1239. * it is a mystery, perhaps it's hardcoded into the ROM. Thankfully though, it
  1240. * reprograms the GTT through the IOBAR where we can trap it and transpose the
  1241. * programming to the VM allocated buffer. That buffer gets reserved by the VM
  1242. * firmware via the fw_cfg entry added below. Here we're just monitoring the
  1243. * IOBAR address and data registers to detect a write sequence targeting the
  1244. * GTTADR. This code is developed by observed behavior and doesn't have a
  1245. * direct spec reference, unfortunately.
  1246. */
  1247. static uint64_t vfio_igd_quirk_data_read(void *opaque,
  1248. hwaddr addr, unsigned size)
  1249. {
  1250. VFIOIGDQuirk *igd = opaque;
  1251. VFIOPCIDevice *vdev = igd->vdev;
  1252. igd->index = ~0;
  1253. return vfio_region_read(&vdev->bars[4].region, addr + 4, size);
  1254. }
/*
 * Write of the BAR4 data port: writes that program GTT PTEs are rebased
 * from the host stolen memory range to the VM allocated buffer (BDSM);
 * everything else passes through unmodified.
 */
static void vfio_igd_quirk_data_write(void *opaque, hwaddr addr,
                                      uint64_t data, unsigned size)
{
    VFIOIGDQuirk *igd = opaque;
    VFIOPCIDevice *vdev = igd->vdev;
    uint64_t val = data;
    int gen = igd_gen(vdev);

    /*
     * Programming the GGMS starts at index 0x1 and uses every 4th index (ie.
     * 0x1, 0x5, 0x9, 0xd,...).  For pre-Gen8 each 4-byte write is a whole PTE
     * entry, with 0th bit enable set.  For Gen8 and up, PTEs are 64bit, so
     * entries 0x5 & 0xd are the high dword, in our case zero.  Each PTE
     * points to a 4k page, which we translate to a page from the VM
     * allocated region, pointed to by the BDSM register.  If this is not
     * set, we fail.
     *
     * We trap writes to the full configured GTT size, but we typically only
     * see the vBIOS writing up to (nearly) the 1MB barrier.  In fact it
     * often seems to miss the last entry for an even 1MB GTT.  Doing a
     * gratuitous write of that last entry does work, but is hopefully
     * unnecessary since we clear the previous GTT on initialization.
     */
    if ((igd->index % 4 == 1) && igd->index < vfio_igd_gtt_max(vdev)) {
        if (gen < 8 || (igd->index % 8 == 1)) {
            uint32_t base;

            base = pci_get_long(vdev->pdev.config + IGD_BDSM);
            if (!base) {
                hw_error("vfio-igd: Guest attempted to program IGD GTT before "
                         "BIOS reserved stolen memory. Unsupported BIOS?");
            }

            /* Rebase the PTE from the host BDSM to the guest's buffer */
            val = data - igd->bdsm + base;
        } else {
            val = 0; /* upper 32bits of pte, we only enable below 4G PTEs */
        }

        trace_vfio_pci_igd_bar4_write(vdev->vbasedev.name,
                                      igd->index, data, val);
    }

    vfio_region_write(&vdev->bars[4].region, addr + 4, val, size);

    /* Consume the latched index */
    igd->index = ~0;
}
/* Data port of the IGD BAR4 backdoor */
static const MemoryRegionOps vfio_igd_data_quirk = {
    .read = vfio_igd_quirk_data_read,
    .write = vfio_igd_quirk_data_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
  1299. static uint64_t vfio_igd_quirk_index_read(void *opaque,
  1300. hwaddr addr, unsigned size)
  1301. {
  1302. VFIOIGDQuirk *igd = opaque;
  1303. VFIOPCIDevice *vdev = igd->vdev;
  1304. igd->index = ~0;
  1305. return vfio_region_read(&vdev->bars[4].region, addr, size);
  1306. }
  1307. static void vfio_igd_quirk_index_write(void *opaque, hwaddr addr,
  1308. uint64_t data, unsigned size)
  1309. {
  1310. VFIOIGDQuirk *igd = opaque;
  1311. VFIOPCIDevice *vdev = igd->vdev;
  1312. igd->index = data;
  1313. vfio_region_write(&vdev->bars[4].region, addr, data, size);
  1314. }
/* Index (address) port of the IGD BAR4 backdoor */
static const MemoryRegionOps vfio_igd_index_quirk = {
    .read = vfio_igd_quirk_index_read,
    .write = vfio_igd_quirk_index_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
/*
 * Probe for IGD (Intel Graphics Device) "legacy mode" on BAR 4.  Legacy
 * mode emulates enough of a physical Intel platform for the video BIOS to
 * run: an LPC/ISA bridge at 00:1f.0, host bridge config values, OpRegion
 * access, a fw_cfg request for BIOS-reserved stolen memory, and an
 * index/data quirk on BAR 4 that remaps guest GTT programming into that
 * stolen memory.  Any failure merely disables legacy mode; the device
 * still functions as a normally assigned GPU.
 */
static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
{
    struct vfio_region_info *rom = NULL, *opregion = NULL,
                            *host = NULL, *lpc = NULL;
    VFIOQuirk *quirk;
    VFIOIGDQuirk *igd;
    PCIDevice *lpc_bridge;
    int i, ret, ggms_mb, gms_mb = 0, gen;
    uint64_t *bdsm_size;
    uint32_t gmch;
    uint16_t cmd_orig, cmd;
    Error *err = NULL;

    /*
     * This must be an Intel VGA device at address 00:02.0 for us to even
     * consider enabling legacy mode.  The vBIOS has dependencies on the
     * PCI bus address.
     */
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
        !vfio_is_vga(vdev) || nr != 4 ||
        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
                                       0, PCI_DEVFN(0x2, 0))) {
        return;
    }

    /*
     * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we
     * can stuff host values into, so if there's already one there and it's
     * not one we can hack on, legacy mode is no-go.  Sorry Q35.
     */
    lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
                                 0, PCI_DEVFN(0x1f, 0));
    if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
                                           "vfio-pci-igd-lpc-bridge")) {
        error_report("IGD device %s cannot support legacy mode due to existing "
                     "devices at address 1f.0", vdev->vbasedev.name);
        return;
    }

    /*
     * IGD is not a standard, they like to change their specs often.  We
     * only attempt to support back to SandyBridge and we hope that newer
     * devices maintain compatibility with generation 8.
     */
    gen = igd_gen(vdev);
    if (gen != 6 && gen != 8) {
        error_report("IGD device %s is unsupported in legacy mode, "
                     "try SandyBridge or newer", vdev->vbasedev.name);
        return;
    }

    /*
     * Most of what we're doing here is to enable the ROM to run, so if
     * there's no ROM, there's no point in setting up this quirk.
     * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
     */
    ret = vfio_get_region_info(&vdev->vbasedev,
                               VFIO_PCI_ROM_REGION_INDEX, &rom);
    if ((ret || !rom->size) && !vdev->pdev.romfile) {
        error_report("IGD device %s has no ROM, legacy mode disabled",
                     vdev->vbasedev.name);
        goto out;
    }

    /*
     * Ignore the hotplug corner case, mark the ROM failed, we can't
     * create the devices we need for legacy mode in the hotplug scenario.
     */
    if (vdev->pdev.qdev.hotplugged) {
        error_report("IGD device %s hotplugged, ROM disabled, "
                     "legacy mode disabled", vdev->vbasedev.name);
        vdev->rom_read_failed = true;
        goto out;
    }

    /*
     * Check whether we have all the vfio device specific regions to
     * support legacy mode (added in Linux v4.6).  If not, bail.
     */
    ret = vfio_get_dev_region_info(&vdev->vbasedev,
                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
    if (ret) {
        error_report("IGD device %s does not support OpRegion access,"
                     "legacy mode disabled", vdev->vbasedev.name);
        goto out;
    }

    ret = vfio_get_dev_region_info(&vdev->vbasedev,
                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
                        VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
    if (ret) {
        error_report("IGD device %s does not support host bridge access,"
                     "legacy mode disabled", vdev->vbasedev.name);
        goto out;
    }

    ret = vfio_get_dev_region_info(&vdev->vbasedev,
                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
                        VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, &lpc);
    if (ret) {
        error_report("IGD device %s does not support LPC bridge access,"
                     "legacy mode disabled", vdev->vbasedev.name);
        goto out;
    }

    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);

    /*
     * If IGD VGA Disable is clear (expected) and VGA is not already enabled,
     * try to enable it.  Probably shouldn't be using legacy mode without VGA,
     * but also no point in us enabling VGA if disabled in hardware.
     */
    if (!(gmch & 0x2) && !vdev->vga && vfio_populate_vga(vdev, &err)) {
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
        error_report("IGD device %s failed to enable VGA access, "
                     "legacy mode disabled", vdev->vbasedev.name);
        goto out;
    }

    /* Create our LPC/ISA bridge */
    ret = vfio_pci_igd_lpc_init(vdev, lpc);
    if (ret) {
        error_report("IGD device %s failed to create LPC bridge, "
                     "legacy mode disabled", vdev->vbasedev.name);
        goto out;
    }

    /* Stuff some host values into the VM PCI host bridge */
    ret = vfio_pci_igd_host_init(vdev, host);
    if (ret) {
        error_report("IGD device %s failed to modify host bridge, "
                     "legacy mode disabled", vdev->vbasedev.name);
        goto out;
    }

    /* Setup OpRegion access */
    ret = vfio_pci_igd_opregion_init(vdev, opregion, &err);
    if (ret) {
        error_append_hint(&err, "IGD legacy mode disabled\n");
        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
        goto out;
    }

    /* Setup our quirk to munge GTT addresses to the VM allocated buffer */
    quirk = vfio_quirk_alloc(2);
    igd = quirk->data = g_malloc0(sizeof(*igd));
    igd->vdev = vdev;
    igd->index = ~0;    /* no index latched yet */
    /* Host BDSM (Base of Data Stolen Memory), used to rebase guest PTEs */
    igd->bdsm = vfio_pci_read_config(&vdev->pdev, IGD_BDSM, 4);
    igd->bdsm &= ~((1 * MiB) - 1); /* 1MB aligned */

    /* Trap the BAR 4 index (offset 0) and data (offset 4) registers */
    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_index_quirk,
                          igd, "vfio-igd-index-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        0, &quirk->mem[0], 1);

    memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_igd_data_quirk,
                          igd, "vfio-igd-data-quirk", 4);
    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
                                        4, &quirk->mem[1], 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);

    /*
     * Determine the size of stolen memory needed for GTT.  The 2-bit GGMS
     * field sits at GMCH bits 9:8 before gen8 and bits 7:6 from gen8; from
     * gen7 on the field encodes the size as 2^n MB rather than n MB.
     */
    ggms_mb = (gmch >> (gen < 8 ? 8 : 6)) & 0x3;
    if (gen > 6) {
        ggms_mb = 1 << ggms_mb;
    }

    /*
     * Assume we have no GMS memory, but allow it to be overridden by device
     * option (experimental).  The spec doesn't actually allow zero GMS when
     * IVD (IGD VGA Disable) is clear, but the claim is that it's unused,
     * so let's not waste VM memory for it.
     */
    /* Clear the GMS field: bits 7:3 before gen8, bits 15:8 from gen8 */
    gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
    if (vdev->igd_gms) {
        if (vdev->igd_gms <= 0x10) {
            gms_mb = vdev->igd_gms * 32;    /* 32MB per GMS unit */
            gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8);
        } else {
            error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);
            vdev->igd_gms = 0;
        }
    }

    /*
     * Request reserved memory for stolen memory via fw_cfg.  VM firmware
     * must allocate a 1MB aligned reserved memory region below 4GB with
     * the requested size (in bytes) for use by the Intel PCI class VGA
     * device at VM address 00:02.0.  The base address of this reserved
     * memory region must be written to the device BDSM register at PCI
     * config offset 0x5C.
     */
    bdsm_size = g_malloc(sizeof(*bdsm_size));
    *bdsm_size = cpu_to_le64((ggms_mb + gms_mb) * MiB);
    fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
                    bdsm_size, sizeof(*bdsm_size));

    /* GMCH is read-only, emulated */
    pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
    pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
    pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);

    /* BDSM is read-write, emulated.  The BIOS needs to be able to write it */
    pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
    pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
    pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0);

    /*
     * This IOBAR gives us access to GTTADR, which allows us to write to
     * the GTT itself.  So let's go ahead and write zero to all the GTT
     * entries to avoid spurious DMA faults.  Be sure I/O access is enabled
     * before talking to the device.
     */
    if (pread(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
              vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
        error_report("IGD device %s - failed to read PCI command register",
                     vdev->vbasedev.name);
    }

    cmd = cmd_orig | PCI_COMMAND_IO;

    if (pwrite(vdev->vbasedev.fd, &cmd, sizeof(cmd),
               vdev->config_offset + PCI_COMMAND) != sizeof(cmd)) {
        error_report("IGD device %s - failed to write PCI command register",
                     vdev->vbasedev.name);
    }

    /*
     * Zero each GTT entry through the BAR 4 index/data pair.  Indices step
     * by 4 starting at 1, matching the layout assumed by the data-write
     * quirk above.
     */
    for (i = 1; i < vfio_igd_gtt_max(vdev); i += 4) {
        vfio_region_write(&vdev->bars[4].region, 0, i, 4);
        vfio_region_write(&vdev->bars[4].region, 4, 0, 4);
    }

    /* Restore the original command register value */
    if (pwrite(vdev->vbasedev.fd, &cmd_orig, sizeof(cmd_orig),
               vdev->config_offset + PCI_COMMAND) != sizeof(cmd_orig)) {
        error_report("IGD device %s - failed to restore PCI command register",
                     vdev->vbasedev.name);
    }

    trace_vfio_pci_igd_bdsm_enabled(vdev->vbasedev.name, ggms_mb + gms_mb);

out:
    g_free(rom);
    g_free(opregion);
    g_free(host);
    g_free(lpc);
}
  1540. /*
  1541. * Common quirk probe entry points.
  1542. */
/*
 * Probe the VGA space quirks.  Each probe function is expected to check
 * device applicability itself and be a no-op for non-matching hardware.
 */
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
{
    vfio_vga_probe_ati_3c3_quirk(vdev);
    vfio_vga_probe_nvidia_3d0_quirk(vdev);
}
  1548. void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
  1549. {
  1550. VFIOQuirk *quirk;
  1551. int i, j;
  1552. for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
  1553. QLIST_FOREACH(quirk, &vdev->vga->region[i].quirks, next) {
  1554. for (j = 0; j < quirk->nr_mem; j++) {
  1555. memory_region_del_subregion(&vdev->vga->region[i].mem,
  1556. &quirk->mem[j]);
  1557. }
  1558. }
  1559. }
  1560. }
  1561. void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
  1562. {
  1563. int i, j;
  1564. for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
  1565. while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
  1566. VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
  1567. QLIST_REMOVE(quirk, next);
  1568. for (j = 0; j < quirk->nr_mem; j++) {
  1569. object_unparent(OBJECT(&quirk->mem[j]));
  1570. }
  1571. g_free(quirk->mem);
  1572. g_free(quirk->data);
  1573. g_free(quirk);
  1574. }
  1575. }
  1576. }
/*
 * Probe all BAR quirks against the given BAR.  Each probe function is
 * expected to check vendor/device/BAR applicability itself (as
 * vfio_probe_igd_bar4_quirk() does) and be a no-op when it doesn't match.
 */
void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
{
    vfio_probe_ati_bar4_quirk(vdev, nr);
    vfio_probe_ati_bar2_quirk(vdev, nr);
    vfio_probe_nvidia_bar5_quirk(vdev, nr);
    vfio_probe_nvidia_bar0_quirk(vdev, nr);
    vfio_probe_rtl8168_bar2_quirk(vdev, nr);
    vfio_probe_igd_bar4_quirk(vdev, nr);
}
  1586. void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
  1587. {
  1588. VFIOBAR *bar = &vdev->bars[nr];
  1589. VFIOQuirk *quirk;
  1590. int i;
  1591. QLIST_FOREACH(quirk, &bar->quirks, next) {
  1592. while (!QLIST_EMPTY(&quirk->ioeventfds)) {
  1593. vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
  1594. }
  1595. for (i = 0; i < quirk->nr_mem; i++) {
  1596. memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
  1597. }
  1598. }
  1599. }
  1600. void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
  1601. {
  1602. VFIOBAR *bar = &vdev->bars[nr];
  1603. int i;
  1604. while (!QLIST_EMPTY(&bar->quirks)) {
  1605. VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
  1606. QLIST_REMOVE(quirk, next);
  1607. for (i = 0; i < quirk->nr_mem; i++) {
  1608. object_unparent(OBJECT(&quirk->mem[i]));
  1609. }
  1610. g_free(quirk->mem);
  1611. g_free(quirk->data);
  1612. g_free(quirk);
  1613. }
  1614. }
  1615. /*
  1616. * Reset quirks
  1617. */
  1618. void vfio_quirk_reset(VFIOPCIDevice *vdev)
  1619. {
  1620. int i;
  1621. for (i = 0; i < PCI_ROM_SLOT; i++) {
  1622. VFIOQuirk *quirk;
  1623. VFIOBAR *bar = &vdev->bars[i];
  1624. QLIST_FOREACH(quirk, &bar->quirks, next) {
  1625. if (quirk->reset) {
  1626. quirk->reset(vdev, quirk);
  1627. }
  1628. }
  1629. }
  1630. }
  1631. /*
  1632. * AMD Radeon PCI config reset, based on Linux:
  1633. * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
  1634. * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
  1635. * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
  1636. * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
  1637. * IDs: include/drm/drm_pciids.h
  1638. * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
  1639. *
  1640. * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the
  1641. * hardware that should be fixed on future ASICs. The symptom of this is that
 * once the accelerated driver loads, Windows guests will bsod on subsequent
 * attempts to load the driver, such as after VM reset or shutdown/restart. To
  1644. * work around this, we do an AMD specific PCI config reset, followed by an SMC
  1645. * reset. The PCI config reset only works if SMC firmware is running, so we
  1646. * have a dependency on the state of the device as to whether this reset will
  1647. * be effective. There are still cases where we won't be able to kick the
  1648. * device into working, but this greatly improves the usability overall. The
  1649. * config reset magic is relatively common on AMD GPUs, but the setup and SMC
  1650. * poking is largely ASIC specific.
  1651. */
  1652. static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
  1653. {
  1654. uint32_t clk, pc_c;
  1655. /*
  1656. * Registers 200h and 204h are index and data registers for accessing
  1657. * indirect configuration registers within the device.
  1658. */
  1659. vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
  1660. clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
  1661. vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
  1662. pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
  1663. return (!(clk & 1) && (0x20100 <= pc_c));
  1664. }
  1665. /*
  1666. * The scope of a config reset is controlled by a mode bit in the misc register
  1667. * and a fuse, exposed as a bit in another register. The fuse is the default
 * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
  1669. * scope = !(misc ^ fuse), where the resulting scope is defined the same as
  1670. * the fuse. A truth table therefore tells us that if misc == fuse, we need
  1671. * to flip the value of the bit in the misc register.
  1672. */
  1673. static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
  1674. {
  1675. uint32_t misc, fuse;
  1676. bool a, b;
  1677. vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
  1678. fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
  1679. b = fuse & 64;
  1680. vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
  1681. misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
  1682. a = misc & 2;
  1683. if (a == b) {
  1684. vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
  1685. vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
  1686. }
  1687. }
/*
 * Device-specific reset for AMD Bonaire/Hawaii: an AMD PCI config reset
 * (magic write to config offset 0x7c) followed by an SMC reset; see the
 * comment above for background.  Returns 0 on success, -ENODEV to defer
 * to a kernel-implemented reset, or -EINVAL when the SMC firmware isn't
 * running (a precondition for the config reset to work).
 */
static int vfio_radeon_reset(VFIOPCIDevice *vdev)
{
    PCIDevice *pdev = &vdev->pdev;
    int i, ret = 0;
    uint32_t data;

    /* Defer to a kernel implemented reset */
    if (vdev->vbasedev.reset_works) {
        trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
        return -ENODEV;
    }

    /* Enable only memory BAR access */
    vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);

    /* Reset only works if SMC firmware is loaded and running */
    if (!vfio_radeon_smc_is_running(vdev)) {
        ret = -EINVAL;
        trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
        goto out;
    }

    /* Make sure only the GFX function is reset */
    vfio_radeon_set_gfx_only_reset(vdev);

    /* AMD PCI config reset */
    vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
    usleep(100);

    /* Read back the memory size to make sure we're out of reset */
    for (i = 0; i < 100000; i++) {
        if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
            goto reset_smc;
        }
        usleep(1);
    }
    trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);

reset_smc:
    /* Assert SMC reset: set bit 0 of indirect register 0x80000000 */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
    data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    data |= 1;
    vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);

    /* Disable SMC clock: set bit 0 of indirect register 0x80000004 */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
    data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    data |= 1;
    vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);

    trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);

out:
    /* Restore PCI command register */
    vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);

    return ret;
}
/*
 * Install a device-specific reset function where the standard reset is
 * known to be insufficient.  Currently only AMD (vendor 0x1002)
 * Bonaire/Hawaii GPUs are handled; see the comment above
 * vfio_radeon_smc_is_running() for why.
 */
void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
{
    switch (vdev->vendor_id) {
    case 0x1002:
        switch (vdev->device_id) {
        /* Bonaire */
        case 0x6649: /* Bonaire [FirePro W5100] */
        case 0x6650:
        case 0x6651:
        case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
        case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
        case 0x665d: /* Bonaire [Radeon R7 200 Series] */
        /* Hawaii */
        case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
        case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
        case 0x67A2:
        case 0x67A8:
        case 0x67A9:
        case 0x67AA:
        case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
        case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
        case 0x67B8:
        case 0x67B9:
        case 0x67BA:
        case 0x67BE:
            vdev->resetfn = vfio_radeon_reset;
            trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
            break;
        }
        break;
    }
}
  1768. /*
  1769. * The NVIDIA GPUDirect P2P Vendor capability allows the user to specify
  1770. * devices as a member of a clique. Devices within the same clique ID
  1771. * are capable of direct P2P. It's the user's responsibility that this
  1772. * is correct. The spec says that this may reside at any unused config
  1773. * offset, but reserves and recommends hypervisors place this at C8h.
  1774. * The spec also states that the hypervisor should place this capability
  1775. * at the end of the capability list, thus next is defined as 0h.
  1776. *
  1777. * +----------------+----------------+----------------+----------------+
  1778. * | sig 7:0 ('P') | vndr len (8h) | next (0h) | cap id (9h) |
  1779. * +----------------+----------------+----------------+----------------+
  1780. * | rsvd 15:7(0h),id 6:3,ver 2:0(0h)| sig 23:8 ('P2') |
  1781. * +---------------------------------+---------------------------------+
  1782. *
  1783. * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf
  1784. */
  1785. static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v,
  1786. const char *name, void *opaque,
  1787. Error **errp)
  1788. {
  1789. DeviceState *dev = DEVICE(obj);
  1790. Property *prop = opaque;
  1791. uint8_t *ptr = qdev_get_prop_ptr(dev, prop);
  1792. visit_type_uint8(v, name, ptr, errp);
  1793. }
  1794. static void set_nv_gpudirect_clique_id(Object *obj, Visitor *v,
  1795. const char *name, void *opaque,
  1796. Error **errp)
  1797. {
  1798. DeviceState *dev = DEVICE(obj);
  1799. Property *prop = opaque;
  1800. uint8_t value, *ptr = qdev_get_prop_ptr(dev, prop);
  1801. Error *local_err = NULL;
  1802. if (dev->realized) {
  1803. qdev_prop_set_after_realize(dev, name, errp);
  1804. return;
  1805. }
  1806. visit_type_uint8(v, name, &value, &local_err);
  1807. if (local_err) {
  1808. error_propagate(errp, local_err);
  1809. return;
  1810. }
  1811. if (value & ~0xF) {
  1812. error_setg(errp, "Property %s: valid range 0-15", name);
  1813. return;
  1814. }
  1815. *ptr = value;
  1816. }
/*
 * 4-bit device property used to configure the NVIDIA GPUDirect P2P
 * clique ID emitted by vfio_add_nv_gpudirect_cap().
 */
const PropertyInfo qdev_prop_nv_gpudirect_clique = {
    .name = "uint4",
    .description = "NVIDIA GPUDirect Clique ID (0 - 15)",
    .get = get_nv_gpudirect_clique_id,
    .set = set_nv_gpudirect_clique_id,
};
/*
 * Emit the fully emulated NVIDIA GPUDirect P2P vendor capability
 * described in the layout comment above into config space at the
 * recommended offset C8h.  Returns 0 on success (including when the
 * clique property is unset) or a negative errno.
 */
static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
{
    PCIDevice *pdev = &vdev->pdev;
    int ret, pos = 0xC8;

    /* 0xFF is presumably the property's "unset" default -- confirm
     * against the nv_gpudirect_clique property definition. */
    if (vdev->nv_gpudirect_clique == 0xFF) {
        return 0;
    }

    /* Only meaningful for NVIDIA display-class devices */
    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID)) {
        error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid device vendor");
        return -EINVAL;
    }

    if (pci_get_byte(pdev->config + PCI_CLASS_DEVICE + 1) !=
        PCI_BASE_CLASS_DISPLAY) {
        error_setg(errp, "NVIDIA GPUDirect Clique ID: unsupported PCI class");
        return -EINVAL;
    }

    /* 8-byte vendor capability; cap id and next pointer are filled in */
    ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp);
    if (ret < 0) {
        error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: ");
        return ret;
    }

    /* The capability is fully emulated, never read from the host device */
    memset(vdev->emulated_config_bits + pos, 0xFF, 8);
    pos += PCI_CAP_FLAGS;
    pci_set_byte(pdev->config + pos++, 8);   /* vendor cap length */
    pci_set_byte(pdev->config + pos++, 'P'); /* sig 7:0 */
    pci_set_byte(pdev->config + pos++, '2'); /* sig 15:8 */
    pci_set_byte(pdev->config + pos++, 'P'); /* sig 23:16 */
    /* ver 2:0 = 0, clique id in bits 6:3 */
    pci_set_byte(pdev->config + pos++, vdev->nv_gpudirect_clique << 3);
    pci_set_byte(pdev->config + pos, 0);     /* reserved */

    return 0;
}
  1854. int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp)
  1855. {
  1856. int ret;
  1857. ret = vfio_add_nv_gpudirect_cap(vdev, errp);
  1858. if (ret) {
  1859. return ret;
  1860. }
  1861. return 0;
  1862. }
  1863. static void vfio_pci_nvlink2_get_tgt(Object *obj, Visitor *v,
  1864. const char *name,
  1865. void *opaque, Error **errp)
  1866. {
  1867. uint64_t tgt = (uintptr_t) opaque;
  1868. visit_type_uint64(v, name, &tgt, errp);
  1869. }
  1870. static void vfio_pci_nvlink2_get_link_speed(Object *obj, Visitor *v,
  1871. const char *name,
  1872. void *opaque, Error **errp)
  1873. {
  1874. uint32_t link_speed = (uint32_t)(uintptr_t) opaque;
  1875. visit_type_uint32(v, name, &link_speed, errp);
  1876. }
/*
 * Map the NVIDIA V100 NVLink2 GPU RAM exposed as a vendor-specific vfio
 * device region and attach it as a RAM MemoryRegion quirk on BAR 0, and
 * publish the target address from the region's SSATGT capability as the
 * "nvlink2-tgt" object property (presumably consumed by platform code --
 * confirm against the property's reader).  Returns 0 on success or a
 * negative errno.
 */
int vfio_pci_nvidia_v100_ram_init(VFIOPCIDevice *vdev, Error **errp)
{
    int ret;
    void *p;
    struct vfio_region_info *nv2reg = NULL;
    struct vfio_info_cap_header *hdr;
    struct vfio_region_info_cap_nvlink2_ssatgt *cap;
    VFIOQuirk *quirk;

    /* Locate the NVIDIA NVLink2 RAM device-specific region, if present */
    ret = vfio_get_dev_region_info(&vdev->vbasedev,
                                   VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
                                   PCI_VENDOR_ID_NVIDIA,
                                   VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
                                   &nv2reg);
    if (ret) {
        return ret;
    }

    /* The region must carry an SSATGT capability with the target address */
    hdr = vfio_get_region_info_cap(nv2reg, VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
    if (!hdr) {
        ret = -ENODEV;
        goto free_exit;
    }
    cap = (void *) hdr;

    /*
     * Map the GPU RAM read/write/execute; the mapping is not unmapped on
     * this path and lives for the quirk's lifetime.
     */
    p = mmap(NULL, nv2reg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
             MAP_SHARED, vdev->vbasedev.fd, nv2reg->offset);
    if (p == MAP_FAILED) {
        ret = -errno;
        goto free_exit;
    }

    quirk = vfio_quirk_alloc(1);
    memory_region_init_ram_ptr(&quirk->mem[0], OBJECT(vdev), "nvlink2-mr",
                               nv2reg->size, p);
    QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);

    object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
                        vfio_pci_nvlink2_get_tgt, NULL, NULL,
                        (void *) (uintptr_t) cap->tgt, NULL);
    trace_vfio_pci_nvidia_gpu_setup_quirk(vdev->vbasedev.name, cap->tgt,
                                          nv2reg->size);
free_exit:
    g_free(nv2reg);

    return ret;
}
/*
 * Initialize NVLink2 bridge support: locate the IBM ATSD vendor-specific
 * vfio region, map it (when non-empty) as a RAM-device quirk on BAR 0,
 * and publish the target address (SSATGT capability) and link speed
 * (LNKSPD capability) as the "nvlink2-tgt" and "nvlink2-link-speed"
 * object properties.  Returns 0 on success or a negative errno.
 */
int vfio_pci_nvlink2_init(VFIOPCIDevice *vdev, Error **errp)
{
    int ret;
    void *p;
    struct vfio_region_info *atsdreg = NULL;
    struct vfio_info_cap_header *hdr;
    struct vfio_region_info_cap_nvlink2_ssatgt *captgt;
    struct vfio_region_info_cap_nvlink2_lnkspd *capspeed;
    VFIOQuirk *quirk;

    /* Locate the IBM NVLink2 ATSD device-specific region, if present */
    ret = vfio_get_dev_region_info(&vdev->vbasedev,
                                   VFIO_REGION_TYPE_PCI_VENDOR_TYPE |
                                   PCI_VENDOR_ID_IBM,
                                   VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
                                   &atsdreg);
    if (ret) {
        return ret;
    }

    /* Both capabilities are required before anything is set up */
    hdr = vfio_get_region_info_cap(atsdreg,
                                   VFIO_REGION_INFO_CAP_NVLINK2_SSATGT);
    if (!hdr) {
        ret = -ENODEV;
        goto free_exit;
    }
    captgt = (void *) hdr;

    hdr = vfio_get_region_info_cap(atsdreg,
                                   VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD);
    if (!hdr) {
        ret = -ENODEV;
        goto free_exit;
    }
    capspeed = (void *) hdr;

    /* Some NVLink bridges may not have assigned ATSD */
    if (atsdreg->size) {
        /* Mapping lives for the quirk's lifetime; no munmap on this path */
        p = mmap(NULL, atsdreg->size, PROT_READ | PROT_WRITE | PROT_EXEC,
                 MAP_SHARED, vdev->vbasedev.fd, atsdreg->offset);
        if (p == MAP_FAILED) {
            ret = -errno;
            goto free_exit;
        }

        quirk = vfio_quirk_alloc(1);
        memory_region_init_ram_device_ptr(&quirk->mem[0], OBJECT(vdev),
                                          "nvlink2-atsd-mr", atsdreg->size, p);
        QLIST_INSERT_HEAD(&vdev->bars[0].quirks, quirk, next);
    }

    object_property_add(OBJECT(vdev), "nvlink2-tgt", "uint64",
                        vfio_pci_nvlink2_get_tgt, NULL, NULL,
                        (void *) (uintptr_t) captgt->tgt, NULL);
    trace_vfio_pci_nvlink2_setup_quirk_ssatgt(vdev->vbasedev.name, captgt->tgt,
                                              atsdreg->size);

    object_property_add(OBJECT(vdev), "nvlink2-link-speed", "uint32",
                        vfio_pci_nvlink2_get_link_speed, NULL, NULL,
                        (void *) (uintptr_t) capspeed->link_speed, NULL);
    trace_vfio_pci_nvlink2_setup_quirk_lnkspd(vdev->vbasedev.name,
                                              capspeed->link_speed);
free_exit:
    g_free(atsdreg);

    return ret;
}