msix.c

/*
 * MSI-X device support
 *
 * This module includes support for MSI-X in PCI devices.
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
#include "hw.h"
#include "msix.h"
#include "pci.h"
#include "range.h"

#define MSIX_CAP_LENGTH 12

/* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)

/* How much space does an MSI-X table need? */
/* The spec requires giving the table structure
 * a 4K aligned region all by itself. */
#define MSIX_PAGE_SIZE 0x1000

/* Reserve second half of the page for pending bits */
#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
#define MSIX_MAX_ENTRIES 32

/* Flag for interrupt controller to declare MSI-X support */
int msix_supported;

/* Add MSI-X capability to the config space for the device. */
/* Given a bar and its size, add MSI-X table on top of it
 * and fill MSI-X capability in the config space.
 * Original bar size must be a power of 2 or 0.
 * The new bar size is stored in pdev->msix_bar_size. */
static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
                           unsigned bar_nr, unsigned bar_size)
{
    int config_offset;
    uint8_t *config;
    uint32_t new_size;

    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
        return -EINVAL;
    if (bar_size > 0x80000000)
        return -ENOSPC;

    /* Add space for MSI-X structures */
    if (!bar_size) {
        new_size = MSIX_PAGE_SIZE;
    } else if (bar_size < MSIX_PAGE_SIZE) {
        bar_size = MSIX_PAGE_SIZE;
        new_size = MSIX_PAGE_SIZE * 2;
    } else {
        new_size = bar_size * 2;
    }

    pdev->msix_bar_size = new_size;
    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
                                       0, MSIX_CAP_LENGTH);
    if (config_offset < 0)
        return config_offset;
    config = pdev->config + config_offset;

    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
    /* Table on top of BAR */
    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
    /* Pending bits on top of that */
    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
                 bar_nr);
    pdev->msix_cap = config_offset;
    /* Make flags bit writable. */
    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
        MSIX_MASKALL_MASK;
    return 0;
}

static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
{
    PCIDevice *dev = opaque;
    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
    void *page = dev->msix_table_page;

    return pci_get_long(page + offset);
}

static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
{
    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
    return 0;
}

static uint8_t msix_pending_mask(int vector)
{
    return 1 << (vector % 8);
}

static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
{
    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
}

static int msix_is_pending(PCIDevice *dev, int vector)
{
    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}

static void msix_set_pending(PCIDevice *dev, int vector)
{
    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
}

static void msix_clr_pending(PCIDevice *dev, int vector)
{
    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
}

static int msix_function_masked(PCIDevice *dev)
{
    return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
}

static int msix_is_masked(PCIDevice *dev, int vector)
{
    unsigned offset =
        vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
    return msix_function_masked(dev) ||
           dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
}

static void msix_handle_mask_update(PCIDevice *dev, int vector)
{
    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
        msix_clr_pending(dev, vector);
        msix_notify(dev, vector);
    }
}

/* Handle MSI-X capability config write. */
void msix_write_config(PCIDevice *dev, uint32_t addr,
                       uint32_t val, int len)
{
    unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
    int vector;

    if (!range_covers_byte(addr, len, enable_pos)) {
        return;
    }

    if (!msix_enabled(dev)) {
        return;
    }

    pci_device_deassert_intx(dev);

    if (msix_function_masked(dev)) {
        return;
    }

    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        msix_handle_mask_update(dev, vector);
    }
}
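
/* Illustrative sketch (not part of this module): a device using MSI-X is
 * expected to forward config space writes here after applying the generic
 * config space update.  The device and hook names below are hypothetical.
 *
 * static void mydev_write_config(PCIDevice *pci_dev, uint32_t addr,
 *                                uint32_t val, int len)
 * {
 *     pci_default_write_config(pci_dev, addr, val, len);
 *     msix_write_config(pci_dev, addr, val, len);
 * }
 */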
static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
                             uint32_t val)
{
    PCIDevice *dev = opaque;
    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
    int vector = offset / PCI_MSIX_ENTRY_SIZE;

    pci_set_long(dev->msix_table_page + offset, val);
    msix_handle_mask_update(dev, vector);
}

static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
                                      uint32_t val)
{
    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
}

static CPUWriteMemoryFunc * const msix_mmio_write[] = {
    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
};

static CPUReadMemoryFunc * const msix_mmio_read[] = {
    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
};

/* Should be called from device's map method. */
void msix_mmio_map(PCIDevice *d, int region_num,
                   pcibus_t addr, pcibus_t size, int type)
{
    uint8_t *config = d->config + d->msix_cap;
    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
    /* TODO: for assigned devices, we'll want to make it possible to map
     * pending bits separately in case they are in a separate bar. */
    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;

    if (table_bir != region_num)
        return;
    if (size <= offset)
        return;
    cpu_register_physical_memory(addr + offset, size - offset,
                                 d->msix_mmio_index);
}
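
/* Illustrative sketch (hypothetical device code): when the MSI-X table lives
 * in a BAR of its own, the device's map callback for that BAR can simply
 * delegate to msix_mmio_map().
 *
 * static void mydev_msix_map(PCIDevice *pci_dev, int region_num,
 *                            pcibus_t addr, pcibus_t size, int type)
 * {
 *     msix_mmio_map(pci_dev, region_num, addr, size, type);
 * }
 */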
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
{
    int vector;

    for (vector = 0; vector < nentries; ++vector) {
        unsigned offset =
            vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
        dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
    }
}

/* Initialize the MSI-X structures. Note: if MSI-X is supported, the BAR size
 * is modified; retrieve it with msix_bar_size(). */
int msix_init(struct PCIDevice *dev, unsigned short nentries,
              unsigned bar_nr, unsigned bar_size)
{
    int ret;

    /* Nothing to do if MSI-X is not supported by interrupt controller */
    if (!msix_supported)
        return -ENOTSUP;
    if (nentries > MSIX_MAX_ENTRIES)
        return -EINVAL;

    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
                                        sizeof *dev->msix_entry_used);

    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
    msix_mask_all(dev, nentries);

    dev->msix_mmio_index = cpu_register_io_memory(msix_mmio_read,
                                                  msix_mmio_write, dev,
                                                  DEVICE_NATIVE_ENDIAN);
    if (dev->msix_mmio_index == -1) {
        ret = -EBUSY;
        goto err_index;
    }

    dev->msix_entries_nr = nentries;
    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
    if (ret)
        goto err_config;

    dev->cap_present |= QEMU_PCI_CAP_MSIX;
    return 0;

err_config:
    dev->msix_entries_nr = 0;
    cpu_unregister_io_memory(dev->msix_mmio_index);
err_index:
    qemu_free(dev->msix_table_page);
    dev->msix_table_page = NULL;
    qemu_free(dev->msix_entry_used);
    dev->msix_entry_used = NULL;
    return ret;
}
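
/* Illustrative sketch (hypothetical device code): a device typically calls
 * msix_init() during initialization and then registers a memory BAR of
 * msix_bar_size() bytes whose map callback reaches msix_mmio_map() (e.g. the
 * mydev_msix_map() sketch above).  The names, the vector count and the choice
 * of BAR 1 are assumptions, not requirements of this module.
 *
 * if (msix_init(&d->pci_dev, 4, 1, 0) == 0) {
 *     pci_register_bar(&d->pci_dev, 1, msix_bar_size(&d->pci_dev),
 *                      PCI_BASE_ADDRESS_SPACE_MEMORY, mydev_msix_map);
 * }
 */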
static void msix_free_irq_entries(PCIDevice *dev)
{
    int vector;

    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        dev->msix_entry_used[vector] = 0;
        msix_clr_pending(dev, vector);
    }
}

/* Clean up resources for the device. */
int msix_uninit(PCIDevice *dev)
{
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
        return 0;
    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
    dev->msix_cap = 0;
    msix_free_irq_entries(dev);
    dev->msix_entries_nr = 0;
    cpu_unregister_io_memory(dev->msix_mmio_index);
    qemu_free(dev->msix_table_page);
    dev->msix_table_page = NULL;
    qemu_free(dev->msix_entry_used);
    dev->msix_entry_used = NULL;
    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
    return 0;
}

void msix_save(PCIDevice *dev, QEMUFile *f)
{
    unsigned n = dev->msix_entries_nr;

    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
        return;
    }

    qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
}

/* Should be called after restoring the config space. */
void msix_load(PCIDevice *dev, QEMUFile *f)
{
    unsigned n = dev->msix_entries_nr;

    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
        return;
    }

    msix_free_irq_entries(dev);
    qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
}
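
/* Illustrative sketch (hypothetical device code): in the device's migration
 * hooks, msix_save()/msix_load() sit next to the generic PCI device state,
 * with msix_load() called only after the config space has been restored.
 * MyDevState and the hook names are assumptions for the example.
 *
 * static void mydev_save(QEMUFile *f, void *opaque)
 * {
 *     MyDevState *d = opaque;
 *     pci_device_save(&d->pci_dev, f);
 *     msix_save(&d->pci_dev, f);
 * }
 *
 * static int mydev_load(QEMUFile *f, void *opaque, int version_id)
 * {
 *     MyDevState *d = opaque;
 *     int ret = pci_device_load(&d->pci_dev, f);
 *     if (ret) {
 *         return ret;
 *     }
 *     msix_load(&d->pci_dev, f);
 *     return 0;
 * }
 */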
/* Does device support MSI-X? */
int msix_present(PCIDevice *dev)
{
    return dev->cap_present & QEMU_PCI_CAP_MSIX;
}

/* Is MSI-X enabled? */
int msix_enabled(PCIDevice *dev)
{
    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
        (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
         MSIX_ENABLE_MASK);
}

/* Size of the BAR where the MSI-X table resides, or 0 if MSI-X is not
 * supported. */
uint32_t msix_bar_size(PCIDevice *dev)
{
    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
        dev->msix_bar_size : 0;
}

/* Send an MSI-X message */
void msix_notify(PCIDevice *dev, unsigned vector)
{
    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
    uint64_t address;
    uint32_t data;

    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
        return;
    if (msix_is_masked(dev, vector)) {
        msix_set_pending(dev, vector);
        return;
    }

    address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
    data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
    stl_le_phys(address, data);
}
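
/* Illustrative sketch (hypothetical device code): on a device event, raise
 * the interrupt via MSI-X when it is enabled, otherwise fall back to the
 * legacy INTx pin.  Vector 0 and the INTx fallback are assumptions about the
 * device, not something this module mandates.
 *
 * if (msix_enabled(&d->pci_dev)) {
 *     msix_notify(&d->pci_dev, 0);
 * } else {
 *     qemu_set_irq(d->pci_dev.irq[0], 1);
 * }
 */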
void msix_reset(PCIDevice *dev)
{
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
        return;
    msix_free_irq_entries(dev);
    dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
        ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
    msix_mask_all(dev, dev->msix_entries_nr);
}

/* The PCI spec suggests that devices make it possible for software to
 * configure fewer vectors than the device supports, but does not specify a
 * standard mechanism for devices to do so.
 *
 * We support this by asking devices to declare the vectors software is going
 * to actually use, and checking this on the notification path. Devices that
 * don't want to follow the spec suggestion can declare all vectors as used. */

/* Mark vector as used. */
int msix_vector_use(PCIDevice *dev, unsigned vector)
{
    if (vector >= dev->msix_entries_nr)
        return -EINVAL;
    dev->msix_entry_used[vector]++;
    return 0;
}
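
/* Illustrative sketch (hypothetical device code): after msix_init(), declare
 * the vectors the guest driver will actually use; a device that does not
 * track this can simply mark every vector as used.  nvectors and the loop
 * below are assumptions about the device, not part of this module's API.
 *
 * for (i = 0; i < nvectors; i++) {
 *     msix_vector_use(&d->pci_dev, i);
 * }
 */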
/* Mark vector as unused. */
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
{
    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
        return;
    }
    if (--dev->msix_entry_used[vector]) {
        return;
    }
    msix_clr_pending(dev, vector);
}

void msix_unuse_all_vectors(PCIDevice *dev)
{
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
        return;
    msix_free_irq_entries(dev);
}