msix.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /*
  2. * MSI-X device support
  3. *
  4. * This module includes support for MSI-X in pci devices.
  5. *
  6. * Author: Michael S. Tsirkin <mst@redhat.com>
  7. *
  8. * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
  9. *
  10. * This work is licensed under the terms of the GNU GPL, version 2. See
  11. * the COPYING file in the top-level directory.
  12. */
  13. #include "hw.h"
  14. #include "msix.h"
  15. #include "pci.h"
  16. #include "range.h"
  17. #define MSIX_CAP_LENGTH 12
  18. /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
  19. #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
  20. #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
  21. #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
  22. /* How much space does an MSIX table need. */
  23. /* The spec requires giving the table structure
  24. * a 4K aligned region all by itself. */
  25. #define MSIX_PAGE_SIZE 0x1000
  26. /* Reserve second half of the page for pending bits */
  27. #define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
  28. #define MSIX_MAX_ENTRIES 32
  29. /* Flag for interrupt controller to declare MSI-X support */
  30. int msix_supported;
  31. /* Add MSI-X capability to the config space for the device. */
  32. /* Given a bar and its size, add MSI-X table on top of it
  33. * and fill MSI-X capability in the config space.
  34. * Original bar size must be a power of 2 or 0.
  35. * New bar size is returned. */
  36. static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
  37. unsigned bar_nr, unsigned bar_size)
  38. {
  39. int config_offset;
  40. uint8_t *config;
  41. uint32_t new_size;
  42. if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
  43. return -EINVAL;
  44. if (bar_size > 0x80000000)
  45. return -ENOSPC;
  46. /* Add space for MSI-X structures */
  47. if (!bar_size) {
  48. new_size = MSIX_PAGE_SIZE;
  49. } else if (bar_size < MSIX_PAGE_SIZE) {
  50. bar_size = MSIX_PAGE_SIZE;
  51. new_size = MSIX_PAGE_SIZE * 2;
  52. } else {
  53. new_size = bar_size * 2;
  54. }
  55. pdev->msix_bar_size = new_size;
  56. config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
  57. 0, MSIX_CAP_LENGTH);
  58. if (config_offset < 0)
  59. return config_offset;
  60. config = pdev->config + config_offset;
  61. pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
  62. /* Table on top of BAR */
  63. pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
  64. /* Pending bits on top of that */
  65. pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
  66. bar_nr);
  67. pdev->msix_cap = config_offset;
  68. /* Make flags bit writable. */
  69. pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
  70. MSIX_MASKALL_MASK;
  71. pdev->msix_function_masked = true;
  72. return 0;
  73. }
  74. static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
  75. unsigned size)
  76. {
  77. PCIDevice *dev = opaque;
  78. unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
  79. void *page = dev->msix_table_page;
  80. return pci_get_long(page + offset);
  81. }
  82. static uint8_t msix_pending_mask(int vector)
  83. {
  84. return 1 << (vector % 8);
  85. }
  86. static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
  87. {
  88. return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
  89. }
  90. static int msix_is_pending(PCIDevice *dev, int vector)
  91. {
  92. return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
  93. }
  94. static void msix_set_pending(PCIDevice *dev, int vector)
  95. {
  96. *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
  97. }
  98. static void msix_clr_pending(PCIDevice *dev, int vector)
  99. {
  100. *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
  101. }
  102. static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
  103. {
  104. unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
  105. return fmask || dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
  106. }
  107. static bool msix_is_masked(PCIDevice *dev, int vector)
  108. {
  109. return msix_vector_masked(dev, vector, dev->msix_function_masked);
  110. }
  111. static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
  112. {
  113. bool is_masked = msix_is_masked(dev, vector);
  114. if (is_masked == was_masked) {
  115. return;
  116. }
  117. if (!is_masked && msix_is_pending(dev, vector)) {
  118. msix_clr_pending(dev, vector);
  119. msix_notify(dev, vector);
  120. }
  121. }
  122. static void msix_update_function_masked(PCIDevice *dev)
  123. {
  124. dev->msix_function_masked = !msix_enabled(dev) ||
  125. (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK);
  126. }
/* Handle MSI-X capability config write.
 *
 * Called after config space has been updated; reacts to changes of the
 * ENABLE and MASKALL bits in the message control word.  Writes to
 * @addr/@len that do not cover the control byte are ignored. */
void msix_write_config(PCIDevice *dev, uint32_t addr,
                       uint32_t val, int len)
{
    unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
    int vector;
    bool was_masked;

    /* Only the byte holding ENABLE/MASKALL is interesting here. */
    if (!range_covers_byte(addr, len, enable_pos)) {
        return;
    }

    /* Recompute cached function-mask state from the new config bytes. */
    was_masked = dev->msix_function_masked;
    msix_update_function_masked(dev);

    if (!msix_enabled(dev)) {
        return;
    }

    /* INTx must not stay asserted while MSI-X is enabled. */
    pci_device_deassert_intx(dev);

    if (dev->msix_function_masked == was_masked) {
        return;
    }

    /* Function mask flipped: re-evaluate every vector, firing any that
     * became unmasked while pending. */
    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        msix_handle_mask_update(dev, vector,
                                msix_vector_masked(dev, vector, was_masked));
    }
}
  151. static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
  152. uint64_t val, unsigned size)
  153. {
  154. PCIDevice *dev = opaque;
  155. unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
  156. int vector = offset / PCI_MSIX_ENTRY_SIZE;
  157. bool was_masked;
  158. /* MSI-X page includes a read-only PBA and a writeable Vector Control. */
  159. if (vector >= dev->msix_entries_nr) {
  160. return;
  161. }
  162. was_masked = msix_is_masked(dev, vector);
  163. pci_set_long(dev->msix_table_page + offset, val);
  164. msix_handle_mask_update(dev, vector, was_masked);
  165. }
/* MMIO ops for the MSI-X table/PBA page.  Only aligned 32-bit accesses
 * are accepted, per the .valid constraints below. */
static const MemoryRegionOps msix_mmio_ops = {
    .read = msix_mmio_read,
    .write = msix_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
  175. static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
  176. {
  177. uint8_t *config = d->config + d->msix_cap;
  178. uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
  179. uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
  180. /* TODO: for assigned devices, we'll want to make it possible to map
  181. * pending bits separately in case they are in a separate bar. */
  182. memory_region_add_subregion(bar, offset, &d->msix_mmio);
  183. }
  184. static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
  185. {
  186. int vector;
  187. for (vector = 0; vector < nentries; ++vector) {
  188. unsigned offset =
  189. vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
  190. dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
  191. }
  192. }
/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
 * modified, it should be retrieved with msix_bar_size. */
int msix_init(struct PCIDevice *dev, unsigned short nentries,
              MemoryRegion *bar,
              unsigned bar_nr, unsigned bar_size)
{
    int ret;

    /* Nothing to do if MSI is not supported by interrupt controller */
    if (!msix_supported)
        return -ENOTSUP;
    if (nentries > MSIX_MAX_ENTRIES)
        return -EINVAL;

    /* Allocate at the maximum entry count so the array never resizes. */
    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
                                     sizeof *dev->msix_entry_used);
    /* One page: vector table first, pending bits in the second half. */
    dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
    /* All vectors start out masked. */
    msix_mask_all(dev, nentries);

    memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev,
                          "msix", MSIX_PAGE_SIZE);

    dev->msix_entries_nr = nentries;
    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
    if (ret)
        goto err_config;

    dev->cap_present |= QEMU_PCI_CAP_MSIX;
    msix_mmio_setup(dev, bar);
    return 0;

err_config:
    /* Unwind everything allocated above, in reverse order. */
    dev->msix_entries_nr = 0;
    memory_region_destroy(&dev->msix_mmio);
    g_free(dev->msix_table_page);
    dev->msix_table_page = NULL;
    g_free(dev->msix_entry_used);
    dev->msix_entry_used = NULL;
    return ret;
}
  227. static void msix_free_irq_entries(PCIDevice *dev)
  228. {
  229. int vector;
  230. for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
  231. dev->msix_entry_used[vector] = 0;
  232. msix_clr_pending(dev, vector);
  233. }
  234. }
/* Clean up resources for the device. */
int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
{
    /* Idempotent: a no-op when MSI-X was never set up. */
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
        return 0;
    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
    dev->msix_cap = 0;
    /* Clear use counts and pending bits before freeing the table page. */
    msix_free_irq_entries(dev);
    dev->msix_entries_nr = 0;
    /* Unmap the region before destroying it, then release the buffers. */
    memory_region_del_subregion(bar, &dev->msix_mmio);
    memory_region_destroy(&dev->msix_mmio);
    g_free(dev->msix_table_page);
    dev->msix_table_page = NULL;
    g_free(dev->msix_entry_used);
    dev->msix_entry_used = NULL;
    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
    return 0;
}
  253. void msix_save(PCIDevice *dev, QEMUFile *f)
  254. {
  255. unsigned n = dev->msix_entries_nr;
  256. if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
  257. return;
  258. }
  259. qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
  260. qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
  261. }
  262. /* Should be called after restoring the config space. */
  263. void msix_load(PCIDevice *dev, QEMUFile *f)
  264. {
  265. unsigned n = dev->msix_entries_nr;
  266. if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
  267. return;
  268. }
  269. msix_free_irq_entries(dev);
  270. qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
  271. qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
  272. msix_update_function_masked(dev);
  273. }
  274. /* Does device support MSI-X? */
  275. int msix_present(PCIDevice *dev)
  276. {
  277. return dev->cap_present & QEMU_PCI_CAP_MSIX;
  278. }
  279. /* Is MSI-X enabled? */
  280. int msix_enabled(PCIDevice *dev)
  281. {
  282. return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
  283. (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
  284. MSIX_ENABLE_MASK);
  285. }
  286. /* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
  287. uint32_t msix_bar_size(PCIDevice *dev)
  288. {
  289. return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
  290. dev->msix_bar_size : 0;
  291. }
  292. /* Send an MSI-X message */
  293. void msix_notify(PCIDevice *dev, unsigned vector)
  294. {
  295. uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
  296. uint64_t address;
  297. uint32_t data;
  298. if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
  299. return;
  300. if (msix_is_masked(dev, vector)) {
  301. msix_set_pending(dev, vector);
  302. return;
  303. }
  304. address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
  305. data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
  306. stl_le_phys(address, data);
  307. }
  308. void msix_reset(PCIDevice *dev)
  309. {
  310. if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
  311. return;
  312. msix_free_irq_entries(dev);
  313. dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
  314. ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
  315. memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
  316. msix_mask_all(dev, dev->msix_entries_nr);
  317. }
  318. /* PCI spec suggests that devices make it possible for software to configure
  319. * less vectors than supported by the device, but does not specify a standard
  320. * mechanism for devices to do so.
  321. *
  322. * We support this by asking devices to declare vectors software is going to
  323. * actually use, and checking this on the notification path. Devices that
  324. * don't want to follow the spec suggestion can declare all vectors as used. */
  325. /* Mark vector as used. */
  326. int msix_vector_use(PCIDevice *dev, unsigned vector)
  327. {
  328. if (vector >= dev->msix_entries_nr)
  329. return -EINVAL;
  330. dev->msix_entry_used[vector]++;
  331. return 0;
  332. }
  333. /* Mark vector as unused. */
  334. void msix_vector_unuse(PCIDevice *dev, unsigned vector)
  335. {
  336. if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
  337. return;
  338. }
  339. if (--dev->msix_entry_used[vector]) {
  340. return;
  341. }
  342. msix_clr_pending(dev, vector);
  343. }
  344. void msix_unuse_all_vectors(PCIDevice *dev)
  345. {
  346. if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
  347. return;
  348. msix_free_irq_entries(dev);
  349. }