msix.c

/*
 * MSI-X device support
 *
 * This module includes support for MSI-X in PCI devices.
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
#include "hw.h"
#include "msi.h"
#include "msix.h"
#include "pci.h"
#include "range.h"

#define MSIX_CAP_LENGTH 12

/* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
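
/* Fetch the address/data pair a vector's table entry is programmed with.
 * The 64-bit address occupies two consecutive dwords in the entry. */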
static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
    uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
    MSIMessage msg;

    msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
    msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA);
    return msg;
}

/*
 * Special API for POWER to configure the vectors through
 * a side channel.  Should never be used by devices.
 */
void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
{
    uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;

    pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
    pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
    table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
}
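
/* The Pending Bit Array keeps one bit per vector; the helpers below
 * locate and update the bit for a given vector. */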
static uint8_t msix_pending_mask(int vector)
{
    return 1 << (vector % 8);
}

static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
{
    return dev->msix_pba + vector / 8;
}

static int msix_is_pending(PCIDevice *dev, int vector)
{
    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}

static void msix_set_pending(PCIDevice *dev, int vector)
{
    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
}

static void msix_clr_pending(PCIDevice *dev, int vector)
{
    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
}
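
/* A vector is masked when the function is masked as a whole or when the
 * Mask bit in its Vector Control field is set. */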
static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
{
    unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
    return fmask || dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
}

static bool msix_is_masked(PCIDevice *dev, int vector)
{
    return msix_vector_masked(dev, vector, dev->msix_function_masked);
}
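
/* Inform the installed vector notifiers, if any, that a vector's mask
 * state has changed. */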
static void msix_fire_vector_notifier(PCIDevice *dev,
                                      unsigned int vector, bool is_masked)
{
    MSIMessage msg;
    int ret;

    if (!dev->msix_vector_use_notifier) {
        return;
    }
    if (is_masked) {
        dev->msix_vector_release_notifier(dev, vector);
    } else {
        msg = msix_get_message(dev, vector);
        ret = dev->msix_vector_use_notifier(dev, vector, msg);
        assert(ret >= 0);
    }
}
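
/* Handle a mask state transition: notify, and deliver any interrupt that
 * became pending while the vector was masked. */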
static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
    bool is_masked = msix_is_masked(dev, vector);

    if (is_masked == was_masked) {
        return;
    }

    msix_fire_vector_notifier(dev, vector, is_masked);

    if (!is_masked && msix_is_pending(dev, vector)) {
        msix_clr_pending(dev, vector);
        msix_notify(dev, vector);
    }
}
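
/* Recompute the cached function mask from the MSI-X Enable and Function
 * Mask bits in the message control register. */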
static void msix_update_function_masked(PCIDevice *dev)
{
    dev->msix_function_masked = !msix_enabled(dev) ||
        (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK);
}

/* Handle MSI-X capability config write. */
void msix_write_config(PCIDevice *dev, uint32_t addr,
                       uint32_t val, int len)
{
    unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET;
    int vector;
    bool was_masked;

    if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) {
        return;
    }

    was_masked = dev->msix_function_masked;
    msix_update_function_masked(dev);

    if (!msix_enabled(dev)) {
        return;
    }

    pci_device_deassert_intx(dev);

    if (dev->msix_function_masked == was_masked) {
        return;
    }

    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        msix_handle_mask_update(dev, vector,
                                msix_vector_masked(dev, vector, was_masked));
    }
}
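
/* MMIO accessors for the vector table.  Accesses are dword-sized; a write
 * may toggle a vector's Mask bit, so check for mask transitions. */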
static uint64_t msix_table_mmio_read(void *opaque, target_phys_addr_t addr,
                                     unsigned size)
{
    PCIDevice *dev = opaque;

    return pci_get_long(dev->msix_table + addr);
}

static void msix_table_mmio_write(void *opaque, target_phys_addr_t addr,
                                  uint64_t val, unsigned size)
{
    PCIDevice *dev = opaque;
    int vector = addr / PCI_MSIX_ENTRY_SIZE;
    bool was_masked;

    was_masked = msix_is_masked(dev, vector);
    pci_set_long(dev->msix_table + addr, val);
    msix_handle_mask_update(dev, vector, was_masked);
}

static const MemoryRegionOps msix_table_mmio_ops = {
    .read = msix_table_mmio_read,
    .write = msix_table_mmio_write,
    /* TODO: MSIX should be LITTLE_ENDIAN. */
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
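
/* The PBA is read-only from the guest's point of view (no .write handler);
 * pending bits are set and cleared by the device model. */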
static uint64_t msix_pba_mmio_read(void *opaque, target_phys_addr_t addr,
                                   unsigned size)
{
    PCIDevice *dev = opaque;

    return pci_get_long(dev->msix_pba + addr);
}

static const MemoryRegionOps msix_pba_mmio_ops = {
    .read = msix_pba_mmio_read,
    /* TODO: MSIX should be LITTLE_ENDIAN. */
    .endianness = DEVICE_NATIVE_ENDIAN,
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
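
/* Set the Mask bit of every table entry, firing notifiers for each vector
 * that transitions to masked. */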
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
{
    int vector;

    for (vector = 0; vector < nentries; ++vector) {
        unsigned offset =
            vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
        bool was_masked = msix_is_masked(dev, vector);

        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
        msix_handle_mask_update(dev, vector, was_masked);
    }
}

/* Initialize the MSI-X structures */
int msix_init(struct PCIDevice *dev, unsigned short nentries,
              MemoryRegion *table_bar, uint8_t table_bar_nr,
              unsigned table_offset, MemoryRegion *pba_bar,
              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
{
    int cap;
    unsigned table_size, pba_size;
    uint8_t *config;

    /* Nothing to do if MSI is not supported by interrupt controller */
    if (!msi_supported) {
        return -ENOTSUP;
    }

    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
        return -EINVAL;
    }

    table_size = nentries * PCI_MSIX_ENTRY_SIZE;
    pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;

    /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */
    if ((table_bar_nr == pba_bar_nr &&
         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
        table_offset + table_size > memory_region_size(table_bar) ||
        pba_offset + pba_size > memory_region_size(pba_bar) ||
        (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) {
        return -EINVAL;
    }

    cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH);
    if (cap < 0) {
        return cap;
    }

    dev->msix_cap = cap;
    dev->cap_present |= QEMU_PCI_CAP_MSIX;
    config = dev->config + cap;

    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
    dev->msix_entries_nr = nentries;
    dev->msix_function_masked = true;

    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr);
    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr);

    /* Make flags bit writable. */
    dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
                                             MSIX_MASKALL_MASK;

    dev->msix_table = g_malloc0(table_size);
    dev->msix_pba = g_malloc0(pba_size);
    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);

    msix_mask_all(dev, nentries);

    memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
                          "msix-table", table_size);
    memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio);
    memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
                          "msix-pba", pba_size);
    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);

    return 0;
}
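
/*
 * Initialize MSI-X using a dedicated BAR that holds both the vector table
 * and the PBA.  A hypothetical caller (the device and field names below
 * are illustrative, not part of this API) might do:
 *
 *     if (msix_init_exclusive_bar(&d->pci_dev, d->nvectors, 1) == 0) {
 *         for (i = 0; i < d->nvectors; i++) {
 *             msix_vector_use(&d->pci_dev, i);
 *         }
 *     }
 */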
int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries,
                            uint8_t bar_nr)
{
    int ret;
    char *name;

    /*
     * Migration compatibility dictates that this remains a 4k
     * BAR with the vector table in the lower half and PBA in
     * the upper half.  Do not use these elsewhere!
     */
#define MSIX_EXCLUSIVE_BAR_SIZE 4096
#define MSIX_EXCLUSIVE_BAR_TABLE_OFFSET 0
#define MSIX_EXCLUSIVE_BAR_PBA_OFFSET (MSIX_EXCLUSIVE_BAR_SIZE / 2)
#define MSIX_EXCLUSIVE_CAP_OFFSET 0

    if (nentries * PCI_MSIX_ENTRY_SIZE > MSIX_EXCLUSIVE_BAR_PBA_OFFSET) {
        return -EINVAL;
    }

    name = g_strdup_printf("%s-msix", dev->name);
    memory_region_init(&dev->msix_exclusive_bar, name, MSIX_EXCLUSIVE_BAR_SIZE);
    g_free(name);

    ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr,
                    MSIX_EXCLUSIVE_BAR_TABLE_OFFSET, &dev->msix_exclusive_bar,
                    bar_nr, MSIX_EXCLUSIVE_BAR_PBA_OFFSET,
                    MSIX_EXCLUSIVE_CAP_OFFSET);
    if (ret) {
        memory_region_destroy(&dev->msix_exclusive_bar);
        return ret;
    }

    pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &dev->msix_exclusive_bar);

    return 0;
}
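
/* Drop all vector use counts and clear every pending bit. */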
static void msix_free_irq_entries(PCIDevice *dev)
{
    int vector;

    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        dev->msix_entry_used[vector] = 0;
        msix_clr_pending(dev, vector);
    }
}
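
/* Clear the pending bit of every vector, leaving use counts intact. */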
static void msix_clear_all_vectors(PCIDevice *dev)
{
    int vector;

    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
        msix_clr_pending(dev, vector);
    }
}

/* Clean up resources for the device. */
void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
{
    if (!msix_present(dev)) {
        return;
    }

    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
    dev->msix_cap = 0;
    msix_free_irq_entries(dev);
    dev->msix_entries_nr = 0;
    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
    memory_region_destroy(&dev->msix_pba_mmio);
    g_free(dev->msix_pba);
    dev->msix_pba = NULL;
    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
    memory_region_destroy(&dev->msix_table_mmio);
    g_free(dev->msix_table);
    dev->msix_table = NULL;
    g_free(dev->msix_entry_used);
    dev->msix_entry_used = NULL;
    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
}

void msix_uninit_exclusive_bar(PCIDevice *dev)
{
    if (msix_present(dev)) {
        msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar);
        memory_region_destroy(&dev->msix_exclusive_bar);
    }
}
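
/* Save MSI-X state (vector table and pending bits) for migration. */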
void msix_save(PCIDevice *dev, QEMUFile *f)
{
    unsigned n = dev->msix_entries_nr;

    if (!msix_present(dev)) {
        return;
    }

    qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
    qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8);
}

/* Should be called after restoring the config space. */
void msix_load(PCIDevice *dev, QEMUFile *f)
{
    unsigned n = dev->msix_entries_nr;
    unsigned int vector;

    if (!msix_present(dev)) {
        return;
    }

    msix_clear_all_vectors(dev);
    qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
    qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
    msix_update_function_masked(dev);

    for (vector = 0; vector < n; vector++) {
        msix_handle_mask_update(dev, vector, true);
    }
}

/* Does device support MSI-X? */
int msix_present(PCIDevice *dev)
{
    return dev->cap_present & QEMU_PCI_CAP_MSIX;
}

/* Is MSI-X enabled? */
int msix_enabled(PCIDevice *dev)
{
    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
        (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
         MSIX_ENABLE_MASK);
}

/* Send an MSI-X message */
void msix_notify(PCIDevice *dev, unsigned vector)
{
    MSIMessage msg;

    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
        return;
    }
    if (msix_is_masked(dev, vector)) {
        msix_set_pending(dev, vector);
        return;
    }

    msg = msix_get_message(dev, vector);
    stl_le_phys(msg.address, msg.data);
}

void msix_reset(PCIDevice *dev)
{
    if (!msix_present(dev)) {
        return;
    }

    msix_clear_all_vectors(dev);
    dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
        ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
    memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
    memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
    msix_mask_all(dev, dev->msix_entries_nr);
}

/* The PCI spec suggests that devices make it possible for software to
 * configure fewer vectors than supported by the device, but does not
 * specify a standard mechanism for devices to do so.
 *
 * We support this by asking devices to declare vectors software is going to
 * actually use, and checking this on the notification path.  Devices that
 * don't want to follow the spec suggestion can declare all vectors as used. */

/* Mark vector as used. */
int msix_vector_use(PCIDevice *dev, unsigned vector)
{
    if (vector >= dev->msix_entries_nr) {
        return -EINVAL;
    }

    dev->msix_entry_used[vector]++;
    return 0;
}

/* Mark vector as unused. */
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
{
    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
        return;
    }
    if (--dev->msix_entry_used[vector]) {
        return;
    }
    msix_clr_pending(dev, vector);
}

void msix_unuse_all_vectors(PCIDevice *dev)
{
    if (!msix_present(dev)) {
        return;
    }
    msix_free_irq_entries(dev);
}
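
/* Number of vectors the device declared at msix_init() time. */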
unsigned int msix_nr_vectors_allocated(const PCIDevice *dev)
{
    return dev->msix_entries_nr;
}
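
/* Per-vector helpers for the notifier machinery: a notifier is only
 * active while its vector is unmasked. */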
static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    MSIMessage msg;

    if (msix_is_masked(dev, vector)) {
        return 0;
    }
    msg = msix_get_message(dev, vector);
    return dev->msix_vector_use_notifier(dev, vector, msg);
}

static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector)
{
    if (msix_is_masked(dev, vector)) {
        return;
    }
    dev->msix_vector_release_notifier(dev, vector);
}
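
/* Install use/release notifiers.  If MSI-X is enabled and the function is
 * not masked, fire the use notifier for each unmasked vector right away;
 * unwind on failure. */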
int msix_set_vector_notifiers(PCIDevice *dev,
                              MSIVectorUseNotifier use_notifier,
                              MSIVectorReleaseNotifier release_notifier)
{
    int vector, ret;

    assert(use_notifier && release_notifier);

    dev->msix_vector_use_notifier = use_notifier;
    dev->msix_vector_release_notifier = release_notifier;

    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
        (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
            ret = msix_set_notifier_for_vector(dev, vector);
            if (ret < 0) {
                goto undo;
            }
        }
    }
    return 0;

undo:
    while (--vector >= 0) {
        msix_unset_notifier_for_vector(dev, vector);
    }
    dev->msix_vector_use_notifier = NULL;
    dev->msix_vector_release_notifier = NULL;
    return ret;
}

void msix_unset_vector_notifiers(PCIDevice *dev)
{
    int vector;

    assert(dev->msix_vector_use_notifier &&
           dev->msix_vector_release_notifier);

    if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &
        (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) {
        for (vector = 0; vector < dev->msix_entries_nr; vector++) {
            msix_unset_notifier_for_vector(dev, vector);
        }
    }
    dev->msix_vector_use_notifier = NULL;
    dev->msix_vector_release_notifier = NULL;
}