pcie_sriov.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. /*
  2. * pcie_sriov.c:
  3. *
  4. * Implementation of SR/IOV emulation support.
  5. *
  6. * Copyright (c) 2015-2017 Knut Omang <knut.omang@oracle.com>
  7. *
  8. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  9. * See the COPYING file in the top-level directory.
  10. *
  11. */
  12. #include "qemu/osdep.h"
  13. #include "hw/pci/pci_device.h"
  14. #include "hw/pci/pcie.h"
  15. #include "hw/pci/pci_bus.h"
  16. #include "hw/qdev-properties.h"
  17. #include "qemu/error-report.h"
  18. #include "qemu/range.h"
  19. #include "qapi/error.h"
  20. #include "trace.h"
  21. static PCIDevice *register_vf(PCIDevice *pf, int devfn,
  22. const char *name, uint16_t vf_num);
  23. static void unregister_vfs(PCIDevice *dev);
  24. void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
  25. const char *vfname, uint16_t vf_dev_id,
  26. uint16_t init_vfs, uint16_t total_vfs,
  27. uint16_t vf_offset, uint16_t vf_stride)
  28. {
  29. uint8_t *cfg = dev->config + offset;
  30. uint8_t *wmask;
  31. pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
  32. offset, PCI_EXT_CAP_SRIOV_SIZEOF);
  33. dev->exp.sriov_cap = offset;
  34. dev->exp.sriov_pf.num_vfs = 0;
  35. dev->exp.sriov_pf.vfname = g_strdup(vfname);
  36. dev->exp.sriov_pf.vf = NULL;
  37. pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
  38. pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride);
  39. /*
  40. * Mandatory page sizes to support.
  41. * Device implementations can call pcie_sriov_pf_add_sup_pgsize()
  42. * to set more bits:
  43. */
  44. pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, SRIOV_SUP_PGSIZE_MINREQ);
  45. /*
  46. * Default is to use 4K pages, software can modify it
  47. * to any of the supported bits
  48. */
  49. pci_set_word(cfg + PCI_SRIOV_SYS_PGSIZE, 0x1);
  50. /* Set up device ID and initial/total number of VFs available */
  51. pci_set_word(cfg + PCI_SRIOV_VF_DID, vf_dev_id);
  52. pci_set_word(cfg + PCI_SRIOV_INITIAL_VF, init_vfs);
  53. pci_set_word(cfg + PCI_SRIOV_TOTAL_VF, total_vfs);
  54. pci_set_word(cfg + PCI_SRIOV_NUM_VF, 0);
  55. /* Write enable control bits */
  56. wmask = dev->wmask + offset;
  57. pci_set_word(wmask + PCI_SRIOV_CTRL,
  58. PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI);
  59. pci_set_word(wmask + PCI_SRIOV_NUM_VF, 0xffff);
  60. pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
  61. qdev_prop_set_bit(&dev->qdev, "multifunction", true);
  62. }
  63. void pcie_sriov_pf_exit(PCIDevice *dev)
  64. {
  65. unregister_vfs(dev);
  66. g_free((char *)dev->exp.sriov_pf.vfname);
  67. dev->exp.sriov_pf.vfname = NULL;
  68. }
  69. void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
  70. uint8_t type, dma_addr_t size)
  71. {
  72. uint32_t addr;
  73. uint64_t wmask;
  74. uint16_t sriov_cap = dev->exp.sriov_cap;
  75. assert(sriov_cap > 0);
  76. assert(region_num >= 0);
  77. assert(region_num < PCI_NUM_REGIONS);
  78. assert(region_num != PCI_ROM_SLOT);
  79. wmask = ~(size - 1);
  80. addr = sriov_cap + PCI_SRIOV_BAR + region_num * 4;
  81. pci_set_long(dev->config + addr, type);
  82. if (!(type & PCI_BASE_ADDRESS_SPACE_IO) &&
  83. type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
  84. pci_set_quad(dev->wmask + addr, wmask);
  85. pci_set_quad(dev->cmask + addr, ~0ULL);
  86. } else {
  87. pci_set_long(dev->wmask + addr, wmask & 0xffffffff);
  88. pci_set_long(dev->cmask + addr, 0xffffffff);
  89. }
  90. dev->exp.sriov_pf.vf_bar_type[region_num] = type;
  91. }
  92. void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
  93. MemoryRegion *memory)
  94. {
  95. PCIIORegion *r;
  96. PCIBus *bus = pci_get_bus(dev);
  97. uint8_t type;
  98. pcibus_t size = memory_region_size(memory);
  99. assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
  100. assert(region_num >= 0);
  101. assert(region_num < PCI_NUM_REGIONS);
  102. type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
  103. if (!is_power_of_2(size)) {
  104. error_report("%s: PCI region size must be a power"
  105. " of two - type=0x%x, size=0x%"FMT_PCIBUS,
  106. __func__, type, size);
  107. exit(1);
  108. }
  109. r = &dev->io_regions[region_num];
  110. r->memory = memory;
  111. r->address_space =
  112. type & PCI_BASE_ADDRESS_SPACE_IO
  113. ? bus->address_space_io
  114. : bus->address_space_mem;
  115. r->size = size;
  116. r->type = type;
  117. r->addr = pci_bar_address(dev, region_num, r->type, r->size);
  118. if (r->addr != PCI_BAR_UNMAPPED) {
  119. memory_region_add_subregion_overlap(r->address_space,
  120. r->addr, r->memory, 1);
  121. }
  122. }
  123. static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
  124. uint16_t vf_num)
  125. {
  126. PCIDevice *dev = pci_new(devfn, name);
  127. dev->exp.sriov_vf.pf = pf;
  128. dev->exp.sriov_vf.vf_number = vf_num;
  129. PCIBus *bus = pci_get_bus(pf);
  130. Error *local_err = NULL;
  131. qdev_realize(&dev->qdev, &bus->qbus, &local_err);
  132. if (local_err) {
  133. error_report_err(local_err);
  134. return NULL;
  135. }
  136. /* set vid/did according to sr/iov spec - they are not used */
  137. pci_config_set_vendor_id(dev->config, 0xffff);
  138. pci_config_set_device_id(dev->config, 0xffff);
  139. return dev;
  140. }
  141. static void register_vfs(PCIDevice *dev)
  142. {
  143. uint16_t num_vfs;
  144. uint16_t i;
  145. uint16_t sriov_cap = dev->exp.sriov_cap;
  146. uint16_t vf_offset =
  147. pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
  148. uint16_t vf_stride =
  149. pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
  150. int32_t devfn = dev->devfn + vf_offset;
  151. assert(sriov_cap > 0);
  152. num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
  153. dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
  154. assert(dev->exp.sriov_pf.vf);
  155. trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
  156. PCI_FUNC(dev->devfn), num_vfs);
  157. for (i = 0; i < num_vfs; i++) {
  158. dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
  159. dev->exp.sriov_pf.vfname, i);
  160. if (!dev->exp.sriov_pf.vf[i]) {
  161. num_vfs = i;
  162. break;
  163. }
  164. devfn += vf_stride;
  165. }
  166. dev->exp.sriov_pf.num_vfs = num_vfs;
  167. }
  168. static void unregister_vfs(PCIDevice *dev)
  169. {
  170. Error *local_err = NULL;
  171. uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
  172. uint16_t i;
  173. trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
  174. PCI_FUNC(dev->devfn), num_vfs);
  175. for (i = 0; i < num_vfs; i++) {
  176. PCIDevice *vf = dev->exp.sriov_pf.vf[i];
  177. object_property_set_bool(OBJECT(vf), "realized", false, &local_err);
  178. if (local_err) {
  179. fprintf(stderr, "Failed to unplug: %s\n",
  180. error_get_pretty(local_err));
  181. error_free(local_err);
  182. }
  183. object_unparent(OBJECT(vf));
  184. }
  185. g_free(dev->exp.sriov_pf.vf);
  186. dev->exp.sriov_pf.vf = NULL;
  187. dev->exp.sriov_pf.num_vfs = 0;
  188. pci_set_word(dev->config + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0);
  189. }
  190. void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
  191. uint32_t val, int len)
  192. {
  193. uint32_t off;
  194. uint16_t sriov_cap = dev->exp.sriov_cap;
  195. if (!sriov_cap || address < sriov_cap) {
  196. return;
  197. }
  198. off = address - sriov_cap;
  199. if (off >= PCI_EXT_CAP_SRIOV_SIZEOF) {
  200. return;
  201. }
  202. trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn),
  203. PCI_FUNC(dev->devfn), off, val, len);
  204. if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
  205. if (dev->exp.sriov_pf.num_vfs) {
  206. if (!(val & PCI_SRIOV_CTRL_VFE)) {
  207. unregister_vfs(dev);
  208. }
  209. } else {
  210. if (val & PCI_SRIOV_CTRL_VFE) {
  211. register_vfs(dev);
  212. }
  213. }
  214. }
  215. }
  216. /* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs */
  217. void pcie_sriov_pf_disable_vfs(PCIDevice *dev)
  218. {
  219. uint16_t sriov_cap = dev->exp.sriov_cap;
  220. if (sriov_cap) {
  221. uint32_t val = pci_get_byte(dev->config + sriov_cap + PCI_SRIOV_CTRL);
  222. if (val & PCI_SRIOV_CTRL_VFE) {
  223. val &= ~PCI_SRIOV_CTRL_VFE;
  224. pcie_sriov_config_write(dev, sriov_cap + PCI_SRIOV_CTRL, val, 1);
  225. }
  226. }
  227. }
  228. /* Add optional supported page sizes to the mask of supported page sizes */
  229. void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
  230. {
  231. uint8_t *cfg = dev->config + dev->exp.sriov_cap;
  232. uint8_t *wmask = dev->wmask + dev->exp.sriov_cap;
  233. uint16_t sup_pgsize = pci_get_word(cfg + PCI_SRIOV_SUP_PGSIZE);
  234. sup_pgsize |= opt_sup_pgsize;
  235. /*
  236. * Make sure the new bits are set, and that system page size
  237. * also can be set to any of the new values according to spec:
  238. */
  239. pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, sup_pgsize);
  240. pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, sup_pgsize);
  241. }
  242. uint16_t pcie_sriov_vf_number(PCIDevice *dev)
  243. {
  244. assert(pci_is_vf(dev));
  245. return dev->exp.sriov_vf.vf_number;
  246. }
  247. PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
  248. {
  249. return dev->exp.sriov_vf.pf;
  250. }
  251. PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
  252. {
  253. assert(!pci_is_vf(dev));
  254. if (n < dev->exp.sriov_pf.num_vfs) {
  255. return dev->exp.sriov_pf.vf[n];
  256. }
  257. return NULL;
  258. }