2
0

vfio.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /*
  2. * VFIO API definition
  3. *
  4. * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
  5. * Author: Alex Williamson <alex.williamson@redhat.com>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. */
  11. #ifndef VFIO_H
  12. #define VFIO_H
  13. #include <linux/types.h>
  14. #include <linux/ioctl.h>
  15. #define VFIO_API_VERSION 0
  16. /* Kernel & User level defines for VFIO IOCTLs. */
  17. /* Extensions */
  18. #define VFIO_TYPE1_IOMMU 1
  19. #define VFIO_SPAPR_TCE_IOMMU 2
  20. #define VFIO_TYPE1v2_IOMMU 3
  21. /*
  22. * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping). This
  23. * capability is subject to change as groups are added or removed.
  24. */
  25. #define VFIO_DMA_CC_IOMMU 4
  26. /*
  27. * The IOCTL interface is designed for extensibility by embedding the
  28. * structure length (argsz) and flags into structures passed between
  29. * kernel and userspace. We therefore use the _IO() macro for these
  30. * defines to avoid implicitly embedding a size into the ioctl request.
  31. * As structure fields are added, argsz will increase to match and flag
  32. * bits will be defined to indicate additional fields with valid data.
  33. * It's *always* the caller's responsibility to indicate the size of
  34. * the structure passed by setting argsz appropriately.
  35. */
  36. #define VFIO_TYPE (';')
  37. #define VFIO_BASE 100
  38. /* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
  39. /**
  40. * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
  41. *
  42. * Report the version of the VFIO API. This allows us to bump the entire
  43. * API version should we later need to add or change features in incompatible
  44. * ways.
  45. * Return: VFIO_API_VERSION
  46. * Availability: Always
  47. */
  48. #define VFIO_GET_API_VERSION _IO(VFIO_TYPE, VFIO_BASE + 0)
  49. /**
  50. * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
  51. *
  52. * Check whether an extension is supported.
  53. * Return: 0 if not supported, 1 (or some other positive integer) if supported.
  54. * Availability: Always
  55. */
  56. #define VFIO_CHECK_EXTENSION _IO(VFIO_TYPE, VFIO_BASE + 1)
  57. /**
  58. * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
  59. *
  60. * Set the iommu to the given type. The type must be supported by an
  61. * iommu driver as verified by calling CHECK_EXTENSION using the same
  62. * type. A group must be set to this file descriptor before this
  63. * ioctl is available. The IOMMU interfaces enabled by this call are
  64. * specific to the value set.
  65. * Return: 0 on success, -errno on failure
  66. * Availability: When VFIO group attached
  67. */
  68. #define VFIO_SET_IOMMU _IO(VFIO_TYPE, VFIO_BASE + 2)
  69. /* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
  70. /**
  71. * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
  72. * struct vfio_group_status)
  73. *
  74. * Retrieve information about the group. Fills in provided
  75. * struct vfio_group_info. Caller sets argsz.
  76. * Return: 0 on succes, -errno on failure.
  77. * Availability: Always
  78. */
  79. struct vfio_group_status {
  80. __u32 argsz;
  81. __u32 flags;
  82. #define VFIO_GROUP_FLAGS_VIABLE (1 << 0)
  83. #define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1)
  84. };
  85. #define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3)
  86. /**
  87. * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
  88. *
  89. * Set the container for the VFIO group to the open VFIO file
  90. * descriptor provided. Groups may only belong to a single
  91. * container. Containers may, at their discretion, support multiple
  92. * groups. Only when a container is set are all of the interfaces
  93. * of the VFIO file descriptor and the VFIO group file descriptor
  94. * available to the user.
  95. * Return: 0 on success, -errno on failure.
  96. * Availability: Always
  97. */
  98. #define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4)
  99. /**
  100. * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
  101. *
  102. * Remove the group from the attached container. This is the
  103. * opposite of the SET_CONTAINER call and returns the group to
  104. * an initial state. All device file descriptors must be released
  105. * prior to calling this interface. When removing the last group
  106. * from a container, the IOMMU will be disabled and all state lost,
  107. * effectively also returning the VFIO file descriptor to an initial
  108. * state.
  109. * Return: 0 on success, -errno on failure.
  110. * Availability: When attached to container
  111. */
  112. #define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5)
  113. /**
  114. * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
  115. *
  116. * Return a new file descriptor for the device object described by
  117. * the provided string. The string should match a device listed in
  118. * the devices subdirectory of the IOMMU group sysfs entry. The
  119. * group containing the device must already be added to this context.
  120. * Return: new file descriptor on success, -errno on failure.
  121. * Availability: When attached to container
  122. */
  123. #define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6)
  124. /* --------------- IOCTLs for DEVICE file descriptors --------------- */
  125. /**
  126. * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
  127. * struct vfio_device_info)
  128. *
  129. * Retrieve information about the device. Fills in provided
  130. * struct vfio_device_info. Caller sets argsz.
  131. * Return: 0 on success, -errno on failure.
  132. */
  133. struct vfio_device_info {
  134. __u32 argsz;
  135. __u32 flags;
  136. #define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */
  137. #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */
  138. __u32 num_regions; /* Max region index + 1 */
  139. __u32 num_irqs; /* Max IRQ index + 1 */
  140. };
  141. #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7)
  142. /**
  143. * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  144. * struct vfio_region_info)
  145. *
  146. * Retrieve information about a device region. Caller provides
  147. * struct vfio_region_info with index value set. Caller sets argsz.
  148. * Implementation of region mapping is bus driver specific. This is
  149. * intended to describe MMIO, I/O port, as well as bus specific
  150. * regions (ex. PCI config space). Zero sized regions may be used
  151. * to describe unimplemented regions (ex. unimplemented PCI BARs).
  152. * Return: 0 on success, -errno on failure.
  153. */
  154. struct vfio_region_info {
  155. __u32 argsz;
  156. __u32 flags;
  157. #define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */
  158. #define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
  159. #define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */
  160. __u32 index; /* Region index */
  161. __u32 resv; /* Reserved for alignment */
  162. __u64 size; /* Region size (bytes) */
  163. __u64 offset; /* Region offset from start of device fd */
  164. };
  165. #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8)
  166. /**
  167. * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
  168. * struct vfio_irq_info)
  169. *
  170. * Retrieve information about a device IRQ. Caller provides
  171. * struct vfio_irq_info with index value set. Caller sets argsz.
  172. * Implementation of IRQ mapping is bus driver specific. Indexes
  173. * using multiple IRQs are primarily intended to support MSI-like
  174. * interrupt blocks. Zero count irq blocks may be used to describe
  175. * unimplemented interrupt types.
  176. *
  177. * The EVENTFD flag indicates the interrupt index supports eventfd based
  178. * signaling.
  179. *
  180. * The MASKABLE flags indicates the index supports MASK and UNMASK
  181. * actions described below.
  182. *
  183. * AUTOMASKED indicates that after signaling, the interrupt line is
  184. * automatically masked by VFIO and the user needs to unmask the line
  185. * to receive new interrupts. This is primarily intended to distinguish
  186. * level triggered interrupts.
  187. *
  188. * The NORESIZE flag indicates that the interrupt lines within the index
  189. * are setup as a set and new subindexes cannot be enabled without first
  190. * disabling the entire index. This is used for interrupts like PCI MSI
  191. * and MSI-X where the driver may only use a subset of the available
  192. * indexes, but VFIO needs to enable a specific number of vectors
  193. * upfront. In the case of MSI-X, where the user can enable MSI-X and
  194. * then add and unmask vectors, it's up to userspace to make the decision
  195. * whether to allocate the maximum supported number of vectors or tear
  196. * down setup and incrementally increase the vectors as each is enabled.
  197. */
  198. struct vfio_irq_info {
  199. __u32 argsz;
  200. __u32 flags;
  201. #define VFIO_IRQ_INFO_EVENTFD (1 << 0)
  202. #define VFIO_IRQ_INFO_MASKABLE (1 << 1)
  203. #define VFIO_IRQ_INFO_AUTOMASKED (1 << 2)
  204. #define VFIO_IRQ_INFO_NORESIZE (1 << 3)
  205. __u32 index; /* IRQ index */
  206. __u32 count; /* Number of IRQs within this index */
  207. };
  208. #define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9)
  209. /**
  210. * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
  211. *
  212. * Set signaling, masking, and unmasking of interrupts. Caller provides
  213. * struct vfio_irq_set with all fields set. 'start' and 'count' indicate
  214. * the range of subindexes being specified.
  215. *
  216. * The DATA flags specify the type of data provided. If DATA_NONE, the
  217. * operation performs the specified action immediately on the specified
  218. * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]:
  219. * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
  220. *
  221. * DATA_BOOL allows sparse support for the same on arrays of interrupts.
  222. * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
  223. * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
  224. * data = {1,0,1}
  225. *
  226. * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
  227. * A value of -1 can be used to either de-assign interrupts if already
  228. * assigned or skip un-assigned interrupts. For example, to set an eventfd
  229. * to be trigger for interrupts [0,0] and [0,2]:
  230. * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
  231. * data = {fd1, -1, fd2}
  232. * If index [0,1] is previously set, two count = 1 ioctls calls would be
  233. * required to set [0,0] and [0,2] without changing [0,1].
  234. *
  235. * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
  236. * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
  237. * from userspace (ie. simulate hardware triggering).
  238. *
  239. * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
  240. * enables the interrupt index for the device. Individual subindex interrupts
  241. * can be disabled using the -1 value for DATA_EVENTFD or the index can be
  242. * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
  243. *
  244. * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
  245. * ACTION_TRIGGER specifies kernel->user signaling.
  246. */
  247. struct vfio_irq_set {
  248. __u32 argsz;
  249. __u32 flags;
  250. #define VFIO_IRQ_SET_DATA_NONE (1 << 0) /* Data not present */
  251. #define VFIO_IRQ_SET_DATA_BOOL (1 << 1) /* Data is bool (u8) */
  252. #define VFIO_IRQ_SET_DATA_EVENTFD (1 << 2) /* Data is eventfd (s32) */
  253. #define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */
  254. #define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */
  255. #define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */
  256. __u32 index;
  257. __u32 start;
  258. __u32 count;
  259. __u8 data[];
  260. };
  261. #define VFIO_DEVICE_SET_IRQS _IO(VFIO_TYPE, VFIO_BASE + 10)
  262. #define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \
  263. VFIO_IRQ_SET_DATA_BOOL | \
  264. VFIO_IRQ_SET_DATA_EVENTFD)
  265. #define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \
  266. VFIO_IRQ_SET_ACTION_UNMASK | \
  267. VFIO_IRQ_SET_ACTION_TRIGGER)
  268. /**
  269. * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
  270. *
  271. * Reset a device.
  272. */
  273. #define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11)
  274. /*
  275. * The VFIO-PCI bus driver makes use of the following fixed region and
  276. * IRQ index mapping. Unimplemented regions return a size of zero.
  277. * Unimplemented IRQ types return a count of zero.
  278. */
  279. enum {
  280. VFIO_PCI_BAR0_REGION_INDEX,
  281. VFIO_PCI_BAR1_REGION_INDEX,
  282. VFIO_PCI_BAR2_REGION_INDEX,
  283. VFIO_PCI_BAR3_REGION_INDEX,
  284. VFIO_PCI_BAR4_REGION_INDEX,
  285. VFIO_PCI_BAR5_REGION_INDEX,
  286. VFIO_PCI_ROM_REGION_INDEX,
  287. VFIO_PCI_CONFIG_REGION_INDEX,
  288. /*
  289. * Expose VGA regions defined for PCI base class 03, subclass 00.
  290. * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df
  291. * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented
  292. * range is found at it's identity mapped offset from the region
  293. * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas
  294. * between described ranges are unimplemented.
  295. */
  296. VFIO_PCI_VGA_REGION_INDEX,
  297. VFIO_PCI_NUM_REGIONS
  298. };
  299. enum {
  300. VFIO_PCI_INTX_IRQ_INDEX,
  301. VFIO_PCI_MSI_IRQ_INDEX,
  302. VFIO_PCI_MSIX_IRQ_INDEX,
  303. VFIO_PCI_ERR_IRQ_INDEX,
  304. VFIO_PCI_NUM_IRQS
  305. };
  306. /**
  307. * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
  308. * struct vfio_pci_hot_reset_info)
  309. *
  310. * Return: 0 on success, -errno on failure:
  311. * -enospc = insufficient buffer, -enodev = unsupported for device.
  312. */
  313. struct vfio_pci_dependent_device {
  314. __u32 group_id;
  315. __u16 segment;
  316. __u8 bus;
  317. __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */
  318. };
  319. struct vfio_pci_hot_reset_info {
  320. __u32 argsz;
  321. __u32 flags;
  322. __u32 count;
  323. struct vfio_pci_dependent_device devices[];
  324. };
  325. #define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
  326. /**
  327. * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
  328. * struct vfio_pci_hot_reset)
  329. *
  330. * Return: 0 on success, -errno on failure.
  331. */
  332. struct vfio_pci_hot_reset {
  333. __u32 argsz;
  334. __u32 flags;
  335. __u32 count;
  336. __s32 group_fds[];
  337. };
  338. #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13)
  339. /* -------- API for Type1 VFIO IOMMU -------- */
  340. /**
  341. * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info)
  342. *
  343. * Retrieve information about the IOMMU object. Fills in provided
  344. * struct vfio_iommu_info. Caller sets argsz.
  345. *
  346. * XXX Should we do these by CHECK_EXTENSION too?
  347. */
  348. struct vfio_iommu_type1_info {
  349. __u32 argsz;
  350. __u32 flags;
  351. #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
  352. __u64 iova_pgsizes; /* Bitmap of supported page sizes */
  353. };
  354. #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
  355. /**
  356. * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map)
  357. *
  358. * Map process virtual addresses to IO virtual addresses using the
  359. * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
  360. */
  361. struct vfio_iommu_type1_dma_map {
  362. __u32 argsz;
  363. __u32 flags;
  364. #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
  365. #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
  366. __u64 vaddr; /* Process virtual address */
  367. __u64 iova; /* IO virtual address */
  368. __u64 size; /* Size of mapping (bytes) */
  369. };
  370. #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
  371. /**
  372. * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
  373. * struct vfio_dma_unmap)
  374. *
  375. * Unmap IO virtual addresses using the provided struct vfio_dma_unmap.
  376. * Caller sets argsz. The actual unmapped size is returned in the size
  377. * field. No guarantee is made to the user that arbitrary unmaps of iova
  378. * or size different from those used in the original mapping call will
  379. * succeed.
  380. */
  381. struct vfio_iommu_type1_dma_unmap {
  382. __u32 argsz;
  383. __u32 flags;
  384. __u64 iova; /* IO virtual address */
  385. __u64 size; /* Size of mapping (bytes) */
  386. };
  387. #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
  388. /*
  389. * IOCTLs to enable/disable IOMMU container usage.
  390. * No parameters are supported.
  391. */
  392. #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15)
  393. #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16)
  394. /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
  395. /*
  396. * The SPAPR TCE info struct provides the information about the PCI bus
  397. * address ranges available for DMA, these values are programmed into
  398. * the hardware so the guest has to know that information.
  399. *
  400. * The DMA 32 bit window start is an absolute PCI bus address.
  401. * The IOVA address passed via map/unmap ioctls are absolute PCI bus
  402. * addresses too so the window works as a filter rather than an offset
  403. * for IOVA addresses.
  404. *
  405. * A flag will need to be added if other page sizes are supported,
  406. * so as defined here, it is always 4k.
  407. */
  408. struct vfio_iommu_spapr_tce_info {
  409. __u32 argsz;
  410. __u32 flags; /* reserved for future use */
  411. __u32 dma32_window_start; /* 32 bit window start (bytes) */
  412. __u32 dma32_window_size; /* 32 bit window size (bytes) */
  413. };
  414. #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
  415. /* ***************************************************************** */
  416. #endif /* VFIO_H */