/*
 * DMA helper functions
 *
 * Copyright (c) 2009,2020 Red Hat
 *
 * This work is licensed under the terms of the GNU General Public License
 * (GNU GPL), version 2 or later.
 */

#include "qemu/osdep.h"
#include "system/block-backend.h"
#include "system/dma.h"
#include "trace.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "system/cpu-timers.h"
#include "qemu/range.h"

/* #define DEBUG_IOMMU */

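/*
 * Fill @len bytes of guest memory at @addr in @as with the byte @c,
 * preceded by a DMA barrier.
 */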
MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
                           uint8_t c, dma_addr_t len, MemTxAttrs attrs)
{
    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);

    return address_space_set(as, addr, c, len, attrs);
}

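/*
 * Initialise @qsg with room for @alloc_hint entries.  A reference to the
 * owning device is taken here and dropped again in qemu_sglist_destroy().
 */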
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                      AddressSpace *as)
{
    qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
    qsg->nsg = 0;
    qsg->nalloc = alloc_hint;
    qsg->size = 0;
    qsg->as = as;
    qsg->dev = dev;
    object_ref(OBJECT(dev));
}

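/*
 * Append a [base, base + len) entry to the list, growing the backing array
 * geometrically (2n + 1) when it is full.
 */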
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
    }
    qsg->sg[qsg->nsg].base = base;
    qsg->sg[qsg->nsg].len = len;
    qsg->size += len;
    ++qsg->nsg;
}

void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
}

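/*
 * State of one in-flight scatter/gather block transfer.  sg_cur_index and
 * sg_cur_byte track how far the list has been mapped, iov holds the host
 * addresses of the currently mapped chunk, and acb is the outstanding
 * block-layer request for that chunk.
 */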
typedef struct {
    BlockAIOCB common;
    AioContext *ctx;
    BlockAIOCB *acb;
    QEMUSGList *sg;
    uint32_t align;
    uint64_t offset;
    DMADirection dir;
    int sg_cur_index;
    dma_addr_t sg_cur_byte;
    QEMUIOVector iov;
    QEMUBH *bh;
    DMAIOFunc *io_func;
    void *io_func_opaque;
} DMAAIOCB;

static void dma_blk_cb(void *opaque, int ret);

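/*
 * Bottom half scheduled through address_space_register_map_client() once
 * mapping resources (e.g. the bounce buffer) become available again; it
 * simply restarts the transfer loop.
 */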
static void reschedule_dma(void *opaque)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;

    assert(!dbs->acb && dbs->bh);
    qemu_bh_delete(dbs->bh);
    dbs->bh = NULL;
    dma_blk_cb(dbs, 0);
}

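/* Unmap every buffer mapped for the current chunk and reset the iovec. */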
static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int i;

    for (i = 0; i < dbs->iov.niov; ++i) {
        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                         dbs->iov.iov[i].iov_len, dbs->dir,
                         dbs->iov.iov[i].iov_len);
    }
    qemu_iovec_reset(&dbs->iov);
}

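/*
 * Finish the transfer: unmap any remaining buffers, run the caller's
 * completion callback and release the AIOCB.
 */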
static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    assert(!dbs->acb && !dbs->bh);
    dma_blk_unmap(dbs);
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, ret);
    }
    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}

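/*
 * Transfer-driving callback.  Each invocation accounts for the chunk that
 * just completed, unmaps it, maps as much of the remaining scatter/gather
 * list as possible and submits the next chunk through io_func with this
 * function as its completion callback, until the list is exhausted or an
 * error is seen and dma_complete() ends the request.
 */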
static void dma_blk_cb(void *opaque, int ret)
{
    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
    AioContext *ctx = dbs->ctx;
    dma_addr_t cur_addr, cur_len;
    void *mem;

    trace_dma_blk_cb(dbs, ret);

    /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */
    assert(ctx == qemu_get_current_aio_context());

    dbs->acb = NULL;
    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }
    dma_blk_unmap(dbs);

    while (dbs->sg_cur_index < dbs->sg->nsg) {
        cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
        cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
                             MEMTXATTRS_UNSPECIFIED);
        /*
         * Make reads deterministic in icount mode.  Windows sometimes issues
         * disk read requests with overlapping SGs, which leads to
         * non-determinism because the resulting buffer contents may be mixed
         * from several sectors.  This code splits all SGs into several
         * groups, such that the SGs within a group do not overlap.
         */
        if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
            int i;
            for (i = 0 ; i < dbs->iov.niov ; ++i) {
                if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
                                   dbs->iov.iov[i].iov_len, (intptr_t)mem,
                                   cur_len)) {
                    dma_memory_unmap(dbs->sg->as, mem, cur_len,
                                     dbs->dir, cur_len);
                    mem = NULL;
                    break;
                }
            }
        }
        if (!mem)
            break;
        qemu_iovec_add(&dbs->iov, mem, cur_len);
        dbs->sg_cur_byte += cur_len;
        if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
            dbs->sg_cur_byte = 0;
            ++dbs->sg_cur_index;
        }
    }

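    /*
     * Nothing could be mapped at all (e.g. the bounce buffer is already in
     * use): wait for a map-client notification and retry via reschedule_dma().
     */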
    if (dbs->iov.size == 0) {
        trace_dma_map_wait(dbs);
        dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
        address_space_register_map_client(dbs->sg->as, dbs->bh);
        return;
    }

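    /*
     * The block layer expects requests that are a multiple of dbs->align
     * (e.g. the logical sector size), so drop any sub-alignment tail from
     * this chunk before submitting it.
     */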
    if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
        qemu_iovec_discard_back(&dbs->iov,
                                dbs->iov.size & (dbs->align - 1));
    }

    dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
                            dma_blk_cb, dbs, dbs->io_func_opaque);
    assert(dbs->acb);
}

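/*
 * Cancellation has two cases: either a block request is in flight, in which
 * case it is cancelled asynchronously and dma_blk_cb() performs the cleanup,
 * or the transfer is parked waiting for a map-client notification, in which
 * case the bottom half is dropped and the completion callback is invoked
 * with -ECANCELED right here.
 */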
static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    assert(!(dbs->acb && dbs->bh));
    if (dbs->acb) {
        /* This will invoke dma_blk_cb. */
        blk_aio_cancel_async(dbs->acb);
        return;
    }

    if (dbs->bh) {
        address_space_unregister_map_client(dbs->sg->as, dbs->bh);
        qemu_bh_delete(dbs->bh);
        dbs->bh = NULL;
    }
    if (dbs->common.cb) {
        dbs->common.cb(dbs->common.opaque, -ECANCELED);
    }
}

static const AIOCBInfo dma_aiocb_info = {
    .aiocb_size = sizeof(DMAAIOCB),
    .cancel_async = dma_aio_cancel,
};

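/*
 * Start a scatter/gather transfer of @sg against byte @offset of a backend.
 * The transfer is issued through @io_func in chunks whose size is a multiple
 * of @align, and runs in the caller's AioContext.  @cb runs once with the
 * final status; the returned AIOCB can be used to cancel the transfer.
 */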
BlockAIOCB *dma_blk_io(
    QEMUSGList *sg, uint64_t offset, uint32_t align,
    DMAIOFunc *io_func, void *io_func_opaque,
    BlockCompletionFunc *cb,
    void *opaque, DMADirection dir)
{
    DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);

    trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));

    dbs->acb = NULL;
    dbs->sg = sg;
    dbs->ctx = qemu_get_current_aio_context();
    dbs->offset = offset;
    dbs->align = align;
    dbs->sg_cur_index = 0;
    dbs->sg_cur_byte = 0;
    dbs->dir = dir;
    dbs->io_func = io_func;
    dbs->io_func_opaque = io_func_opaque;
    dbs->bh = NULL;
    qemu_iovec_init(&dbs->iov, sg->nsg);
    dma_blk_cb(dbs, 0);
    return &dbs->common;
}

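/* Adapters that bind a BlockBackend to the DMAIOFunc signature. */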
static
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
                                 BlockCompletionFunc *cb, void *cb_opaque,
                                 void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_read(BlockBackend *blk,
                         QEMUSGList *sg, uint64_t offset, uint32_t align,
                         void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(sg, offset, align,
                      dma_blk_read_io_func, blk, cb, opaque,
                      DMA_DIRECTION_FROM_DEVICE);
}

static
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
                                  BlockCompletionFunc *cb, void *cb_opaque,
                                  void *opaque)
{
    BlockBackend *blk = opaque;
    return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
}

BlockAIOCB *dma_blk_write(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
{
    return dma_blk_io(sg, offset, align,
                      dma_blk_write_io_func, blk, cb, opaque,
                      DMA_DIRECTION_TO_DEVICE);
}

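/*
 * Copy between a linear buffer and the guest memory described by @sg, one
 * entry at a time.  At most MIN(@len, sg->size) bytes are transferred; if
 * @residual is non-NULL, it receives the number of sg bytes left untouched.
 */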
static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
                              QEMUSGList *sg, DMADirection dir,
                              MemTxAttrs attrs)
{
    uint8_t *ptr = buf;
    dma_addr_t xresidual;
    int sg_cur_index;
    MemTxResult res = MEMTX_OK;

    xresidual = sg->size;
    sg_cur_index = 0;
    len = MIN(len, xresidual);
    while (len > 0) {
        ScatterGatherEntry entry = sg->sg[sg_cur_index++];
        dma_addr_t xfer = MIN(len, entry.len);
        res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
        ptr += xfer;
        len -= xfer;
        xresidual -= xfer;
    }

    if (residual) {
        *residual = xresidual;
    }
    return res;
}

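/*
 * dma_buf_read() copies from @ptr into the guest memory described by @sg
 * (data flowing from the device into memory); dma_buf_write() copies from
 * guest memory into @ptr (data flowing from memory to the device).
 */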
MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
                         QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
}

MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
                          QEMUSGList *sg, MemTxAttrs attrs)
{
    return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
}

void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
{
    block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
}

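/*
 * Return the mask of the largest naturally aligned power-of-two region that
 * starts at @start, fits within [@start, @end] and is no wider than
 * @max_addr_bits bits; the result is limited either by the alignment of
 * @start or by the size of the range, whichever is smaller.  For example,
 * start=0x2000, end=0x2fff yields 0xfff (a 4KiB page mask).
 */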
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
    uint64_t alignment_mask, size_mask;

    if (max_addr_bits != 64) {
        max_mask = (1ULL << max_addr_bits) - 1;
    }

    alignment_mask = start ? (start & -start) - 1 : max_mask;
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    } else {
        /* Find the largest page mask from size */
        if (addr_mask == UINT64_MAX) {
            return UINT64_MAX;
        }
        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
    }
}