|
@@ -1117,19 +1117,82 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+/*
+ * Return true when the RDMA device @dev reports On-Demand Paging support.
+ * A failed attribute query is treated the same as "not supported".
+ */
+static bool rdma_support_odp(struct ibv_context *dev)
+{
+    struct ibv_device_attr_ex dev_attr = {0};
+
+    if (ibv_query_device_ex(dev, NULL, &dev_attr) != 0) {
+        return false;
+    }
+
+    /* Only the general IBV_ODP_SUPPORT capability bit is consulted. */
+    return (dev_attr.odp_caps.general_caps & IBV_ODP_SUPPORT) != 0;
+}
|
|
|
+
|
|
|
+/*
+ * Best-effort prefetch (write prefetch if @wr) of @len bytes at @addr via
+ * ibv_advise_mr(); a no-op unless HAVE_IBV_ADVISE_MR is defined. A responder
+ * MR registered with ODP sends RNR NAK to the requester on a page fault.
+ */
+static void qemu_rdma_advise_prefetch_mr(struct ibv_pd *pd, uint64_t addr,
+                                         uint32_t len,  uint32_t lkey,
+                                         const char *name, bool wr)
+{
+#ifdef HAVE_IBV_ADVISE_MR
+    int ret;
+    int advice = wr ? IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE :
+                 IBV_ADVISE_MR_ADVICE_PREFETCH;
+    struct ibv_sge sg_list = {.lkey = lkey, .addr = addr, .length = len};
+
+    ret = ibv_advise_mr(pd, advice,
+                        IBV_ADVISE_MR_FLAG_FLUSH, &sg_list, 1);
+    /* ignore the error: ret is an errno value, traced under @name only */
+    if (ret) {
+        trace_qemu_rdma_advise_mr(name, len, addr, strerror(ret));
+    } else {
+        trace_qemu_rdma_advise_mr(name, len, addr, "successed");
+    }
+#endif
+}
|
|
|
+
|
|
|
static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
|
|
|
{
|
|
|
int i;
|
|
|
RDMALocalBlocks *local = &rdma->local_ram_blocks;
|
|
|
|
|
|
for (i = 0; i < local->nb_blocks; i++) {
|
|
|
+ int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
|
|
|
+
|
|
|
local->block[i].mr =
|
|
|
ibv_reg_mr(rdma->pd,
|
|
|
local->block[i].local_host_addr,
|
|
|
- local->block[i].length,
|
|
|
- IBV_ACCESS_LOCAL_WRITE |
|
|
|
- IBV_ACCESS_REMOTE_WRITE
|
|
|
+ local->block[i].length, access
|
|
|
);
|
|
|
+
|
|
|
+ if (!local->block[i].mr &&
|
|
|
+ errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
|
|
|
+ access |= IBV_ACCESS_ON_DEMAND;
|
|
|
+ /* register ODP mr */
|
|
|
+ local->block[i].mr =
|
|
|
+ ibv_reg_mr(rdma->pd,
|
|
|
+ local->block[i].local_host_addr,
|
|
|
+ local->block[i].length, access);
|
|
|
+ trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
|
|
|
+
|
|
|
+ if (local->block[i].mr) {
|
|
|
+ qemu_rdma_advise_prefetch_mr(rdma->pd,
|
|
|
+ (uintptr_t)local->block[i].local_host_addr,
|
|
|
+ local->block[i].length,
|
|
|
+ local->block[i].mr->lkey,
|
|
|
+ local->block[i].block_name,
|
|
|
+ true);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
if (!local->block[i].mr) {
|
|
|
perror("Failed to register local dest ram block!");
|
|
|
break;
|
|
@@ -1215,28 +1278,40 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma,
|
|
|
*/
|
|
|
if (!block->pmr[chunk]) {
|
|
|
uint64_t len = chunk_end - chunk_start;
|
|
|
+ int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE :
|
|
|
+ 0;
|
|
|
|
|
|
trace_qemu_rdma_register_and_get_keys(len, chunk_start);
|
|
|
|
|
|
- block->pmr[chunk] = ibv_reg_mr(rdma->pd,
|
|
|
- chunk_start, len,
|
|
|
- (rkey ? (IBV_ACCESS_LOCAL_WRITE |
|
|
|
- IBV_ACCESS_REMOTE_WRITE) : 0));
|
|
|
-
|
|
|
- if (!block->pmr[chunk]) {
|
|
|
- perror("Failed to register chunk!");
|
|
|
- fprintf(stderr, "Chunk details: block: %d chunk index %d"
|
|
|
- " start %" PRIuPTR " end %" PRIuPTR
|
|
|
- " host %" PRIuPTR
|
|
|
- " local %" PRIuPTR " registrations: %d\n",
|
|
|
- block->index, chunk, (uintptr_t)chunk_start,
|
|
|
- (uintptr_t)chunk_end, host_addr,
|
|
|
- (uintptr_t)block->local_host_addr,
|
|
|
- rdma->total_registrations);
|
|
|
- return -1;
|
|
|
+ block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
|
|
|
+ if (!block->pmr[chunk] &&
|
|
|
+ errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
|
|
|
+ access |= IBV_ACCESS_ON_DEMAND;
|
|
|
+ /* register ODP mr */
|
|
|
+ block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
|
|
|
+ trace_qemu_rdma_register_odp_mr(block->block_name);
|
|
|
+
|
|
|
+ if (block->pmr[chunk]) {
|
|
|
+ qemu_rdma_advise_prefetch_mr(rdma->pd, (uintptr_t)chunk_start,
|
|
|
+ len, block->pmr[chunk]->lkey,
|
|
|
+ block->block_name, rkey);
|
|
|
+
|
|
|
+ }
|
|
|
}
|
|
|
- rdma->total_registrations++;
|
|
|
}
|
|
|
+ if (!block->pmr[chunk]) {
|
|
|
+ perror("Failed to register chunk!");
|
|
|
+ fprintf(stderr, "Chunk details: block: %d chunk index %d"
|
|
|
+ " start %" PRIuPTR " end %" PRIuPTR
|
|
|
+ " host %" PRIuPTR
|
|
|
+ " local %" PRIuPTR " registrations: %d\n",
|
|
|
+ block->index, chunk, (uintptr_t)chunk_start,
|
|
|
+ (uintptr_t)chunk_end, host_addr,
|
|
|
+ (uintptr_t)block->local_host_addr,
|
|
|
+ rdma->total_registrations);
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+ rdma->total_registrations++;
|
|
|
|
|
|
if (lkey) {
|
|
|
*lkey = block->pmr[chunk]->lkey;
|