/*
 * QEMU paravirtual RDMA - Resource Manager Implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"
#include "monitor/monitor.h"

#include "trace.h"
#include "rdma_utils.h"
#include "rdma_backend.h"
#include "rdma_rm.h"

/* Page directory and page tables */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))

void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf)
{
    g_string_append_printf(buf, "\ttx               : %" PRId64 "\n",
                           dev_res->stats.tx);
    g_string_append_printf(buf, "\ttx_len           : %" PRId64 "\n",
                           dev_res->stats.tx_len);
    g_string_append_printf(buf, "\ttx_err           : %" PRId64 "\n",
                           dev_res->stats.tx_err);
    g_string_append_printf(buf, "\trx_bufs          : %" PRId64 "\n",
                           dev_res->stats.rx_bufs);
    g_string_append_printf(buf, "\trx_srq           : %" PRId64 "\n",
                           dev_res->stats.rx_srq);
    g_string_append_printf(buf, "\trx_bufs_len      : %" PRId64 "\n",
                           dev_res->stats.rx_bufs_len);
    g_string_append_printf(buf, "\trx_bufs_err      : %" PRId64 "\n",
                           dev_res->stats.rx_bufs_err);
    g_string_append_printf(buf, "\tcomps            : %" PRId64 "\n",
                           dev_res->stats.completions);
    g_string_append_printf(buf, "\tmissing_comps    : %" PRId32 "\n",
                           dev_res->stats.missing_cqe);
    g_string_append_printf(buf, "\tpoll_cq (bk)     : %" PRId64 "\n",
                           dev_res->stats.poll_cq_from_bk);
    g_string_append_printf(buf, "\tpoll_cq_ppoll_to : %" PRId64 "\n",
                           dev_res->stats.poll_cq_ppoll_to);
    g_string_append_printf(buf, "\tpoll_cq (fe)     : %" PRId64 "\n",
                           dev_res->stats.poll_cq_from_guest);
    g_string_append_printf(buf, "\tpoll_cq_empty    : %" PRId64 "\n",
                           dev_res->stats.poll_cq_from_guest_empty);
    g_string_append_printf(buf, "\tmad_tx           : %" PRId64 "\n",
                           dev_res->stats.mad_tx);
    g_string_append_printf(buf, "\tmad_tx_err       : %" PRId64 "\n",
                           dev_res->stats.mad_tx_err);
    g_string_append_printf(buf, "\tmad_rx           : %" PRId64 "\n",
                           dev_res->stats.mad_rx);
    g_string_append_printf(buf, "\tmad_rx_err       : %" PRId64 "\n",
                           dev_res->stats.mad_rx_err);
    g_string_append_printf(buf, "\tmad_rx_bufs      : %" PRId64 "\n",
                           dev_res->stats.mad_rx_bufs);
    g_string_append_printf(buf, "\tmad_rx_bufs_err  : %" PRId64 "\n",
                           dev_res->stats.mad_rx_bufs_err);
    g_string_append_printf(buf, "\tPDs              : %" PRId32 "\n",
                           dev_res->pd_tbl.used);
    g_string_append_printf(buf, "\tMRs              : %" PRId32 "\n",
                           dev_res->mr_tbl.used);
    g_string_append_printf(buf, "\tUCs              : %" PRId32 "\n",
                           dev_res->uc_tbl.used);
    g_string_append_printf(buf, "\tQPs              : %" PRId32 "\n",
                           dev_res->qp_tbl.used);
    g_string_append_printf(buf, "\tCQs              : %" PRId32 "\n",
                           dev_res->cq_tbl.used);
    g_string_append_printf(buf, "\tCEQ_CTXs         : %" PRId32 "\n",
                           dev_res->cqe_ctx_tbl.used);
}

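/*
 * Generic resource table: a flat array of tbl_sz fixed-size entries plus a
 * bitmap tracking which slots are in use. All per-type tables (PD, CQ, MR,
 * QP, UC, SRQ, CQE_CTX) are built on top of it.
 */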
static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    tbl->tbl = g_malloc(tbl_sz * res_sz);

    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
    tbl->name[MAX_RM_TBL_NAME - 1] = 0;

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    tbl->used = 0;
    qemu_mutex_init(&tbl->lock);
}

static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    if (!tbl->bitmap) {
        return;
    }
    qemu_mutex_destroy(&tbl->lock);
    g_free(tbl->tbl);
    g_free(tbl->bitmap);
}

static inline void *rdma_res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_get(tbl->name, handle);

    if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
        return tbl->tbl + handle * tbl->res_sz;
    } else {
        rdma_error_report("Table %s, invalid handle %d", tbl->name, handle);
        return NULL;
    }
}

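/*
 * Allocate the first free slot: scan the bitmap for a zero bit, mark it used
 * and return the zeroed entry. Returns NULL when the table is full.
 */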
static inline void *rdma_res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    qemu_mutex_lock(&tbl->lock);

    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    /* find_first_zero_bit returns tbl_sz when no free bit is left */
    if (*handle >= tbl->tbl_sz) {
        rdma_error_report("Table %s, failed to allocate, bitmap is full",
                          tbl->name);
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);
    tbl->used++;

    qemu_mutex_unlock(&tbl->lock);

    memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);

    trace_rdma_res_tbl_alloc(tbl->name, *handle);

    return tbl->tbl + *handle * tbl->res_sz;
}

static inline void rdma_res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_dealloc(tbl->name, handle);

    QEMU_LOCK_GUARD(&tbl->lock);

    if (handle < tbl->tbl_sz) {
        clear_bit(handle, tbl->bitmap);
        tbl->used--;
    }
}

int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *pd;
    int ret = -ENOMEM;

    pd = rdma_res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
    if (!pd) {
        goto out;
    }

    ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
    if (ret) {
        ret = -EIO;
        goto out_tbl_dealloc;
    }

    pd->ctx_handle = ctx_handle;

    return 0;

out_tbl_dealloc:
    rdma_res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);

out:
    return ret;
}

RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    return rdma_res_tbl_get(&dev_res->pd_tbl, pd_handle);
}

void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);

    if (pd) {
        rdma_backend_destroy_pd(&pd->backend_pd);
        rdma_res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
    }
}

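/*
 * Register a memory region. When a host mapping is supplied, the stored
 * virtual address is offset by the guest address' position within its page,
 * so guest offsets translate directly to host addresses.
 */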
int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, uint64_t guest_length,
                     void *host_virt, int access_flags, uint32_t *mr_handle,
                     uint32_t *lkey, uint32_t *rkey)
{
    RdmaRmMR *mr;
    int ret = 0;
    RdmaRmPD *pd;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        return -EINVAL;
    }

    mr = rdma_res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (!mr) {
        return -ENOMEM;
    }
    trace_rdma_rm_alloc_mr(*mr_handle, host_virt, guest_start, guest_length,
                           access_flags);

    if (host_virt) {
        mr->virt = host_virt;
        mr->start = guest_start;
        mr->length = guest_length;
        mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));

        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                     mr->length, guest_start, access_flags);
        if (ret) {
            ret = -EIO;
            goto out_dealloc_mr;
        }
#ifdef LEGACY_RDMA_REG_MR
        /* We keep mr_handle in lkey so send and recv can get the mr ptr */
        *lkey = *mr_handle;
#else
        *lkey = rdma_backend_mr_lkey(&mr->backend_mr);
#endif
    }

    *rkey = -1;

    mr->pd_handle = pd_handle;

    return 0;

out_dealloc_mr:
    rdma_res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);

    return ret;
}

RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    return rdma_res_tbl_get(&dev_res->mr_tbl, mr_handle);
}

void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);

    if (mr) {
        rdma_backend_destroy_mr(&mr->backend_mr);
        trace_rdma_rm_dealloc_mr(mr_handle, mr->start);
        if (mr->start) {
            mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1));
            munmap(mr->virt, mr->length);
        }
        rdma_res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
    }
}

int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    RdmaRmUC *uc;

    /* TODO: Need to make sure pfn is between bar start address and
     * bar start address + RDMA_BAR2_UAR_SIZE
    if (pfn > RDMA_BAR2_UAR_SIZE) {
        rdma_error_report("pfn out of range (%d > %d)", pfn,
                          RDMA_BAR2_UAR_SIZE);
        return -ENOMEM;
    }
    */

    uc = rdma_res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
    if (!uc) {
        return -ENOMEM;
    }

    return 0;
}

RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    return rdma_res_tbl_get(&dev_res->uc_tbl, uc_handle);
}

void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);

    if (uc) {
        rdma_res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
    }
}

RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    return rdma_res_tbl_get(&dev_res->cq_tbl, cq_handle);
}

int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    int rc;
    RdmaRmCQ *cq;

    cq = rdma_res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
    if (!cq) {
        return -ENOMEM;
    }

    cq->opaque = opaque;
    cq->notify = CNT_CLEAR;

    rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_cq;
    }

    return 0;

out_dealloc_cq:
    rdma_rm_dealloc_cq(dev_res, *cq_handle);

    return rc;
}

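/*
 * Arm or disarm completion notification. CQs attached to a GSI QP are pinned
 * to CNT_SET (always notify) and must not be overridden here.
 */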
void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    if (cq->notify != CNT_SET) {
        cq->notify = notify ? CNT_ARM : CNT_CLEAR;
    }
}

void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    rdma_backend_destroy_cq(&cq->backend_cq);

    rdma_res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
}

RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
{
    GBytes *key = g_bytes_new(&qpn, sizeof(qpn));

    RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);

    g_bytes_unref(key);

    if (!qp) {
        rdma_error_report("Invalid QP handle %d", qpn);
    }

    return qp;
}

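/*
 * Create a QP. The resource-manager qpn (rm_qpn) indexes qp_tbl, while the
 * qpn exposed to the guest is the one assigned by the backend; qp_hash maps
 * that backend qpn back to the RdmaRmQP entry.
 */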
int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
                     uint8_t is_srq, uint32_t srq_handle)
{
    int rc;
    RdmaRmQP *qp;
    RdmaRmCQ *scq, *rcq;
    RdmaRmPD *pd;
    RdmaRmSRQ *srq = NULL;
    uint32_t rm_qpn;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        return -EINVAL;
    }

    scq = rdma_rm_get_cq(dev_res, send_cq_handle);
    rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);
    if (!scq || !rcq) {
        rdma_error_report("Invalid send_cqn or recv_cqn (%d, %d)",
                          send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    if (is_srq) {
        srq = rdma_rm_get_srq(dev_res, srq_handle);
        if (!srq) {
            rdma_error_report("Invalid srqn %d", srq_handle);
            return -EINVAL;
        }

        srq->recv_cq_handle = recv_cq_handle;
    }

    if (qp_type == IBV_QPT_GSI) {
        scq->notify = CNT_SET;
        rcq->notify = CNT_SET;
    }

    qp = rdma_res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
    if (!qp) {
        return -ENOMEM;
    }

    qp->qpn = rm_qpn;
    qp->qp_state = IBV_QPS_RESET;
    qp->qp_type = qp_type;
    qp->send_cq_handle = send_cq_handle;
    qp->recv_cq_handle = recv_cq_handle;
    qp->opaque = opaque;
    qp->is_srq = is_srq;

    rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
                                &scq->backend_cq, &rcq->backend_cq,
                                is_srq ? &srq->backend_srq : NULL,
                                max_send_wr, max_recv_wr, max_send_sge,
                                max_recv_sge);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_qp;
    }

    *qpn = rdma_backend_qpn(&qp->backend_qp);
    trace_rdma_rm_alloc_qp(rm_qpn, *qpn, qp_type);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);

    return 0;

out_dealloc_qp:
    rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);

    return rc;
}

int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int ret;

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    if (qp->qp_type == IBV_QPT_SMI) {
        rdma_error_report("Got QP0 request");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        return 0;
    }

    trace_rdma_rm_modify_qp(qp_handle, attr_mask, qp_state, sgid_idx);

    if (attr_mask & IBV_QP_STATE) {
        qp->qp_state = qp_state;

        if (qp->qp_state == IBV_QPS_INIT) {
            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                             qp->qp_type, qkey);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTR) {
            /* Get backend gid index */
            sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
                                                     sgid_idx);
            if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
                rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d",
                                  sgid_idx);
                return -EIO;
            }

            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                            qp->qp_type, sgid_idx, dgid, dqpn,
                                            rq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTS) {
            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                            sq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }
    }

    return 0;
}

int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *qp;

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
}

void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    GBytes *key;

    key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    qp = g_hash_table_lookup(dev_res->qp_hash, key);
    g_hash_table_remove(dev_res->qp_hash, key);
    g_bytes_unref(key);

    if (!qp) {
        return;
    }

    rdma_backend_destroy_qp(&qp->backend_qp, dev_res);

    rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}

RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
}

int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
                      uint32_t *srq_handle, void *opaque)
{
    RdmaRmSRQ *srq;
    RdmaRmPD *pd;
    int rc;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        return -EINVAL;
    }

    srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
    if (!srq) {
        return -ENOMEM;
    }

    rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd,
                                 max_wr, max_sge, srq_limit);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_srq;
    }

    srq->opaque = opaque;

    return 0;

out_dealloc_srq:
    rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);

    return rc;
}

int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                      struct ibv_srq_attr *srq_attr)
{
    RdmaRmSRQ *srq;

    srq = rdma_rm_get_srq(dev_res, srq_handle);
    if (!srq) {
        return -EINVAL;
    }

    return rdma_backend_query_srq(&srq->backend_srq, srq_attr);
}

int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                       struct ibv_srq_attr *srq_attr, int srq_attr_mask)
{
    RdmaRmSRQ *srq;

    srq = rdma_rm_get_srq(dev_res, srq_handle);
    if (!srq) {
        return -EINVAL;
    }

    if ((srq_attr_mask & IBV_SRQ_LIMIT) &&
        (srq_attr->srq_limit == 0)) {
        return -EINVAL;
    }

    if ((srq_attr_mask & IBV_SRQ_MAX_WR) &&
        (srq_attr->max_wr == 0)) {
        return -EINVAL;
    }

    return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
                                   srq_attr_mask);
}

void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    RdmaRmSRQ *srq;

    srq = rdma_rm_get_srq(dev_res, srq_handle);
    if (!srq) {
        return;
    }

    rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
    rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
}

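/*
 * CQE contexts associate an opaque pointer (e.g. a work-request context)
 * with the id carried in a completion entry.
 */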
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    void **cqe_ctx;

    cqe_ctx = rdma_res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return NULL;
    }

    return *cqe_ctx;
}

int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **cqe_ctx;

    cqe_ctx = rdma_res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return -ENOMEM;
    }

    *cqe_ctx = ctx;

    return 0;
}

void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    rdma_res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}

int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, union ibv_gid *gid, int gid_idx)
{
    int rc;

    rc = rdma_backend_add_gid(backend_dev, ifname, gid);
    if (rc) {
        return -EINVAL;
    }

    memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));

    return 0;
}

int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, int gid_idx)
{
    int rc;

    if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
        return 0;
    }

    rc = rdma_backend_del_gid(backend_dev, ifname,
                              &dev_res->port.gid_tbl[gid_idx].gid);
    if (rc) {
        return -EINVAL;
    }

    memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
           sizeof(dev_res->port.gid_tbl[gid_idx].gid));
    dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;

    return 0;
}

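/*
 * Translate a device sgid index into the backend's gid index, resolving and
 * caching the mapping on first use.
 */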
int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
                                  RdmaBackendDev *backend_dev, int sgid_idx)
{
    if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) {
        rdma_error_report("Got invalid sgid_idx %d", sgid_idx);
        return -EINVAL;
    }

    if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
        dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
            rdma_backend_get_gid_index(backend_dev,
                                       &dev_res->port.gid_tbl[sgid_idx].gid);
    }

    return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
}

static void destroy_qp_hash_key(gpointer data)
{
    g_bytes_unref(data);
}

static void init_ports(RdmaDeviceResources *dev_res)
{
    int i;

    memset(&dev_res->port, 0, sizeof(dev_res->port));

    dev_res->port.state = IBV_PORT_DOWN;
    for (i = 0; i < MAX_PORT_GIDS; i++) {
        dev_res->port.gid_tbl[i].backend_gid_index = -1;
    }
}

static void fini_ports(RdmaDeviceResources *dev_res,
                       RdmaBackendDev *backend_dev, const char *ifname)
{
    int i;

    dev_res->port.state = IBV_PORT_DOWN;
    for (i = 0; i < MAX_PORT_GIDS; i++) {
        rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
    }
}

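/*
 * Initialize all resource tables, sized from the backend device attributes,
 * together with port state and statistics.
 */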
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
{
    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (!dev_res->qp_hash) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                 dev_attr->max_qp_wr, sizeof(void *));
    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
    res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
                 sizeof(RdmaRmSRQ));

    init_ports(dev_res);

    qemu_mutex_init(&dev_res->lock);

    memset(&dev_res->stats, 0, sizeof(dev_res->stats));
    qatomic_set(&dev_res->stats.missing_cqe, 0);

    return 0;
}

void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                  const char *ifname)
{
    qemu_mutex_destroy(&dev_res->lock);

    fini_ports(dev_res, backend_dev, ifname);

    res_tbl_free(&dev_res->srq_tbl);
    res_tbl_free(&dev_res->uc_tbl);
    res_tbl_free(&dev_res->cqe_ctx_tbl);
    res_tbl_free(&dev_res->qp_tbl);
    res_tbl_free(&dev_res->mr_tbl);
    res_tbl_free(&dev_res->cq_tbl);
    res_tbl_free(&dev_res->pd_tbl);

    if (dev_res->qp_hash) {
        g_hash_table_destroy(dev_res->qp_hash);
    }
}