rdma_rm.c

/*
 * QEMU paravirtual RDMA - Resource Manager Implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"
#include "monitor/monitor.h"

#include "trace.h"
#include "rdma_utils.h"
#include "rdma_backend.h"
#include "rdma_rm.h"

/* Page directory and page tables */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))

void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res)
{
    monitor_printf(mon, "\ttx : %" PRId64 "\n",
                   dev_res->stats.tx);
    monitor_printf(mon, "\ttx_len : %" PRId64 "\n",
                   dev_res->stats.tx_len);
    monitor_printf(mon, "\ttx_err : %" PRId64 "\n",
                   dev_res->stats.tx_err);
    monitor_printf(mon, "\trx_bufs : %" PRId64 "\n",
                   dev_res->stats.rx_bufs);
    monitor_printf(mon, "\trx_srq : %" PRId64 "\n",
                   dev_res->stats.rx_srq);
    monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n",
                   dev_res->stats.rx_bufs_len);
    monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n",
                   dev_res->stats.rx_bufs_err);
    monitor_printf(mon, "\tcomps : %" PRId64 "\n",
                   dev_res->stats.completions);
    monitor_printf(mon, "\tmissing_comps : %" PRId32 "\n",
                   dev_res->stats.missing_cqe);
    monitor_printf(mon, "\tpoll_cq (bk) : %" PRId64 "\n",
                   dev_res->stats.poll_cq_from_bk);
    monitor_printf(mon, "\tpoll_cq_ppoll_to : %" PRId64 "\n",
                   dev_res->stats.poll_cq_ppoll_to);
    monitor_printf(mon, "\tpoll_cq (fe) : %" PRId64 "\n",
                   dev_res->stats.poll_cq_from_guest);
    monitor_printf(mon, "\tpoll_cq_empty : %" PRId64 "\n",
                   dev_res->stats.poll_cq_from_guest_empty);
    monitor_printf(mon, "\tmad_tx : %" PRId64 "\n",
                   dev_res->stats.mad_tx);
    monitor_printf(mon, "\tmad_tx_err : %" PRId64 "\n",
                   dev_res->stats.mad_tx_err);
    monitor_printf(mon, "\tmad_rx : %" PRId64 "\n",
                   dev_res->stats.mad_rx);
    monitor_printf(mon, "\tmad_rx_err : %" PRId64 "\n",
                   dev_res->stats.mad_rx_err);
    monitor_printf(mon, "\tmad_rx_bufs : %" PRId64 "\n",
                   dev_res->stats.mad_rx_bufs);
    monitor_printf(mon, "\tmad_rx_bufs_err : %" PRId64 "\n",
                   dev_res->stats.mad_rx_bufs_err);
    monitor_printf(mon, "\tPDs : %" PRId32 "\n",
                   dev_res->pd_tbl.used);
    monitor_printf(mon, "\tMRs : %" PRId32 "\n",
                   dev_res->mr_tbl.used);
    monitor_printf(mon, "\tUCs : %" PRId32 "\n",
                   dev_res->uc_tbl.used);
    monitor_printf(mon, "\tQPs : %" PRId32 "\n",
                   dev_res->qp_tbl.used);
    monitor_printf(mon, "\tCQs : %" PRId32 "\n",
                   dev_res->cq_tbl.used);
    monitor_printf(mon, "\tCEQ_CTXs : %" PRId32 "\n",
                   dev_res->cqe_ctx_tbl.used);
}

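/*
 * Generic resource-table helpers. Each table is a flat array of fixed-size
 * entries plus a bitmap that tracks which handles are currently in use;
 * bitmap updates are serialized with the table's mutex.
 */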
static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    tbl->tbl = g_malloc(tbl_sz * res_sz);

    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
    tbl->name[MAX_RM_TBL_NAME - 1] = 0;

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    tbl->used = 0;
    qemu_mutex_init(&tbl->lock);
}

static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    if (!tbl->bitmap) {
        return;
    }
    qemu_mutex_destroy(&tbl->lock);
    g_free(tbl->tbl);
    g_free(tbl->bitmap);
}

static inline void *rdma_res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_get(tbl->name, handle);

    if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
        return tbl->tbl + handle * tbl->res_sz;
    } else {
        rdma_error_report("Table %s, invalid handle %d", tbl->name, handle);
        return NULL;
    }
}

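/*
 * Allocate a table entry: take the first clear bit in the bitmap as the
 * handle, mark it used and return a zeroed pointer to the entry, or NULL
 * when the table is full.
 */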
static inline void *rdma_res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    qemu_mutex_lock(&tbl->lock);

    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    if (*handle >= tbl->tbl_sz) {
        rdma_error_report("Table %s, failed to allocate, bitmap is full",
                          tbl->name);
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);
    tbl->used++;

    qemu_mutex_unlock(&tbl->lock);

    memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);

    trace_rdma_res_tbl_alloc(tbl->name, *handle);

    return tbl->tbl + *handle * tbl->res_sz;
}

static inline void rdma_res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_dealloc(tbl->name, handle);

    qemu_mutex_lock(&tbl->lock);

    if (handle < tbl->tbl_sz) {
        clear_bit(handle, tbl->bitmap);
        tbl->used--;
    }

    qemu_mutex_unlock(&tbl->lock);
}

int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *pd;
    int ret = -ENOMEM;

    pd = rdma_res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
    if (!pd) {
        goto out;
    }

    ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
    if (ret) {
        ret = -EIO;
        goto out_tbl_dealloc;
    }

    pd->ctx_handle = ctx_handle;

    return 0;

out_tbl_dealloc:
    rdma_res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
out:
    return ret;
}

RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    return rdma_res_tbl_get(&dev_res->pd_tbl, pd_handle);
}

void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);

    if (pd) {
        rdma_backend_destroy_pd(&pd->backend_pd);
        rdma_res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
    }
}

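/*
 * Register a memory region on the given PD. For regions backed by guest
 * memory, the mapped host address is adjusted by the guest start address's
 * offset into the page before it is registered with the backend, and the
 * table handle is returned to the guest as the lkey.
 */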
int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, uint64_t guest_length,
                     void *host_virt, int access_flags, uint32_t *mr_handle,
                     uint32_t *lkey, uint32_t *rkey)
{
    RdmaRmMR *mr;
    int ret = 0;
    RdmaRmPD *pd;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        return -EINVAL;
    }

    mr = rdma_res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (!mr) {
        return -ENOMEM;
    }
    trace_rdma_rm_alloc_mr(*mr_handle, host_virt, guest_start, guest_length,
                           access_flags);

    if (host_virt) {
        mr->virt = host_virt;
        mr->start = guest_start;
        mr->length = guest_length;
        mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));

#ifdef LEGACY_RDMA_REG_MR
        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                     mr->length, access_flags);
#else
        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                     mr->length, guest_start, access_flags);
#endif
        if (ret) {
            ret = -EIO;
            goto out_dealloc_mr;
        }
    }

    /* We keep mr_handle in lkey so send and recv can get the MR pointer */
    *lkey = *mr_handle;
    *rkey = -1;

    mr->pd_handle = pd_handle;

    return 0;

out_dealloc_mr:
    rdma_res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);

    return ret;
}

RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    return rdma_res_tbl_get(&dev_res->mr_tbl, mr_handle);
}

void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);

    if (mr) {
        rdma_backend_destroy_mr(&mr->backend_mr);
        trace_rdma_rm_dealloc_mr(mr_handle, mr->start);
        if (mr->start) {
            mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1));
            munmap(mr->virt, mr->length);
        }
        rdma_res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
    }
}

int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    RdmaRmUC *uc;

    /* TODO: Need to make sure pfn is between bar start address and
     * bar start address + RDMA_BAR2_UAR_SIZE
    if (pfn > RDMA_BAR2_UAR_SIZE) {
        rdma_error_report("pfn out of range (%d > %d)", pfn,
                          RDMA_BAR2_UAR_SIZE);
        return -ENOMEM;
    }
    */

    uc = rdma_res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
    if (!uc) {
        return -ENOMEM;
    }

    return 0;
}

RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    return rdma_res_tbl_get(&dev_res->uc_tbl, uc_handle);
}

void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);

    if (uc) {
        rdma_res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
    }
}

RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    return rdma_res_tbl_get(&dev_res->cq_tbl, cq_handle);
}

int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    int rc;
    RdmaRmCQ *cq;

    cq = rdma_res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
    if (!cq) {
        return -ENOMEM;
    }

    cq->opaque = opaque;
    cq->notify = CNT_CLEAR;

    rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_cq;
    }

    return 0;

out_dealloc_cq:
    rdma_rm_dealloc_cq(dev_res, *cq_handle);

    return rc;
}

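/*
 * Record the guest's completion-notification request. CNT_SET (used for the
 * GSI QP's CQs) means "always notify" and is never downgraded; otherwise the
 * request arms (CNT_ARM) or clears (CNT_CLEAR) notification for this CQ.
 */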
void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    if (cq->notify != CNT_SET) {
        cq->notify = notify ? CNT_ARM : CNT_CLEAR;
    }
}

void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    rdma_backend_destroy_cq(&cq->backend_cq);

    rdma_res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
}

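/*
 * QPs are addressed by the backend QP number, so in addition to their slot
 * in qp_tbl (indexed by rm_qpn) they are kept in a hash table keyed by that
 * backend qpn.
 */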
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
{
    GBytes *key = g_bytes_new(&qpn, sizeof(qpn));

    RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);

    g_bytes_unref(key);

    if (!qp) {
        rdma_error_report("Invalid QP handle %d", qpn);
    }

    return qp;
}

int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
                     uint8_t is_srq, uint32_t srq_handle)
{
    int rc;
    RdmaRmQP *qp;
    RdmaRmCQ *scq, *rcq;
    RdmaRmPD *pd;
    RdmaRmSRQ *srq = NULL;
    uint32_t rm_qpn;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        return -EINVAL;
    }

    scq = rdma_rm_get_cq(dev_res, send_cq_handle);
    rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);

    if (!scq || !rcq) {
        rdma_error_report("Invalid send_cqn or recv_cqn (%d, %d)",
                          send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    if (is_srq) {
        srq = rdma_rm_get_srq(dev_res, srq_handle);
        if (!srq) {
            rdma_error_report("Invalid srqn %d", srq_handle);
            return -EINVAL;
        }

        srq->recv_cq_handle = recv_cq_handle;
    }

    if (qp_type == IBV_QPT_GSI) {
        scq->notify = CNT_SET;
        rcq->notify = CNT_SET;
    }

    qp = rdma_res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
    if (!qp) {
        return -ENOMEM;
    }

    qp->qpn = rm_qpn;
    qp->qp_state = IBV_QPS_RESET;
    qp->qp_type = qp_type;
    qp->send_cq_handle = send_cq_handle;
    qp->recv_cq_handle = recv_cq_handle;
    qp->opaque = opaque;
    qp->is_srq = is_srq;

    rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
                                &scq->backend_cq, &rcq->backend_cq,
                                is_srq ? &srq->backend_srq : NULL,
                                max_send_wr, max_recv_wr, max_send_sge,
                                max_recv_sge);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_qp;
    }

    *qpn = rdma_backend_qpn(&qp->backend_qp);
    trace_rdma_rm_alloc_qp(rm_qpn, *qpn, qp_type);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);

    return 0;

out_dealloc_qp:
    rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);

    return rc;
}

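/*
 * Apply a guest modify-QP request, driving the backend QP through the
 * INIT/RTR/RTS transitions selected by attr_mask and qp_state. QP0 (SMI)
 * requests are rejected; the GSI QP is accepted without touching the backend.
 */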
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int ret;

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    if (qp->qp_type == IBV_QPT_SMI) {
        rdma_error_report("Got QP0 request");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        return 0;
    }

    trace_rdma_rm_modify_qp(qp_handle, attr_mask, qp_state, sgid_idx);

    if (attr_mask & IBV_QP_STATE) {
        qp->qp_state = qp_state;

        if (qp->qp_state == IBV_QPS_INIT) {
            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                             qp->qp_type, qkey);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTR) {
            /* Get backend gid index */
            sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
                                                     sgid_idx);
            if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
                rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d",
                                  sgid_idx);
                return -EIO;
            }

            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                            qp->qp_type, sgid_idx, dgid, dqpn,
                                            rq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTS) {
            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                            sq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }
    }

    return 0;
}

int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *qp;

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
}

void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    GBytes *key;

    key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    qp = g_hash_table_lookup(dev_res->qp_hash, key);
    g_hash_table_remove(dev_res->qp_hash, key);
    g_bytes_unref(key);

    if (!qp) {
        return;
    }

    rdma_backend_destroy_qp(&qp->backend_qp, dev_res);

    rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}

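/*
 * Shared receive queues: an SRQ is created on a PD and later attached to
 * QPs that are allocated with is_srq set (see rdma_rm_alloc_qp).
 */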
RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
}

int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
                      uint32_t *srq_handle, void *opaque)
{
    RdmaRmSRQ *srq;
    RdmaRmPD *pd;
    int rc;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        return -EINVAL;
    }

    srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
    if (!srq) {
        return -ENOMEM;
    }

    rc = rdma_backend_create_srq(&srq->backend_srq, &pd->backend_pd,
                                 max_wr, max_sge, srq_limit);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_srq;
    }

    srq->opaque = opaque;

    return 0;

out_dealloc_srq:
    rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);

    return rc;
}

int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                      struct ibv_srq_attr *srq_attr)
{
    RdmaRmSRQ *srq;

    srq = rdma_rm_get_srq(dev_res, srq_handle);
    if (!srq) {
        return -EINVAL;
    }

    return rdma_backend_query_srq(&srq->backend_srq, srq_attr);
}

int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                       struct ibv_srq_attr *srq_attr, int srq_attr_mask)
{
    RdmaRmSRQ *srq;

    srq = rdma_rm_get_srq(dev_res, srq_handle);
    if (!srq) {
        return -EINVAL;
    }

    if ((srq_attr_mask & IBV_SRQ_LIMIT) &&
        (srq_attr->srq_limit == 0)) {
        return -EINVAL;
    }

    if ((srq_attr_mask & IBV_SRQ_MAX_WR) &&
        (srq_attr->max_wr == 0)) {
        return -EINVAL;
    }

    return rdma_backend_modify_srq(&srq->backend_srq, srq_attr,
                                   srq_attr_mask);
}

void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    RdmaRmSRQ *srq;

    srq = rdma_rm_get_srq(dev_res, srq_handle);
    if (!srq) {
        return;
    }

    rdma_backend_destroy_srq(&srq->backend_srq, dev_res);
    rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
}

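/*
 * The cqe_ctx table maps allocated IDs to opaque context pointers so that
 * a completion can be matched back to the request that produced it.
 */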
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    void **cqe_ctx;

    cqe_ctx = rdma_res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return NULL;
    }

    return *cqe_ctx;
}

int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **cqe_ctx;

    cqe_ctx = rdma_res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return -ENOMEM;
    }

    *cqe_ctx = ctx;

    return 0;
}

void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    rdma_res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}

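/*
 * Port GID table management: each guest GID entry is mirrored to the backend
 * device, and the backend's index for it is resolved lazily on first use.
 */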
int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, union ibv_gid *gid, int gid_idx)
{
    int rc;

    rc = rdma_backend_add_gid(backend_dev, ifname, gid);
    if (rc) {
        return -EINVAL;
    }

    memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));

    return 0;
}

int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, int gid_idx)
{
    int rc;

    if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
        return 0;
    }

    rc = rdma_backend_del_gid(backend_dev, ifname,
                              &dev_res->port.gid_tbl[gid_idx].gid);
    if (rc) {
        return -EINVAL;
    }

    memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
           sizeof(dev_res->port.gid_tbl[gid_idx].gid));
    dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;

    return 0;
}

int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
                                  RdmaBackendDev *backend_dev, int sgid_idx)
{
    if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) {
        rdma_error_report("Got invalid sgid_idx %d", sgid_idx);
        return -EINVAL;
    }

    if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
        dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
            rdma_backend_get_gid_index(backend_dev,
                                       &dev_res->port.gid_tbl[sgid_idx].gid);
    }

    return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
}

static void destroy_qp_hash_key(gpointer data)
{
    g_bytes_unref(data);
}

static void init_ports(RdmaDeviceResources *dev_res)
{
    int i;

    memset(&dev_res->port, 0, sizeof(dev_res->port));

    dev_res->port.state = IBV_PORT_DOWN;
    for (i = 0; i < MAX_PORT_GIDS; i++) {
        dev_res->port.gid_tbl[i].backend_gid_index = -1;
    }
}

static void fini_ports(RdmaDeviceResources *dev_res,
                       RdmaBackendDev *backend_dev, const char *ifname)
{
    int i;

    dev_res->port.state = IBV_PORT_DOWN;
    for (i = 0; i < MAX_PORT_GIDS; i++) {
        rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
    }
}

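/*
 * Create all resource tables, sized from the backend device attributes, and
 * reset the per-device statistics and port state.
 */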
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
{
    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (!dev_res->qp_hash) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                 dev_attr->max_qp_wr, sizeof(void *));
    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
    res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
                 sizeof(RdmaRmSRQ));

    init_ports(dev_res);

    qemu_mutex_init(&dev_res->lock);

    memset(&dev_res->stats, 0, sizeof(dev_res->stats));
    atomic_set(&dev_res->stats.missing_cqe, 0);

    return 0;
}

void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                  const char *ifname)
{
    qemu_mutex_destroy(&dev_res->lock);

    fini_ports(dev_res, backend_dev, ifname);

    res_tbl_free(&dev_res->srq_tbl);
    res_tbl_free(&dev_res->uc_tbl);
    res_tbl_free(&dev_res->cqe_ctx_tbl);
    res_tbl_free(&dev_res->qp_tbl);
    res_tbl_free(&dev_res->mr_tbl);
    res_tbl_free(&dev_res->cq_tbl);
    res_tbl_free(&dev_res->pd_tbl);

    if (dev_res->qp_hash) {
        g_hash_table_destroy(dev_res->qp_hash);
    }
}