libvhost-user.c

/*
 * Vhost User library
 *
 * Copyright IBM, Corp. 2007
 * Copyright (c) 2016 Red Hat, Inc.
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *  Marc-André Lureau <mlureau@redhat.com>
 *  Victor Kaplansky <victork@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */

#include <qemu/osdep.h>
#include <sys/eventfd.h>
#include <linux/vhost.h>

#include "qemu/atomic.h"

#include "libvhost-user.h"

#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)

/* The version of the protocol we support */
#define VHOST_USER_VERSION 1
#define LIBVHOST_USER_DEBUG 0

#define DPRINT(...)                             \
    do {                                        \
        if (LIBVHOST_USER_DEBUG) {              \
            fprintf(stderr, __VA_ARGS__);       \
        }                                       \
    } while (0)

static const char *
vu_request_to_string(int req)
{
#define REQ(req) [req] = #req
    static const char *vu_request_str[] = {
        REQ(VHOST_USER_NONE),
        REQ(VHOST_USER_GET_FEATURES),
        REQ(VHOST_USER_SET_FEATURES),
        REQ(VHOST_USER_SET_OWNER),
        REQ(VHOST_USER_RESET_OWNER),
        REQ(VHOST_USER_SET_MEM_TABLE),
        REQ(VHOST_USER_SET_LOG_BASE),
        REQ(VHOST_USER_SET_LOG_FD),
        REQ(VHOST_USER_SET_VRING_NUM),
        REQ(VHOST_USER_SET_VRING_ADDR),
        REQ(VHOST_USER_SET_VRING_BASE),
        REQ(VHOST_USER_GET_VRING_BASE),
        REQ(VHOST_USER_SET_VRING_KICK),
        REQ(VHOST_USER_SET_VRING_CALL),
        REQ(VHOST_USER_SET_VRING_ERR),
        REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
        REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
        REQ(VHOST_USER_GET_QUEUE_NUM),
        REQ(VHOST_USER_SET_VRING_ENABLE),
        REQ(VHOST_USER_SEND_RARP),
        REQ(VHOST_USER_INPUT_GET_CONFIG),
        REQ(VHOST_USER_MAX),
    };
#undef REQ

    if (req < VHOST_USER_MAX) {
        return vu_request_str[req];
    } else {
        return "unknown";
    }
}

static void
vu_panic(VuDev *dev, const char *msg, ...)
{
    char *buf = NULL;
    va_list ap;

    va_start(ap, msg);
    buf = g_strdup_vprintf(msg, ap);
    va_end(ap);

    dev->broken = true;
    dev->panic(dev, buf);
    free(buf);

    /* FIXME: find a way to call virtio_error? */
}

/* Translate guest physical address to our virtual address. */
void *
vu_gpa_to_va(VuDev *dev, uint64_t guest_addr)
{
    int i;

    /* Find matching memory region. */
    for (i = 0; i < dev->nregions; i++) {
        VuDevRegion *r = &dev->regions[i];

        if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
            return (void *)(uintptr_t)
                guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
        }
    }

    return NULL;
}

/* Translate qemu virtual address to our virtual address. */
static void *
qva_to_va(VuDev *dev, uint64_t qemu_addr)
{
    int i;

    /* Find matching memory region. */
    for (i = 0; i < dev->nregions; i++) {
        VuDevRegion *r = &dev->regions[i];

        if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
            return (void *)(uintptr_t)
                qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
        }
    }

    return NULL;
}

static void
vmsg_close_fds(VhostUserMsg *vmsg)
{
    int i;

    for (i = 0; i < vmsg->fd_num; i++) {
        close(vmsg->fds[i]);
    }
}
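
/*
 * Read a single vhost-user message from the socket: first the fixed-size
 * header (with any SCM_RIGHTS file descriptors attached to it), then the
 * variable-size payload. Returns false and marks the device broken on
 * error.
 */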
static bool
vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
{
    char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
    struct iovec iov = {
        .iov_base = (char *)vmsg,
        .iov_len = VHOST_USER_HDR_SIZE,
    };
    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = sizeof(control),
    };
    size_t fd_size;
    struct cmsghdr *cmsg;
    int rc;

    do {
        rc = recvmsg(conn_fd, &msg, 0);
    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));

    if (rc <= 0) {
        vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
        return false;
    }

    vmsg->fd_num = 0;
    for (cmsg = CMSG_FIRSTHDR(&msg);
         cmsg != NULL;
         cmsg = CMSG_NXTHDR(&msg, cmsg))
    {
        if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
            fd_size = cmsg->cmsg_len - CMSG_LEN(0);
            vmsg->fd_num = fd_size / sizeof(int);
            memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
            break;
        }
    }

    if (vmsg->size > sizeof(vmsg->payload)) {
        vu_panic(dev,
                 "Error: too big message request: %d, size: vmsg->size: %u, "
                 "while sizeof(vmsg->payload) = %zu\n",
                 vmsg->request, vmsg->size, sizeof(vmsg->payload));
        goto fail;
    }

    if (vmsg->size) {
        do {
            rc = read(conn_fd, &vmsg->payload, vmsg->size);
        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));

        if (rc <= 0) {
            vu_panic(dev, "Error while reading: %s", strerror(errno));
            goto fail;
        }

        assert(rc == vmsg->size);
    }

    return true;

fail:
    vmsg_close_fds(vmsg);

    return false;
}
static bool
vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
{
    int rc;
    uint8_t *p = (uint8_t *)vmsg;

    /* Set the version in the flags when sending the reply */
    vmsg->flags &= ~VHOST_USER_VERSION_MASK;
    vmsg->flags |= VHOST_USER_VERSION;
    vmsg->flags |= VHOST_USER_REPLY_MASK;

    do {
        rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));

    /* Only write a payload if there is one; a zero-length write would
     * return 0 and be mistaken for an error below. */
    if (vmsg->size) {
        do {
            if (vmsg->data) {
                rc = write(conn_fd, vmsg->data, vmsg->size);
            } else {
                rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
            }
        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
    }

    if (rc <= 0) {
        vu_panic(dev, "Error while writing: %s", strerror(errno));
        return false;
    }

    return true;
}
/* Kick the log_call_fd if required. */
static void
vu_log_kick(VuDev *dev)
{
    if (dev->log_call_fd != -1) {
        DPRINT("Kicking the QEMU's log...\n");
        if (eventfd_write(dev->log_call_fd, 1) < 0) {
            vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
        }
    }
}

static void
vu_log_page(uint8_t *log_table, uint64_t page)
{
    DPRINT("Logged dirty guest page: %"PRId64"\n", page);
    atomic_or(&log_table[page / 8], 1 << (page % 8));
}
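
/*
 * Mark [address, address + length) dirty in the migration log. The log is
 * a bitmap with one bit per VHOST_LOG_PAGE-sized page of guest memory, and
 * QEMU is notified through log_call_fd after the bits are set.
 */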
static void
vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
{
    uint64_t page;

    if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
        !dev->log_table || !length) {
        return;
    }

    assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));

    page = address / VHOST_LOG_PAGE;
    while (page * VHOST_LOG_PAGE < address + length) {
        vu_log_page(dev->log_table, page);
        page += 1;
    }

    vu_log_kick(dev);
}
static void
vu_kick_cb(VuDev *dev, int condition, void *data)
{
    int index = (intptr_t)data;
    VuVirtq *vq = &dev->vq[index];
    int sock = vq->kick_fd;
    eventfd_t kick_data;
    ssize_t rc;

    rc = eventfd_read(sock, &kick_data);
    if (rc == -1) {
        vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
        dev->remove_watch(dev, dev->vq[index].kick_fd);
    } else {
        DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
               kick_data, vq->handler, index);
        if (vq->handler) {
            vq->handler(dev, index);
        }
    }
}

static bool
vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    vmsg->payload.u64 =
        1ULL << VHOST_F_LOG_ALL |
        1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

    if (dev->iface->get_features) {
        vmsg->payload.u64 |= dev->iface->get_features(dev);
    }

    vmsg->size = sizeof(vmsg->payload.u64);

    DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    return true;
}

static void
vu_set_enable_all_rings(VuDev *dev, bool enabled)
{
    int i;

    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
        dev->vq[i].enable = enabled;
    }
}
static bool
vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    dev->features = vmsg->payload.u64;
    if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
        vu_set_enable_all_rings(dev, true);
    }

    if (dev->iface->set_features) {
        dev->iface->set_features(dev, dev->features);
    }

    return false;
}
static bool
vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    return false;
}

static void
vu_close_log(VuDev *dev)
{
    if (dev->log_table) {
        if (munmap(dev->log_table, dev->log_size) != 0) {
            perror("close log munmap() error");
        }

        dev->log_table = NULL;
    }
    if (dev->log_call_fd != -1) {
        close(dev->log_call_fd);
        dev->log_call_fd = -1;
    }
}

static bool
vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    vu_set_enable_all_rings(dev, false);

    return false;
}
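
/*
 * Install the guest memory layout sent by QEMU: each region is mmap()ed
 * from the file descriptor accompanying the message, and its guest
 * physical / QEMU virtual base addresses are recorded so that
 * vu_gpa_to_va() and qva_to_va() can translate addresses later on.
 */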
static bool
vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    int i;
    VhostUserMemory *memory = &vmsg->payload.memory;
    dev->nregions = memory->nregions;

    DPRINT("Nregions: %d\n", memory->nregions);
    for (i = 0; i < dev->nregions; i++) {
        void *mmap_addr;
        VhostUserMemoryRegion *msg_region = &memory->regions[i];
        VuDevRegion *dev_region = &dev->regions[i];

        DPRINT("Region %d\n", i);
        DPRINT("    guest_phys_addr: 0x%016"PRIx64"\n",
               msg_region->guest_phys_addr);
        DPRINT("    memory_size:     0x%016"PRIx64"\n",
               msg_region->memory_size);
        DPRINT("    userspace_addr   0x%016"PRIx64"\n",
               msg_region->userspace_addr);
        DPRINT("    mmap_offset      0x%016"PRIx64"\n",
               msg_region->mmap_offset);

        dev_region->gpa = msg_region->guest_phys_addr;
        dev_region->size = msg_region->memory_size;
        dev_region->qva = msg_region->userspace_addr;
        dev_region->mmap_offset = msg_region->mmap_offset;

        /* We don't use offset argument of mmap() since the
         * mapped address has to be page aligned, and we use huge
         * pages. */
        mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
                         PROT_READ | PROT_WRITE, MAP_SHARED,
                         vmsg->fds[i], 0);

        if (mmap_addr == MAP_FAILED) {
            vu_panic(dev, "region mmap error: %s", strerror(errno));
        } else {
            dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
            DPRINT("    mmap_addr:       0x%016"PRIx64"\n",
                   dev_region->mmap_addr);
        }

        close(vmsg->fds[i]);
    }

    return false;
}

static bool
vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    int fd;
    uint64_t log_mmap_size, log_mmap_offset;
    void *rc;

    if (vmsg->fd_num != 1 ||
        vmsg->size != sizeof(vmsg->payload.log)) {
        vu_panic(dev, "Invalid log_base message");
        return true;
    }

    fd = vmsg->fds[0];
    log_mmap_offset = vmsg->payload.log.mmap_offset;
    log_mmap_size = vmsg->payload.log.mmap_size;
    DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
    DPRINT("Log mmap_size:   %"PRId64"\n", log_mmap_size);

    rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
              log_mmap_offset);
    if (rc == MAP_FAILED) {
        perror("log mmap error");
    }
    dev->log_table = rc;
    dev->log_size = log_mmap_size;

    vmsg->size = sizeof(vmsg->payload.u64);

    return true;
}

static bool
vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    if (vmsg->fd_num != 1) {
        vu_panic(dev, "Invalid log_fd message");
        return false;
    }

    if (dev->log_call_fd != -1) {
        close(dev->log_call_fd);
    }
    dev->log_call_fd = vmsg->fds[0];
    DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);

    return false;
}

static bool
vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int num = vmsg->payload.state.num;

    DPRINT("State.index: %d\n", index);
    DPRINT("State.num:   %d\n", num);
    dev->vq[index].vring.num = num;

    return false;
}

static bool
vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    struct vhost_vring_addr *vra = &vmsg->payload.addr;
    unsigned int index = vra->index;
    VuVirtq *vq = &dev->vq[index];

    DPRINT("vhost_vring_addr:\n");
    DPRINT("    index:  %d\n", vra->index);
    DPRINT("    flags:  %d\n", vra->flags);
    DPRINT("    desc_user_addr:   0x%016llx\n", vra->desc_user_addr);
    DPRINT("    used_user_addr:   0x%016llx\n", vra->used_user_addr);
    DPRINT("    avail_user_addr:  0x%016llx\n", vra->avail_user_addr);
    DPRINT("    log_guest_addr:   0x%016llx\n", vra->log_guest_addr);

    vq->vring.flags = vra->flags;
    vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
    vq->vring.used = qva_to_va(dev, vra->used_user_addr);
    vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
    vq->vring.log_guest_addr = vra->log_guest_addr;

    DPRINT("Setting virtq addresses:\n");
    DPRINT("    vring_desc  at %p\n", vq->vring.desc);
    DPRINT("    vring_used  at %p\n", vq->vring.used);
    DPRINT("    vring_avail at %p\n", vq->vring.avail);

    if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
        vu_panic(dev, "Invalid vring_addr message");
        return false;
    }

    vq->used_idx = vq->vring.used->idx;

    return false;
}

static bool
vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int num = vmsg->payload.state.num;

    DPRINT("State.index: %d\n", index);
    DPRINT("State.num:   %d\n", num);
    dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;

    return false;
}

static bool
vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;

    DPRINT("State.index: %d\n", index);
    vmsg->payload.state.num = dev->vq[index].last_avail_idx;
    vmsg->size = sizeof(vmsg->payload.state);

    dev->vq[index].started = false;
    if (dev->iface->queue_set_started) {
        dev->iface->queue_set_started(dev, index, false);
    }

    if (dev->vq[index].call_fd != -1) {
        close(dev->vq[index].call_fd);
        dev->vq[index].call_fd = -1;
    }
    if (dev->vq[index].kick_fd != -1) {
        dev->remove_watch(dev, dev->vq[index].kick_fd);
        close(dev->vq[index].kick_fd);
        dev->vq[index].kick_fd = -1;
    }

    return true;
}

static bool
vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
{
    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;

    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Invalid queue index: %u", index);
        return false;
    }

    if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
        vmsg->fd_num != 1) {
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
        return false;
    }

    return true;
}

static bool
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    if (!vu_check_queue_msg_file(dev, vmsg)) {
        return false;
    }

    if (dev->vq[index].kick_fd != -1) {
        dev->remove_watch(dev, dev->vq[index].kick_fd);
        close(dev->vq[index].kick_fd);
        dev->vq[index].kick_fd = -1;
    }

    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
        dev->vq[index].kick_fd = vmsg->fds[0];
        DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
    }

    dev->vq[index].started = true;
    if (dev->iface->queue_set_started) {
        dev->iface->queue_set_started(dev, index, true);
    }

    if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
        dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
                       vu_kick_cb, (void *)(long)index);

        DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
               dev->vq[index].kick_fd, index);
    }

    return false;
}

void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler)
{
    int qidx = vq - dev->vq;

    vq->handler = handler;
    if (vq->kick_fd >= 0) {
        if (handler) {
            dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
                           vu_kick_cb, (void *)(long)qidx);
        } else {
            dev->remove_watch(dev, vq->kick_fd);
        }
    }
}

static bool
vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    if (!vu_check_queue_msg_file(dev, vmsg)) {
        return false;
    }

    if (dev->vq[index].call_fd != -1) {
        close(dev->vq[index].call_fd);
        dev->vq[index].call_fd = -1;
    }

    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
        dev->vq[index].call_fd = vmsg->fds[0];
    }

    DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);

    return false;
}

static bool
vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;

    DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);

    if (!vu_check_queue_msg_file(dev, vmsg)) {
        return false;
    }

    if (dev->vq[index].err_fd != -1) {
        close(dev->vq[index].err_fd);
        dev->vq[index].err_fd = -1;
    }

    if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
        dev->vq[index].err_fd = vmsg->fds[0];
    }

    return false;
}

static bool
vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;

    if (dev->iface->get_protocol_features) {
        features |= dev->iface->get_protocol_features(dev);
    }

    vmsg->payload.u64 = features;
    vmsg->size = sizeof(vmsg->payload.u64);

    return true;
}

static bool
vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    uint64_t features = vmsg->payload.u64;

    DPRINT("u64: 0x%016"PRIx64"\n", features);

    dev->protocol_features = vmsg->payload.u64;

    if (dev->iface->set_protocol_features) {
        dev->iface->set_protocol_features(dev, features);
    }

    return false;
}

static bool
vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    DPRINT("Function %s() not implemented yet.\n", __func__);
    return false;
}

static bool
vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
{
    unsigned int index = vmsg->payload.state.index;
    unsigned int enable = vmsg->payload.state.num;

    DPRINT("State.index:  %d\n", index);
    DPRINT("State.enable: %d\n", enable);

    if (index >= VHOST_MAX_NR_VIRTQUEUE) {
        vu_panic(dev, "Invalid vring_enable index: %u", index);
        return false;
    }

    dev->vq[index].enable = enable;
    return false;
}

static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
    int do_reply = 0;

    /* Print out generic part of the request. */
    DPRINT("================ Vhost user message ================\n");
    DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
           vmsg->request);
    DPRINT("Flags:   0x%x\n", vmsg->flags);
    DPRINT("Size:    %d\n", vmsg->size);

    if (vmsg->fd_num) {
        int i;
        DPRINT("Fds:");
        for (i = 0; i < vmsg->fd_num; i++) {
            DPRINT(" %d", vmsg->fds[i]);
        }
        DPRINT("\n");
    }

    if (dev->iface->process_msg &&
        dev->iface->process_msg(dev, vmsg, &do_reply)) {
        return do_reply;
    }

    switch (vmsg->request) {
    case VHOST_USER_GET_FEATURES:
        return vu_get_features_exec(dev, vmsg);
    case VHOST_USER_SET_FEATURES:
        return vu_set_features_exec(dev, vmsg);
    case VHOST_USER_GET_PROTOCOL_FEATURES:
        return vu_get_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_PROTOCOL_FEATURES:
        return vu_set_protocol_features_exec(dev, vmsg);
    case VHOST_USER_SET_OWNER:
        return vu_set_owner_exec(dev, vmsg);
    case VHOST_USER_RESET_OWNER:
        return vu_reset_device_exec(dev, vmsg);
    case VHOST_USER_SET_MEM_TABLE:
        return vu_set_mem_table_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_BASE:
        return vu_set_log_base_exec(dev, vmsg);
    case VHOST_USER_SET_LOG_FD:
        return vu_set_log_fd_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_NUM:
        return vu_set_vring_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ADDR:
        return vu_set_vring_addr_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_BASE:
        return vu_set_vring_base_exec(dev, vmsg);
    case VHOST_USER_GET_VRING_BASE:
        return vu_get_vring_base_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_KICK:
        return vu_set_vring_kick_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_CALL:
        return vu_set_vring_call_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ERR:
        return vu_set_vring_err_exec(dev, vmsg);
    case VHOST_USER_GET_QUEUE_NUM:
        return vu_get_queue_num_exec(dev, vmsg);
    case VHOST_USER_SET_VRING_ENABLE:
        return vu_set_vring_enable_exec(dev, vmsg);
    default:
        vmsg_close_fds(vmsg);
        vu_panic(dev, "Unhandled request: %d", vmsg->request);
    }

    return false;
}
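
/*
 * Read and process one vhost-user message from the master socket. If the
 * handler asked for a reply, write it back on the same socket. Returns
 * false if the connection or the message is broken.
 */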
bool
vu_dispatch(VuDev *dev)
{
    VhostUserMsg vmsg = { 0, };
    int reply_requested;
    bool success = false;

    if (!vu_message_read(dev, dev->sock, &vmsg)) {
        goto end;
    }

    reply_requested = vu_process_message(dev, &vmsg);
    if (!reply_requested) {
        success = true;
        goto end;
    }

    if (!vu_message_write(dev, dev->sock, &vmsg)) {
        goto end;
    }

    success = true;

end:
    g_free(vmsg.data);
    return success;
}

void
vu_deinit(VuDev *dev)
{
    int i;

    for (i = 0; i < dev->nregions; i++) {
        VuDevRegion *r = &dev->regions[i];
        void *m = (void *) (uintptr_t) r->mmap_addr;
        if (m != MAP_FAILED) {
            munmap(m, r->size + r->mmap_offset);
        }
    }
    dev->nregions = 0;

    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
        VuVirtq *vq = &dev->vq[i];

        if (vq->call_fd != -1) {
            close(vq->call_fd);
            vq->call_fd = -1;
        }

        if (vq->kick_fd != -1) {
            close(vq->kick_fd);
            vq->kick_fd = -1;
        }

        if (vq->err_fd != -1) {
            close(vq->err_fd);
            vq->err_fd = -1;
        }
    }

    vu_close_log(dev);

    if (dev->sock != -1) {
        close(dev->sock);
    }
}

void
vu_init(VuDev *dev,
        int socket,
        vu_panic_cb panic,
        vu_set_watch_cb set_watch,
        vu_remove_watch_cb remove_watch,
        const VuDevIface *iface)
{
    int i;

    assert(socket >= 0);
    assert(set_watch);
    assert(remove_watch);
    assert(iface);
    assert(panic);

    memset(dev, 0, sizeof(*dev));

    dev->sock = socket;
    dev->panic = panic;
    dev->set_watch = set_watch;
    dev->remove_watch = remove_watch;
    dev->iface = iface;
    dev->log_call_fd = -1;
    for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
        dev->vq[i] = (VuVirtq) {
            .call_fd = -1, .kick_fd = -1, .err_fd = -1,
            .notification = true,
        };
    }
}

VuVirtq *
vu_get_queue(VuDev *dev, int qidx)
{
    assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
    return &dev->vq[qidx];
}

bool
vu_queue_enabled(VuDev *dev, VuVirtq *vq)
{
    return vq->enable;
}

static inline uint16_t
vring_avail_flags(VuVirtq *vq)
{
    return vq->vring.avail->flags;
}

static inline uint16_t
vring_avail_idx(VuVirtq *vq)
{
    vq->shadow_avail_idx = vq->vring.avail->idx;

    return vq->shadow_avail_idx;
}

static inline uint16_t
vring_avail_ring(VuVirtq *vq, int i)
{
    return vq->vring.avail->ring[i];
}

static inline uint16_t
vring_get_used_event(VuVirtq *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static int
virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        vu_panic(dev, "Guest moved used index from %u to %u",
                 idx, vq->shadow_avail_idx);
        return -1;
    }
    if (num_heads) {
        /* On success, callers read a descriptor at vq->last_avail_idx.
         * Make sure descriptor read does not bypass avail index read. */
        smp_rmb();
    }

    return num_heads;
}
static bool
virtqueue_get_head(VuDev *dev, VuVirtq *vq,
                   unsigned int idx, unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        vu_panic(dev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
static int
virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
                         int i, unsigned int max, unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc[i].next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        vu_panic(dev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    return VIRTQUEUE_READ_DESC_MORE;
}
void
vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
                         unsigned int *out_bytes,
                         unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        goto done;
    }

    while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
        unsigned int max, num_bufs, indirect = 0;
        struct vring_desc *desc;
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        if (!virtqueue_get_head(dev, vq, idx++, &i)) {
            goto err;
        }
        desc = vq->vring.desc;

        if (desc[i].flags & VRING_DESC_F_INDIRECT) {
            if (desc[i].len % sizeof(struct vring_desc)) {
                vu_panic(dev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc[i].len / sizeof(struct vring_desc);
            desc = vu_gpa_to_va(dev, desc[i].addr);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                vu_panic(dev, "Looped descriptor");
                goto err;
            }

            if (desc[i].flags & VRING_DESC_F_WRITE) {
                in_total += desc[i].len;
            } else {
                out_total += desc[i].len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
            rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

bool
vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                     unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
                             in_bytes, out_bytes);

    return in_bytes <= in_total && out_bytes <= out_total;
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
bool
vu_queue_empty(VuDev *dev, VuVirtq *vq)
{
    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return true;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return false;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static inline
bool has_feature(uint64_t features, unsigned int fbit)
{
    assert(fbit < 64);
    return !!(features & (1ULL << fbit));
}

static inline
bool vu_has_feature(VuDev *dev,
                    unsigned int fbit)
{
    return has_feature(dev->features, fbit);
}
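
/*
 * Decide whether the guest needs an interrupt for this queue, honouring
 * VIRTIO_RING_F_EVENT_IDX (used_event) and VRING_AVAIL_F_NO_INTERRUPT
 * suppression when those features were negotiated.
 */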
static bool
vring_notify(VuDev *dev, VuVirtq *vq)
{
    uint16_t old, new;
    bool v;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && vu_queue_empty(dev, vq)) {
        return true;
    }

    if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}

void
vu_queue_notify(VuDev *dev, VuVirtq *vq)
{
    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return;
    }

    if (!vring_notify(dev, vq)) {
        DPRINT("skipped notify...\n");
        return;
    }

    if (eventfd_write(vq->call_fd, 1) < 0) {
        vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
    }
}

static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char *)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags |= mask;
}

static inline void
vring_used_flags_unset_bit(VuVirtq *vq, int mask)
{
    uint16_t *flags;

    flags = (uint16_t *)((char *)vq->vring.used +
                         offsetof(struct vring_used, flags));
    *flags &= ~mask;
}

static inline void
vring_set_avail_event(VuVirtq *vq, uint16_t val)
{
    if (!vq->notification) {
        return;
    }

    *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
}

void
vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
{
    vq->notification = enable;
    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

static void
virtqueue_map_desc(VuDev *dev,
                   unsigned int *p_num_sg, struct iovec *iov,
                   unsigned int max_num_sg, bool is_write,
                   uint64_t pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;

    assert(num_sg <= max_num_sg);

    if (!sz) {
        vu_panic(dev, "virtio: zero sized buffers are not allowed");
        return;
    }

    iov[num_sg].iov_base = vu_gpa_to_va(dev, pa);
    iov[num_sg].iov_len = sz;
    num_sg++;

    *p_num_sg = num_sg;
}

/* Round number down to multiple */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))

/* Round number up to multiple */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
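
/*
 * Allocate a VuVirtqElement together with its in_sg/out_sg iovec arrays in
 * a single allocation: the caller-supplied header of size sz is followed
 * by in_num entries of in_sg and then out_num entries of out_sg, each
 * aligned for struct iovec.
 */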
static void *
virtqueue_alloc_element(size_t sz,
                        unsigned out_num, unsigned in_num)
{
    VuVirtqElement *elem;
    size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VuVirtqElement));
    elem = malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}
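
/*
 * Pop the next available element from the queue: walk its descriptor
 * chain (following an indirect table if present), map every descriptor
 * into an iovec, and return a freshly allocated VuVirtqElement of at
 * least sz bytes, or NULL if the queue is empty or broken. The caller is
 * expected to push the element back as used (or rewind the ring) and
 * then free() it.
 */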
void *
vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
{
    unsigned int i, head, max;
    VuVirtqElement *elem;
    unsigned out_num, in_num;
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    struct vring_desc *desc;
    int rc;

    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return NULL;
    }

    if (vu_queue_empty(dev, vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = 0;

    max = vq->vring.num;
    if (vq->inuse >= vq->vring.num) {
        vu_panic(dev, "Virtqueue size exceeded");
        return NULL;
    }

    if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;
    desc = vq->vring.desc;
    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
        if (desc[i].len % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
        }

        /* loop over the indirect descriptor table */
        max = desc[i].len / sizeof(struct vring_desc);
        desc = vu_gpa_to_va(dev, desc[i].addr);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        if (desc[i].flags & VRING_DESC_F_WRITE) {
            virtqueue_map_desc(dev, &in_num, iov + out_num,
                               VIRTQUEUE_MAX_SIZE - out_num, true,
                               desc[i].addr, desc[i].len);
        } else {
            if (in_num) {
                vu_panic(dev, "Incorrect order for descriptors");
                return NULL;
            }
            virtqueue_map_desc(dev, &out_num, iov,
                               VIRTQUEUE_MAX_SIZE, false,
                               desc[i].addr, desc[i].len);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            vu_panic(dev, "Looped descriptor");
        }
        rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        return NULL;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    return elem;
}

bool
vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }
    vq->last_avail_idx -= num;
    vq->inuse -= num;
    return true;
}

static inline
void vring_used_write(VuDev *dev, VuVirtq *vq,
                      struct vring_used_elem *uelem, int i)
{
    struct vring_used *used = vq->vring.used;

    used->ring[i] = *uelem;
    vu_log_write(dev, vq->vring.log_guest_addr +
                 offsetof(struct vring_used, ring[i]),
                 sizeof(used->ring[i]));
}

static void
vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
                  const VuVirtqElement *elem,
                  unsigned int len)
{
    struct vring_desc *desc = vq->vring.desc;
    unsigned int i, max, min;
    unsigned num_bufs = 0;

    max = vq->vring.num;
    i = elem->index;

    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
        if (desc[i].len % sizeof(struct vring_desc)) {
            vu_panic(dev, "Invalid size for indirect buffer table");
        }

        /* loop over the indirect descriptor table */
        max = desc[i].len / sizeof(struct vring_desc);
        desc = vu_gpa_to_va(dev, desc[i].addr);
        i = 0;
    }

    do {
        if (++num_bufs > max) {
            vu_panic(dev, "Looped descriptor");
            return;
        }

        if (desc[i].flags & VRING_DESC_F_WRITE) {
            min = MIN(desc[i].len, len);
            vu_log_write(dev, desc[i].addr, min);
            len -= min;
        }

    } while (len > 0 &&
             (virtqueue_read_next_desc(dev, desc, i, max, &i)
              == VIRTQUEUE_READ_DESC_MORE));
}

void
vu_queue_fill(VuDev *dev, VuVirtq *vq,
              const VuVirtqElement *elem,
              unsigned int len, unsigned int idx)
{
    struct vring_used_elem uelem;

    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return;
    }

    vu_log_queue_fill(dev, vq, elem, len);

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(dev, vq, &uelem, idx);
}

static inline
void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
{
    vq->vring.used->idx = val;
    vu_log_write(dev,
                 vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
                 sizeof(vq->vring.used->idx));

    vq->used_idx = val;
}

void
vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(dev->broken) ||
        unlikely(!vq->vring.avail)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();

    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(dev, vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}
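
/*
 * Convenience wrapper: put a single element on the used ring and publish
 * the new used index. The caller is still expected to call
 * vu_queue_notify() if the guest should be interrupted.
 */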
void
vu_queue_push(VuDev *dev, VuVirtq *vq,
              const VuVirtqElement *elem, unsigned int len)
{
    vu_queue_fill(dev, vq, elem, len, 0);
    vu_queue_flush(dev, vq, 1);
}
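
/*
 * Example usage (not part of the library): a minimal sketch of how a
 * backend might drive this API. It assumes callback typedefs that match
 * the call sites in this file (panic, set_watch, remove_watch, queue
 * handler) and a VuDevIface with the fields referenced above; the my_*
 * names below are hypothetical. A real backend would hook set_watch /
 * remove_watch into its own event loop rather than relying on the bare
 * dispatch loop shown here.
 */
#if 0
static void my_panic(VuDev *dev, const char *msg)
{
    fprintf(stderr, "vhost-user panic: %s\n", msg);
}

static void my_set_watch(VuDev *dev, int fd, int condition,
                         vu_watch_cb cb, void *data)
{
    /* Register fd with the backend's event loop and call cb on input. */
}

static void my_remove_watch(VuDev *dev, int fd)
{
    /* Unregister fd from the backend's event loop. */
}

static void my_queue_handler(VuDev *dev, int qidx)
{
    VuVirtq *vq = vu_get_queue(dev, qidx);
    VuVirtqElement *elem;

    /* Drain the queue: process each element, then mark it used. */
    while ((elem = vu_queue_pop(dev, vq, sizeof(*elem)))) {
        /* ... read elem->out_sg / fill elem->in_sg here ... */
        vu_queue_push(dev, vq, elem, 0);
        free(elem);
    }
    vu_queue_notify(dev, vq);
}

static void my_queue_set_started(VuDev *dev, int qidx, bool started)
{
    vu_set_queue_handler(dev, vu_get_queue(dev, qidx),
                         started ? my_queue_handler : NULL);
}

static const VuDevIface my_iface = {
    .queue_set_started = my_queue_set_started,
};

int my_run(int socket)
{
    VuDev dev;

    vu_init(&dev, socket, my_panic, my_set_watch, my_remove_watch, &my_iface);

    /* Simplest possible loop: one vhost-user message per vu_dispatch(). */
    while (vu_dispatch(&dev)) {
        /* continue until the connection breaks */
    }

    vu_deinit(&dev);
    return 0;
}
#endif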