vhost.c

/*
 * vhost support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <sys/ioctl.h>
#include "vhost.h"
#include "hw/hw.h"
#include "range.h"
#include <linux/vhost.h>
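
/* Scan the vhost dirty log for the intersection of the memory range
 * [mfirst, mlast] with the log range [rfirst, rlast]: atomically fetch
 * and clear each log chunk, then mark the corresponding guest RAM
 * pages dirty in QEMU's bitmap. */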
static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;

    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);
    if (end < start) {
        return;
    }
    for (;from < to; ++from) {
        vhost_log_chunk_t log;
        int bit;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really
         * need the barrier semantics of __sync
         * builtins, but it's easier to use them than
         * roll our own. */
        log = __sync_fetch_and_and(from, 0);
        while ((bit = sizeof(log) > sizeof(int) ?
                ffsll(log) : ffs(log))) {
            ram_addr_t ram_addr;
            bit -= 1;
            ram_addr = cpu_get_physical_page_desc(addr + bit * VHOST_LOG_PAGE);
            cpu_physical_memory_set_dirty(ram_addr);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}
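
/* CPUPhysMemoryClient callback: sync the dirty log for every memory
 * region and every virtqueue used ring that falls inside
 * [start_addr, end_addr]. No-op unless the device is started with
 * logging enabled. */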
static int vhost_client_sync_dirty_bitmap(CPUPhysMemoryClient *client,
                                          target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    int i;

    if (!dev->log_enabled || !dev->started) {
        return 0;
    }
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        vhost_dev_sync_region(dev, start_addr, end_addr,
                              reg->guest_phys_addr,
                              range_get_last(reg->guest_phys_addr,
                                             reg->memory_size));
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
                              range_get_last(vq->used_phys, vq->used_size));
    }
    return 0;
}

/* Assign/unassign. Keep an unsorted array of non-overlapping
 * memory regions in dev->mem. */
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int from, to, n = dev->mem->nregions;
    /* Track overlapping/split regions for sanity checking. */
    int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;

    for (from = 0, to = 0; from < n; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t reglast;
        uint64_t memlast;
        uint64_t change;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }

        /* No overlap is simple */
        if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                            start_addr, size)) {
            continue;
        }

        /* Split only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!split);

        reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        memlast = range_get_last(start_addr, size);

        /* Remove whole region */
        if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
            --dev->mem->nregions;
            --to;
            assert(to >= 0);
            ++overlap_middle;
            continue;
        }

        /* Shrink region */
        if (memlast >= reglast) {
            reg->memory_size = start_addr - reg->guest_phys_addr;
            assert(reg->memory_size);
            assert(!overlap_end);
            ++overlap_end;
            continue;
        }

        /* Shift region */
        if (start_addr <= reg->guest_phys_addr) {
            change = memlast + 1 - reg->guest_phys_addr;
            reg->memory_size -= change;
            reg->guest_phys_addr += change;
            reg->userspace_addr += change;
            assert(reg->memory_size);
            assert(!overlap_start);
            ++overlap_start;
            continue;
        }

        /* This only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!overlap_start);
        assert(!overlap_end);
        assert(!overlap_middle);
        /* Split region: shrink first part, shift second part. */
        memcpy(dev->mem->regions + n, reg, sizeof *reg);
        reg->memory_size = start_addr - reg->guest_phys_addr;
        assert(reg->memory_size);
        change = memlast + 1 - reg->guest_phys_addr;
        reg = dev->mem->regions + n;
        reg->memory_size -= change;
        assert(reg->memory_size);
        reg->guest_phys_addr += change;
        reg->userspace_addr += change;
        /* Never add more than 1 region */
        assert(dev->mem->nregions == n);
        ++dev->mem->nregions;
        ++split;
    }
}

/* Called after unassign, so no regions overlap the given range. */
static void vhost_dev_assign_memory(struct vhost_dev *dev,
                                    uint64_t start_addr,
                                    uint64_t size,
                                    uint64_t uaddr)
{
    int from, to;
    struct vhost_memory_region *merged = NULL;

    for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t prlast, urlast;
        uint64_t pmlast, umlast;
        uint64_t s, e, u;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }
        prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        pmlast = range_get_last(start_addr, size);
        urlast = range_get_last(reg->userspace_addr, reg->memory_size);
        umlast = range_get_last(uaddr, size);

        /* check for overlapping regions: should never happen. */
        assert(prlast < start_addr || pmlast < reg->guest_phys_addr);

        /* Not an adjacent or overlapping region - do not merge. */
        if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
            (pmlast + 1 != reg->guest_phys_addr ||
             umlast + 1 != reg->userspace_addr)) {
            continue;
        }

        if (merged) {
            --to;
            assert(to >= 0);
        } else {
            merged = reg;
        }
        u = MIN(uaddr, reg->userspace_addr);
        s = MIN(start_addr, reg->guest_phys_addr);
        e = MAX(pmlast, prlast);
        uaddr = merged->userspace_addr = u;
        start_addr = merged->guest_phys_addr = s;
        size = merged->memory_size = e - s + 1;
        assert(merged->memory_size);
    }

    if (!merged) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        memset(reg, 0, sizeof *reg);
        reg->memory_size = size;
        assert(reg->memory_size);
        reg->guest_phys_addr = start_addr;
        reg->userspace_addr = uaddr;
        ++to;
    }
    assert(to <= dev->mem->nregions + 1);
    dev->mem->nregions = to;
}
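
/* Compute how many log chunks are needed to cover all memory regions
 * and all virtqueue used rings. */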
static uint64_t vhost_get_log_size(struct vhost_dev *dev)
{
    uint64_t log_size = 0;
    int i;
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        uint64_t last = range_get_last(reg->guest_phys_addr,
                                       reg->memory_size);
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        uint64_t last = vq->used_phys + vq->used_size - 1;
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    return log_size;
}
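
/* Switch the kernel to a new dirty log of the given size: allocate the
 * new log, install it with VHOST_SET_LOG_BASE, flush whatever the old
 * log still holds into QEMU's dirty bitmap, then free the old log. */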
static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
{
    vhost_log_chunk_t *log;
    uint64_t log_base;
    int r;
    if (size) {
        log = qemu_mallocz(size * sizeof *log);
    } else {
        log = NULL;
    }
    log_base = (uint64_t)(unsigned long)log;
    r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
    assert(r >= 0);
    vhost_client_sync_dirty_bitmap(&dev->client, 0,
                                   (target_phys_addr_t)~0x0ull);
    if (dev->log) {
        qemu_free(dev->log);
    }
    dev->log = log;
    dev->log_size = size;
}
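
/* Verify that a memory layout change did not move or unmap any
 * virtqueue ring that overlaps the changed range; the rings were
 * mapped at start time and must stay put while the device runs. */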
static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int i;
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        target_phys_addr_t l;
        void *p;

        if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
            continue;
        }
        l = vq->ring_size;
        p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
        if (!p || l != vq->ring_size) {
            fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
            return -ENOMEM;
        }
        if (p != vq->ring) {
            fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
            return -EBUSY;
        }
        cpu_physical_memory_unmap(p, l, 0, 0);
    }
    return 0;
}
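
/* Return the first region in dev->mem that overlaps the given range,
 * or NULL if there is none. */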
static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
                                                      uint64_t start_addr,
                                                      uint64_t size)
{
    int i, n = dev->mem->nregions;
    for (i = 0; i < n; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                           start_addr, size)) {
            return reg;
        }
    }
    return NULL;
}
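
/* Return true if the given range is not already covered by a single
 * existing region with a matching userspace address, i.e. the memory
 * table needs to change. */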
static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
                                 uint64_t start_addr,
                                 uint64_t size,
                                 uint64_t uaddr)
{
    struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
    uint64_t reglast;
    uint64_t memlast;

    if (!reg) {
        return true;
    }

    reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
    memlast = range_get_last(start_addr, size);

    /* Need to extend region? */
    if (start_addr < reg->guest_phys_addr || memlast > reglast) {
        return true;
    }
    /* userspace_addr changed? */
    return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
}
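
/* CPUPhysMemoryClient callback: a slot of guest physical memory was
 * (re)mapped or unmapped. Rebuild dev->mem for the affected range,
 * push the new table to the kernel with VHOST_SET_MEM_TABLE, and grow
 * or shrink the dirty log if logging is enabled. */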
static void vhost_client_set_memory(CPUPhysMemoryClient *client,
                                    target_phys_addr_t start_addr,
                                    ram_addr_t size,
                                    ram_addr_t phys_offset,
                                    bool log_dirty)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    int s = offsetof(struct vhost_memory, regions) +
        (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
    uint64_t log_size;
    int r;

    dev->mem = qemu_realloc(dev->mem, s);

    if (log_dirty) {
        flags = IO_MEM_UNASSIGNED;
    }

    assert(size);

    /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
    if (flags == IO_MEM_RAM) {
        if (!vhost_dev_cmp_memory(dev, start_addr, size,
                                  (uintptr_t)qemu_get_ram_ptr(phys_offset))) {
            /* Region exists with same address. Nothing to do. */
            return;
        }
    } else {
        if (!vhost_dev_find_reg(dev, start_addr, size)) {
            /* Removing region that we don't access. Nothing to do. */
            return;
        }
    }

    vhost_dev_unassign_memory(dev, start_addr, size);
    if (flags == IO_MEM_RAM) {
        /* Add given mapping, merging adjacent regions if any */
        vhost_dev_assign_memory(dev, start_addr, size,
                                (uintptr_t)qemu_get_ram_ptr(phys_offset));
    } else {
        /* Remove old mapping for this memory, if any. */
        vhost_dev_unassign_memory(dev, start_addr, size);
    }

    if (!dev->started) {
        return;
    }

    if (dev->started) {
        r = vhost_verify_ring_mappings(dev, start_addr, size);
        assert(r >= 0);
    }

    if (!dev->log_enabled) {
        r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
        assert(r >= 0);
        return;
    }

    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes of log to reduce
     * the number of reallocations. */
#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
    assert(r >= 0);
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }
}
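
/* Tell the kernel where a virtqueue's descriptor, avail and used rings
 * live in QEMU's address space, and whether writes to the used ring
 * should be logged. */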
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx, bool enable_log)
{
    struct vhost_vring_addr addr = {
        .index = idx,
        .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
        .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
        .used_user_addr = (uint64_t)(unsigned long)vq->used,
        .log_guest_addr = vq->used_phys,
        .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
    };
    int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
    if (r < 0) {
        return -errno;
    }
    return 0;
}
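
/* Push the acked feature bits to the kernel, optionally adding
 * VHOST_F_LOG_ALL so the kernel logs all guest memory writes. */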
static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
{
    uint64_t features = dev->acked_features;
    int r;
    if (enable_log) {
        features |= 0x1 << VHOST_F_LOG_ALL;
    }
    r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
    return r < 0 ? -errno : 0;
}
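
/* Enable or disable dirty logging on a running device: update the
 * feature bits, then re-program every virtqueue's addresses with the
 * matching log flag. On failure, roll back to the previous state. */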
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
    int r, t, i;
    r = vhost_dev_set_features(dev, enable_log);
    if (r < 0) {
        goto err_features;
    }
    for (i = 0; i < dev->nvqs; ++i) {
        r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     enable_log);
        if (r < 0) {
            goto err_vq;
        }
    }
    return 0;
err_vq:
    for (; i >= 0; --i) {
        t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     dev->log_enabled);
        assert(t >= 0);
    }
    t = vhost_dev_set_features(dev, dev->log_enabled);
    assert(t >= 0);
err_features:
    return r;
}
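
/* CPUPhysMemoryClient callback: migration starts or stops dirty
 * logging. Allocate or free the log and reconfigure the running
 * device accordingly. */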
static int vhost_client_migration_log(CPUPhysMemoryClient *client,
                                      int enable)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    int r;
    if (!!enable == dev->log_enabled) {
        return 0;
    }
    if (!dev->started) {
        dev->log_enabled = enable;
        return 0;
    }
    if (!enable) {
        r = vhost_dev_set_log(dev, false);
        if (r < 0) {
            return r;
        }
        if (dev->log) {
            qemu_free(dev->log);
        }
        dev->log = NULL;
        dev->log_size = 0;
    } else {
        vhost_dev_log_resize(dev, vhost_get_log_size(dev));
        r = vhost_dev_set_log(dev, true);
        if (r < 0) {
            return r;
        }
    }
    dev->log_enabled = enable;
    return 0;
}
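
/* Start one virtqueue in the kernel: program its size and base index,
 * map the descriptor, avail, used and ring areas into QEMU's address
 * space, hand those addresses to vhost, and wire up the kick (host
 * notifier) and call (guest notifier) eventfds. */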
static int vhost_virtqueue_init(struct vhost_dev *dev,
                                struct VirtIODevice *vdev,
                                struct vhost_virtqueue *vq,
                                unsigned idx)
{
    target_phys_addr_t s, l, a;
    int r;
    struct vhost_vring_file file = {
        .index = idx,
    };
    struct vhost_vring_state state = {
        .index = idx,
    };
    struct VirtQueue *vvq = virtio_get_queue(vdev, idx);

    if (!vdev->binding->set_host_notifier) {
        fprintf(stderr, "binding does not support host notifiers\n");
        return -ENOSYS;
    }

    vq->num = state.num = virtio_queue_get_num(vdev, idx);
    r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
    if (r) {
        return -errno;
    }

    state.num = virtio_queue_get_last_avail_idx(vdev, idx);
    r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
    if (r) {
        return -errno;
    }

    s = l = virtio_queue_get_desc_size(vdev, idx);
    a = virtio_queue_get_desc_addr(vdev, idx);
    vq->desc = cpu_physical_memory_map(a, &l, 0);
    if (!vq->desc || l != s) {
        r = -ENOMEM;
        goto fail_alloc_desc;
    }
    s = l = virtio_queue_get_avail_size(vdev, idx);
    a = virtio_queue_get_avail_addr(vdev, idx);
    vq->avail = cpu_physical_memory_map(a, &l, 0);
    if (!vq->avail || l != s) {
        r = -ENOMEM;
        goto fail_alloc_avail;
    }
    vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
    vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
    vq->used = cpu_physical_memory_map(a, &l, 1);
    if (!vq->used || l != s) {
        r = -ENOMEM;
        goto fail_alloc_used;
    }

    vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
    vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
    vq->ring = cpu_physical_memory_map(a, &l, 1);
    if (!vq->ring || l != s) {
        r = -ENOMEM;
        goto fail_alloc_ring;
    }

    r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
    if (r < 0) {
        r = -errno;
        goto fail_alloc;
    }
    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
    if (r < 0) {
        fprintf(stderr, "Error binding host notifier: %d\n", -r);
        goto fail_host_notifier;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
    r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
    if (r) {
        r = -errno;
        goto fail_kick;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
    if (r) {
        r = -errno;
        goto fail_call;
    }

    return 0;

fail_call:
fail_kick:
    vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
fail_host_notifier:
fail_alloc:
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, 0);
fail_alloc_ring:
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              0, 0);
fail_alloc_used:
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, 0);
fail_alloc_avail:
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, 0);
fail_alloc_desc:
    return r;
}
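
/* Stop one virtqueue: detach the host notifier, read back the vring
 * base (last avail index) from the kernel so virtio state stays
 * consistent, and unmap the ring areas, marking the used ring as
 * written. */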
static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
                                    struct VirtIODevice *vdev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx)
{
    struct vhost_vring_state state = {
        .index = idx,
    };
    int r;
    r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
    if (r < 0) {
        fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
        fflush(stderr);
    }
    assert(r >= 0);
    r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
    if (r < 0) {
        fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
        fflush(stderr);
    }
    virtio_queue_set_last_avail_idx(vdev, idx, state.num);
    assert(r >= 0);
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, virtio_queue_get_ring_size(vdev, idx));
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              1, virtio_queue_get_used_size(vdev, idx));
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, virtio_queue_get_avail_size(vdev, idx));
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, virtio_queue_get_desc_size(vdev, idx));
}
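
/* Open (or adopt) the vhost control fd, become its owner, query the
 * feature bits it supports, and register the device as a physical
 * memory client so it tracks guest memory changes. */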
int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
{
    uint64_t features;
    int r;
    if (devfd >= 0) {
        hdev->control = devfd;
    } else {
        hdev->control = open("/dev/vhost-net", O_RDWR);
        if (hdev->control < 0) {
            return -errno;
        }
    }
    r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
    if (r < 0) {
        goto fail;
    }

    r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
    if (r < 0) {
        goto fail;
    }
    hdev->features = features;

    hdev->client.set_memory = vhost_client_set_memory;
    hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
    hdev->client.migration_log = vhost_client_migration_log;
    hdev->client.log_start = NULL;
    hdev->client.log_stop = NULL;
    hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
    hdev->log = NULL;
    hdev->log_size = 0;
    hdev->log_enabled = false;
    hdev->started = false;
    cpu_register_phys_memory_client(&hdev->client);
    hdev->force = force;
    return 0;
fail:
    r = -errno;
    close(hdev->control);
    return r;
}

void vhost_dev_cleanup(struct vhost_dev *hdev)
{
    cpu_unregister_phys_memory_client(&hdev->client);
    qemu_free(hdev->mem);
    close(hdev->control);
}
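
/* Decide whether vhost should be used for this device: yes if the
 * binding cannot report guest notifier support, if guest notifiers
 * are available, or if vhost was explicitly forced. */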
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    return !vdev->binding->query_guest_notifiers ||
        vdev->binding->query_guest_notifiers(vdev->binding_opaque) ||
        hdev->force;
}
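
/* Start the whole device: bind guest notifiers, push features and the
 * memory table to the kernel, start every virtqueue, and install a
 * dirty log if logging is already enabled. */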
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;
    if (!vdev->binding->set_guest_notifiers) {
        fprintf(stderr, "binding does not support guest notifiers\n");
        r = -ENOSYS;
        goto fail;
    }

    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
    if (r < 0) {
        fprintf(stderr, "Error binding guest notifier: %d\n", -r);
        goto fail_notifiers;
    }

    r = vhost_dev_set_features(hdev, hdev->log_enabled);
    if (r < 0) {
        goto fail_features;
    }
    r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
    if (r < 0) {
        r = -errno;
        goto fail_mem;
    }
    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_init(hdev,
                                 vdev,
                                 hdev->vqs + i,
                                 i);
        if (r < 0) {
            goto fail_vq;
        }
    }

    if (hdev->log_enabled) {
        hdev->log_size = vhost_get_log_size(hdev);
        hdev->log = hdev->log_size ?
            qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
        r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
                  (uint64_t)(unsigned long)hdev->log);
        if (r < 0) {
            r = -errno;
            goto fail_log;
        }
    }

    hdev->started = true;

    return 0;
fail_log:
fail_vq:
    while (--i >= 0) {
        vhost_virtqueue_cleanup(hdev,
                                vdev,
                                hdev->vqs + i,
                                i);
    }
fail_mem:
fail_features:
    vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
fail_notifiers:
fail:
    return r;
}
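
/* Stop the device: tear down every virtqueue, flush any remaining
 * dirty log entries into QEMU's bitmap, unbind guest notifiers and
 * free the log. */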
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;
    for (i = 0; i < hdev->nvqs; ++i) {
        vhost_virtqueue_cleanup(hdev,
                                vdev,
                                hdev->vqs + i,
                                i);
    }
    vhost_client_sync_dirty_bitmap(&hdev->client, 0,
                                   (target_phys_addr_t)~0x0ull);
    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);
    hdev->started = false;
    qemu_free(hdev->log);
    hdev->log = NULL;
    hdev->log_size = 0;
}