/*
 * vhost support
 *
 * Copyright Red Hat, Inc. 2010
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include <sys/ioctl.h>
#include "vhost.h"
#include "hw/hw.h"
#include "range.h"
#include <linux/vhost.h>

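/* Sync the dirty page log for the intersection of the memory range
 * [mfirst, mlast] and the region range [rfirst, rlast] into QEMU's
 * dirty bitmap. Each set bit in a log chunk marks one dirty page;
 * the log is cleared as it is scanned. */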
static void vhost_dev_sync_region(struct vhost_dev *dev,
                                  uint64_t mfirst, uint64_t mlast,
                                  uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;

    assert(end / VHOST_LOG_CHUNK < dev->log_size);
    assert(start / VHOST_LOG_CHUNK < dev->log_size);
    if (end < start) {
        return;
    }
    for (; from < to; ++from) {
        vhost_log_chunk_t log;
        int bit;
        /* We first check with non-atomic: much cheaper,
         * and we expect non-dirty to be the common case. */
        if (!*from) {
            addr += VHOST_LOG_CHUNK;
            continue;
        }
        /* Data must be read atomically. We don't really
         * need the barrier semantics of __sync
         * builtins, but it's easier to use them than
         * roll our own. */
        log = __sync_fetch_and_and(from, 0);
        while ((bit = sizeof(log) > sizeof(int) ?
                ffsll(log) : ffs(log))) {
            ram_addr_t ram_addr;
            bit -= 1;
            ram_addr = cpu_get_physical_page_desc(addr + bit * VHOST_LOG_PAGE);
            cpu_physical_memory_set_dirty(ram_addr);
            log &= ~(0x1ull << bit);
        }
        addr += VHOST_LOG_CHUNK;
    }
}

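/* CPUPhysMemoryClient callback: flush dirty pages logged by vhost for
 * every memory region and every virtqueue used ring that intersects
 * [start_addr, end_addr]. */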
static int vhost_client_sync_dirty_bitmap(CPUPhysMemoryClient *client,
                                          target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    int i;

    if (!dev->log_enabled || !dev->started) {
        return 0;
    }
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        vhost_dev_sync_region(dev, start_addr, end_addr,
                              reg->guest_phys_addr,
                              range_get_last(reg->guest_phys_addr,
                                             reg->memory_size));
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
                              range_get_last(vq->used_phys, vq->used_size));
    }
    return 0;
}

/* Assign/unassign. Keep an unsorted array of non-overlapping
 * memory regions in dev->mem. */
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int from, to, n = dev->mem->nregions;
    /* Track overlapping/split regions for sanity checking. */
    int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;

    for (from = 0, to = 0; from < n; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t reglast;
        uint64_t memlast;
        uint64_t change;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }

        /* No overlap is simple */
        if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                            start_addr, size)) {
            continue;
        }

        /* Split only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!split);

        reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        memlast = range_get_last(start_addr, size);

        /* Remove whole region */
        if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
            --dev->mem->nregions;
            --to;
            ++overlap_middle;
            continue;
        }

        /* Shrink region */
        if (memlast >= reglast) {
            reg->memory_size = start_addr - reg->guest_phys_addr;
            assert(reg->memory_size);
            assert(!overlap_end);
            ++overlap_end;
            continue;
        }

        /* Shift region */
        if (start_addr <= reg->guest_phys_addr) {
            change = memlast + 1 - reg->guest_phys_addr;
            reg->memory_size -= change;
            reg->guest_phys_addr += change;
            reg->userspace_addr += change;
            assert(reg->memory_size);
            assert(!overlap_start);
            ++overlap_start;
            continue;
        }

        /* This only happens if supplied region
         * is in the middle of an existing one. Thus it can not
         * overlap with any other existing region. */
        assert(!overlap_start);
        assert(!overlap_end);
        assert(!overlap_middle);
        /* Split region: shrink first part, shift second part. */
        memcpy(dev->mem->regions + n, reg, sizeof *reg);
        reg->memory_size = start_addr - reg->guest_phys_addr;
        assert(reg->memory_size);
        change = memlast + 1 - reg->guest_phys_addr;
        reg = dev->mem->regions + n;
        reg->memory_size -= change;
        assert(reg->memory_size);
        reg->guest_phys_addr += change;
        reg->userspace_addr += change;
        /* Never add more than 1 region */
        assert(dev->mem->nregions == n);
        ++dev->mem->nregions;
        ++split;
    }
}

/* Called after unassign, so no regions overlap the given range. */
static void vhost_dev_assign_memory(struct vhost_dev *dev,
                                    uint64_t start_addr,
                                    uint64_t size,
                                    uint64_t uaddr)
{
    int from, to;
    struct vhost_memory_region *merged = NULL;

    for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        uint64_t prlast, urlast;
        uint64_t pmlast, umlast;
        uint64_t s, e, u;

        /* clone old region */
        if (to != from) {
            memcpy(reg, dev->mem->regions + from, sizeof *reg);
        }
        prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
        pmlast = range_get_last(start_addr, size);
        urlast = range_get_last(reg->userspace_addr, reg->memory_size);
        umlast = range_get_last(uaddr, size);

        /* check for overlapping regions: should never happen. */
        assert(prlast < start_addr || pmlast < reg->guest_phys_addr);

        /* Not an adjacent or overlapping region - do not merge. */
        if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
            (pmlast + 1 != reg->guest_phys_addr ||
             umlast + 1 != reg->userspace_addr)) {
            continue;
        }

        if (merged) {
            --to;
            assert(to >= 0);
        } else {
            merged = reg;
        }
        u = MIN(uaddr, reg->userspace_addr);
        s = MIN(start_addr, reg->guest_phys_addr);
        e = MAX(pmlast, prlast);
        uaddr = merged->userspace_addr = u;
        start_addr = merged->guest_phys_addr = s;
        size = merged->memory_size = e - s + 1;
        assert(merged->memory_size);
    }

    if (!merged) {
        struct vhost_memory_region *reg = dev->mem->regions + to;
        memset(reg, 0, sizeof *reg);
        reg->memory_size = size;
        assert(reg->memory_size);
        reg->guest_phys_addr = start_addr;
        reg->userspace_addr = uaddr;
        ++to;
    }
    assert(to <= dev->mem->nregions + 1);
    dev->mem->nregions = to;
}

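/* Number of vhost_log_chunk_t entries needed to cover all memory
 * regions and all virtqueue used rings. */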
static uint64_t vhost_get_log_size(struct vhost_dev *dev)
{
    uint64_t log_size = 0;
    int i;
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        uint64_t last = range_get_last(reg->guest_phys_addr,
                                       reg->memory_size);
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        uint64_t last = vq->used_phys + vq->used_size - 1;
        log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    }
    return log_size;
}

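/* Allocate a new log of the given size and point the kernel at it, then
 * sync and free the old one. Syncing after the kernel has switched logs
 * ensures no write logged to the old log is lost. */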
static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
{
    vhost_log_chunk_t *log;
    uint64_t log_base;
    int r;

    if (size) {
        log = g_malloc0(size * sizeof *log);
    } else {
        log = NULL;
    }
    log_base = (uint64_t)(unsigned long)log;
    r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
    assert(r >= 0);
    vhost_client_sync_dirty_bitmap(&dev->client, 0,
                                   (target_phys_addr_t)~0x0ull);
    if (dev->log) {
        g_free(dev->log);
    }
    dev->log = log;
    dev->log_size = size;
}

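/* After a memory change, verify that any virtqueue ring overlapping the
 * changed range still maps to the same host address it was set up with. */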
static int vhost_verify_ring_mappings(struct vhost_dev *dev,
                                      uint64_t start_addr,
                                      uint64_t size)
{
    int i;
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_virtqueue *vq = dev->vqs + i;
        target_phys_addr_t l;
        void *p;

        if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
            continue;
        }
        l = vq->ring_size;
        p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
        if (!p || l != vq->ring_size) {
            fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
            return -ENOMEM;
        }
        if (p != vq->ring) {
            fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
            return -EBUSY;
        }
        cpu_physical_memory_unmap(p, l, 0, 0);
    }
    return 0;
}

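/* Return the first region in the table that overlaps the given range,
 * or NULL if there is none. */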
static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
                                                      uint64_t start_addr,
                                                      uint64_t size)
{
    int i, n = dev->mem->nregions;
    for (i = 0; i < n; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
                           start_addr, size)) {
            return reg;
        }
    }
    return NULL;
}

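/* Return true if the region table needs updating for this mapping:
 * no existing region fully covers it, or it now maps to a different
 * userspace address. */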
static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
                                 uint64_t start_addr,
                                 uint64_t size,
                                 uint64_t uaddr)
{
    struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
    uint64_t reglast;
    uint64_t memlast;

    if (!reg) {
        return true;
    }

    reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
    memlast = range_get_last(start_addr, size);

    /* Need to extend region? */
    if (start_addr < reg->guest_phys_addr || memlast > reglast) {
        return true;
    }
    /* userspace_addr changed? */
    return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
}

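/* CPUPhysMemoryClient callback: a guest physical range was mapped,
 * remapped or unmapped. Update the region table and, if the device is
 * running, push the new table to the kernel, resizing the dirty log
 * as needed. */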
static void vhost_client_set_memory(CPUPhysMemoryClient *client,
                                    target_phys_addr_t start_addr,
                                    ram_addr_t size,
                                    ram_addr_t phys_offset,
                                    bool log_dirty)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    int s = offsetof(struct vhost_memory, regions) +
        (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
    uint64_t log_size;
    int r;

    dev->mem = g_realloc(dev->mem, s);

    if (log_dirty) {
        flags = IO_MEM_UNASSIGNED;
    }

    assert(size);

    /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
    if (flags == IO_MEM_RAM) {
        if (!vhost_dev_cmp_memory(dev, start_addr, size,
                                  (uintptr_t)qemu_get_ram_ptr(phys_offset))) {
            /* Region exists with same address. Nothing to do. */
            return;
        }
    } else {
        if (!vhost_dev_find_reg(dev, start_addr, size)) {
            /* Removing region that we don't access. Nothing to do. */
            return;
        }
    }

    vhost_dev_unassign_memory(dev, start_addr, size);
    if (flags == IO_MEM_RAM) {
        /* Add given mapping, merging adjacent regions if any */
        vhost_dev_assign_memory(dev, start_addr, size,
                                (uintptr_t)qemu_get_ram_ptr(phys_offset));
    } else {
        /* Remove old mapping for this memory, if any. */
        vhost_dev_unassign_memory(dev, start_addr, size);
    }

    if (!dev->started) {
        return;
    }

    if (dev->started) {
        r = vhost_verify_ring_mappings(dev, start_addr, size);
        assert(r >= 0);
    }

    if (!dev->log_enabled) {
        r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
        assert(r >= 0);
        return;
    }

    log_size = vhost_get_log_size(dev);
    /* We allocate an extra 4K bytes to log,
     * to reduce the number of reallocations. */
#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    /* To log more, must increase log size before table update. */
    if (dev->log_size < log_size) {
        vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    }
    r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
    assert(r >= 0);
    /* To log less, can only decrease log size after table update. */
    if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
        vhost_dev_log_resize(dev, log_size);
    }
}

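/* Tell the kernel where the descriptor, avail and used rings of one
 * virtqueue live in this process, and whether to log writes to the
 * used ring. */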
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx, bool enable_log)
{
    struct vhost_vring_addr addr = {
        .index = idx,
        .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
        .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
        .used_user_addr = (uint64_t)(unsigned long)vq->used,
        .log_guest_addr = vq->used_phys,
        .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
    };
    int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
    if (r < 0) {
        return -errno;
    }
    return 0;
}

static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
{
    uint64_t features = dev->acked_features;
    int r;
    if (enable_log) {
        features |= 0x1 << VHOST_F_LOG_ALL;
    }
    r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
    return r < 0 ? -errno : 0;
}

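/* Enable or disable dirty logging for the whole device: toggle
 * VHOST_F_LOG_ALL and update every virtqueue's logging flag, rolling
 * back on failure. */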
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
    int r, t, i;
    r = vhost_dev_set_features(dev, enable_log);
    if (r < 0) {
        goto err_features;
    }
    for (i = 0; i < dev->nvqs; ++i) {
        r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     enable_log);
        if (r < 0) {
            goto err_vq;
        }
    }
    return 0;
err_vq:
    for (; i >= 0; --i) {
        t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
                                     dev->log_enabled);
        assert(t >= 0);
    }
    t = vhost_dev_set_features(dev, dev->log_enabled);
    assert(t >= 0);
err_features:
    return r;
}

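/* CPUPhysMemoryClient callback: migration starts or stops dirty
 * logging. If the device is running, allocate or free the log and
 * propagate the change to the kernel. */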
static int vhost_client_migration_log(CPUPhysMemoryClient *client,
                                      int enable)
{
    struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
    int r;
    if (!!enable == dev->log_enabled) {
        return 0;
    }
    if (!dev->started) {
        dev->log_enabled = enable;
        return 0;
    }
    if (!enable) {
        r = vhost_dev_set_log(dev, false);
        if (r < 0) {
            return r;
        }
        if (dev->log) {
            g_free(dev->log);
        }
        dev->log = NULL;
        dev->log_size = 0;
    } else {
        vhost_dev_log_resize(dev, vhost_get_log_size(dev));
        r = vhost_dev_set_log(dev, true);
        if (r < 0) {
            return r;
        }
    }
    dev->log_enabled = enable;
    return 0;
}

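/* Set up one virtqueue in the kernel: ring size and base index, host
 * mappings of the desc/avail/used rings, and the kick/call eventfds. */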
static int vhost_virtqueue_init(struct vhost_dev *dev,
                                struct VirtIODevice *vdev,
                                struct vhost_virtqueue *vq,
                                unsigned idx)
{
    target_phys_addr_t s, l, a;
    int r;
    struct vhost_vring_file file = {
        .index = idx,
    };
    struct vhost_vring_state state = {
        .index = idx,
    };
    struct VirtQueue *vvq = virtio_get_queue(vdev, idx);

    vq->num = state.num = virtio_queue_get_num(vdev, idx);
    r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
    if (r) {
        return -errno;
    }

    state.num = virtio_queue_get_last_avail_idx(vdev, idx);
    r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
    if (r) {
        return -errno;
    }

    s = l = virtio_queue_get_desc_size(vdev, idx);
    a = virtio_queue_get_desc_addr(vdev, idx);
    vq->desc = cpu_physical_memory_map(a, &l, 0);
    if (!vq->desc || l != s) {
        r = -ENOMEM;
        goto fail_alloc_desc;
    }
    s = l = virtio_queue_get_avail_size(vdev, idx);
    a = virtio_queue_get_avail_addr(vdev, idx);
    vq->avail = cpu_physical_memory_map(a, &l, 0);
    if (!vq->avail || l != s) {
        r = -ENOMEM;
        goto fail_alloc_avail;
    }
    vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
    vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
    vq->used = cpu_physical_memory_map(a, &l, 1);
    if (!vq->used || l != s) {
        r = -ENOMEM;
        goto fail_alloc_used;
    }

    vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
    vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
    vq->ring = cpu_physical_memory_map(a, &l, 1);
    if (!vq->ring || l != s) {
        r = -ENOMEM;
        goto fail_alloc_ring;
    }

    r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
    if (r < 0) {
        r = -errno;
        goto fail_alloc;
    }
    file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
    r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
    if (r) {
        r = -errno;
        goto fail_kick;
    }

    file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
    r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
    if (r) {
        r = -errno;
        goto fail_call;
    }

    return 0;

fail_call:
fail_kick:
fail_alloc:
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, 0);
fail_alloc_ring:
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              0, 0);
fail_alloc_used:
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, 0);
fail_alloc_avail:
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, 0);
fail_alloc_desc:
    return r;
}

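/* Tear down one virtqueue: read back the last avail index from the
 * kernel so virtio can resume where vhost left off, then unmap the
 * rings. */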
static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
                                    struct VirtIODevice *vdev,
                                    struct vhost_virtqueue *vq,
                                    unsigned idx)
{
    struct vhost_vring_state state = {
        .index = idx,
    };
    int r;
    r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
    if (r < 0) {
        fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
        fflush(stderr);
    }
    virtio_queue_set_last_avail_idx(vdev, idx, state.num);
    assert(r >= 0);
    cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                              0, virtio_queue_get_ring_size(vdev, idx));
    cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
                              1, virtio_queue_get_used_size(vdev, idx));
    cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
                              0, virtio_queue_get_avail_size(vdev, idx));
    cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
                              0, virtio_queue_get_desc_size(vdev, idx));
}

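/* Open the vhost control device (or adopt the fd passed in), become its
 * owner, query its feature bits and register as a physical memory client. */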
int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
{
    uint64_t features;
    int r;
    if (devfd >= 0) {
        hdev->control = devfd;
    } else {
        hdev->control = open("/dev/vhost-net", O_RDWR);
        if (hdev->control < 0) {
            return -errno;
        }
    }
    r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
    if (r < 0) {
        goto fail;
    }

    r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
    if (r < 0) {
        goto fail;
    }
    hdev->features = features;

    hdev->client.set_memory = vhost_client_set_memory;
    hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
    hdev->client.migration_log = vhost_client_migration_log;
    hdev->client.log_start = NULL;
    hdev->client.log_stop = NULL;
    hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
    hdev->log = NULL;
    hdev->log_size = 0;
    hdev->log_enabled = false;
    hdev->started = false;
    cpu_register_phys_memory_client(&hdev->client);
    hdev->force = force;
    return 0;
fail:
    r = -errno;
    close(hdev->control);
    return r;
}

void vhost_dev_cleanup(struct vhost_dev *hdev)
{
    cpu_unregister_phys_memory_client(&hdev->client);
    g_free(hdev->mem);
    close(hdev->control);
}

bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    return !vdev->binding->query_guest_notifiers ||
        vdev->binding->query_guest_notifiers(vdev->binding_opaque) ||
        hdev->force;
}

/* Stop processing guest IO notifications in qemu.
 * Start processing them in vhost in kernel.
 */
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;
    if (!vdev->binding->set_host_notifier) {
        fprintf(stderr, "binding does not support host notifiers\n");
        r = -ENOSYS;
        goto fail;
    }

    for (i = 0; i < hdev->nvqs; ++i) {
        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
            goto fail_vq;
        }
    }

    return 0;
fail_vq:
    while (--i >= 0) {
        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
            fflush(stderr);
        }
        assert(r >= 0);
    }
fail:
    return r;
}

/* Stop processing guest IO notifications in vhost.
 * Start processing them in qemu.
 * This might actually run the qemu handlers right away,
 * so virtio in qemu must be completely setup when this is called.
 */
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;
    for (i = 0; i < hdev->nvqs; ++i) {
        r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
        if (r < 0) {
            fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
            fflush(stderr);
        }
        assert(r >= 0);
    }
}

/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;
    if (!vdev->binding->set_guest_notifiers) {
        fprintf(stderr, "binding does not support guest notifiers\n");
        r = -ENOSYS;
        goto fail;
    }

    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
    if (r < 0) {
        fprintf(stderr, "Error binding guest notifier: %d\n", -r);
        goto fail_notifiers;
    }

    r = vhost_dev_set_features(hdev, hdev->log_enabled);
    if (r < 0) {
        goto fail_features;
    }
    r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
    if (r < 0) {
        r = -errno;
        goto fail_mem;
    }
    for (i = 0; i < hdev->nvqs; ++i) {
        r = vhost_virtqueue_init(hdev,
                                 vdev,
                                 hdev->vqs + i,
                                 i);
        if (r < 0) {
            goto fail_vq;
        }
    }

    if (hdev->log_enabled) {
        hdev->log_size = vhost_get_log_size(hdev);
        hdev->log = hdev->log_size ?
            g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
        r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
                  (uint64_t)(unsigned long)hdev->log);
        if (r < 0) {
            r = -errno;
            goto fail_log;
        }
    }

    hdev->started = true;

    return 0;
fail_log:
fail_vq:
    while (--i >= 0) {
        vhost_virtqueue_cleanup(hdev,
                                vdev,
                                hdev->vqs + i,
                                i);
    }
fail_mem:
fail_features:
    vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
fail_notifiers:
fail:
    return r;
}

/* Host notifiers must be enabled at this point. */
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
    int i, r;
    for (i = 0; i < hdev->nvqs; ++i) {
        vhost_virtqueue_cleanup(hdev,
                                vdev,
                                hdev->vqs + i,
                                i);
    }
    vhost_client_sync_dirty_bitmap(&hdev->client, 0,
                                   (target_phys_addr_t)~0x0ull);
    r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);

    hdev->started = false;
    g_free(hdev->log);
    hdev->log = NULL;
    hdev->log_size = 0;
}