/* vhost_net.c */
  1. /*
  2. * vhost-net support
  3. *
  4. * Copyright Red Hat, Inc. 2010
  5. *
  6. * Authors:
  7. * Michael S. Tsirkin <mst@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. * Contributions after 2012-01-13 are licensed under the terms of the
  13. * GNU GPL, version 2 or (at your option) any later version.
  14. */
  15. #include "qemu/osdep.h"
  16. #include "net/net.h"
  17. #include "net/tap.h"
  18. #include "net/vhost-user.h"
  19. #include "net/vhost-vdpa.h"
  20. #include "standard-headers/linux/vhost_types.h"
  21. #include "hw/virtio/virtio-net.h"
  22. #include "net/vhost_net.h"
  23. #include "qapi/error.h"
  24. #include "qemu/error-report.h"
  25. #include "qemu/main-loop.h"
  26. #include <sys/socket.h>
  27. #include <net/if.h>
  28. #include <netinet/in.h>
  29. #include "standard-headers/linux/virtio_ring.h"
  30. #include "hw/virtio/vhost.h"
  31. #include "hw/virtio/virtio-bus.h"
/* Features supported by host kernel. */
static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_NET_F_HASH_REPORT,
    VHOST_INVALID_FEATURE_BIT   /* terminator, must remain last */
};
/* Features supported by others (i.e. the vhost-user backend). */
static const int user_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,

    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_HASH_REPORT,

    /* This bit implies RARP isn't sent by QEMU out of band */
    VIRTIO_NET_F_GUEST_ANNOUNCE,

    VIRTIO_NET_F_MQ,

    VHOST_INVALID_FEATURE_BIT   /* terminator, must remain last */
};
  74. static const int *vhost_net_get_feature_bits(struct vhost_net *net)
  75. {
  76. const int *feature_bits = 0;
  77. switch (net->nc->info->type) {
  78. case NET_CLIENT_DRIVER_TAP:
  79. feature_bits = kernel_feature_bits;
  80. break;
  81. case NET_CLIENT_DRIVER_VHOST_USER:
  82. feature_bits = user_feature_bits;
  83. break;
  84. #ifdef CONFIG_VHOST_NET_VDPA
  85. case NET_CLIENT_DRIVER_VHOST_VDPA:
  86. feature_bits = vdpa_feature_bits;
  87. break;
  88. #endif
  89. default:
  90. error_report("Feature bits not defined for this type: %d",
  91. net->nc->info->type);
  92. break;
  93. }
  94. return feature_bits;
  95. }
  96. uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
  97. {
  98. return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
  99. features);
  100. }
  101. int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
  102. uint32_t config_len)
  103. {
  104. return vhost_dev_get_config(&net->dev, config, config_len, NULL);
  105. }
  106. int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
  107. uint32_t offset, uint32_t size, uint32_t flags)
  108. {
  109. return vhost_dev_set_config(&net->dev, data, offset, size, flags);
  110. }
/*
 * Record the feature set acked by the guest.  Starts from the backend's
 * mandatory features, then folds in @features filtered through the
 * backend-specific feature-bit table.
 */
void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
{
    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
}
  116. uint64_t vhost_net_get_max_queues(VHostNetState *net)
  117. {
  118. return net->dev.max_queues;
  119. }
  120. uint64_t vhost_net_get_acked_features(VHostNetState *net)
  121. {
  122. return net->dev.acked_features;
  123. }
  124. static int vhost_net_get_fd(NetClientState *backend)
  125. {
  126. switch (backend->info->type) {
  127. case NET_CLIENT_DRIVER_TAP:
  128. return tap_get_fd(backend);
  129. default:
  130. fprintf(stderr, "vhost-net requires tap backend\n");
  131. return -ENOSYS;
  132. }
  133. }
/*
 * Allocate and initialise a vhost_net instance for @options->net_backend.
 *
 * Returns the new instance on success, or NULL on failure (errors are
 * reported).  Ownership passes to the caller, who releases it with
 * vhost_net_cleanup() followed by g_free().
 */
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
    int r;
    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
    struct vhost_net *net = g_new0(struct vhost_net, 1);
    uint64_t features = 0;
    Error *local_err = NULL;

    if (!options->net_backend) {
        fprintf(stderr, "vhost-net requires net backend to be setup\n");
        goto fail;
    }
    net->nc = options->net_backend;
    net->dev.nvqs = options->nvqs;

    net->dev.max_queues = 1;
    net->dev.vqs = net->vqs;

    if (backend_kernel) {
        r = vhost_net_get_fd(options->net_backend);
        if (r < 0) {
            goto fail;
        }
        /*
         * If the tap device does not supply a virtio-net header itself,
         * vhost must prepend one (VHOST_NET_F_VIRTIO_NET_HDR).
         */
        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
        net->backend = r;
        net->dev.protocol_features = 0;
    } else {
        net->dev.backend_features = 0;
        net->dev.protocol_features = 0;
        net->backend = -1;

        /* vhost-user needs vq_index to initiate a specific queue pair */
        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
    }

    r = vhost_dev_init(&net->dev, options->opaque,
                       options->backend_type, options->busyloop_timeout,
                       &local_err);
    if (r < 0) {
        error_report_err(local_err);
        goto fail;
    }
    if (backend_kernel) {
        /* Mergeable RX buffers need the larger vnet header. */
        if (!qemu_has_vnet_hdr_len(options->net_backend,
                                   sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
        }
        /* Every feature the backend requires must be offered by vhost. */
        if (~net->dev.features & net->dev.backend_features) {
            fprintf(stderr, "vhost lacks feature mask %" PRIu64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & net->dev.backend_features));
            goto fail;
        }
    }

    /* Set sane init value. Override when guest acks. */
#ifdef CONFIG_VHOST_NET_USER
    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        features = vhost_user_get_acked_features(net->nc);
        if (~net->dev.features & features) {
            fprintf(stderr, "vhost lacks feature mask %" PRIu64
                    " for backend\n",
                    (uint64_t)(~net->dev.features & features));
            goto fail;
        }
    }
#endif

    vhost_net_ack_features(net, features);

    return net;

fail:
    /* Safe even before vhost_dev_init(): net->dev is zero-initialised. */
    vhost_dev_cleanup(&net->dev);
    g_free(net);
    return NULL;
}
/* Record the virtqueue index window [vq_index, vq_index_end) served by @net. */
static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
                                   int vq_index_end)
{
    net->dev.vq_index = vq_index;
    net->dev.vq_index_end = vq_index_end;
}
/*
 * Start a single vhost device: enable host notifiers, start the device,
 * and, for tap backends, attach the tap fd to every enabled virtqueue.
 *
 * Returns 0 on success or a negative error; on failure all partial
 * setup is unwound in reverse order before returning.
 */
static int vhost_net_start_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    struct vhost_vring_file file = { };
    int r;

    r = vhost_dev_enable_notifiers(&net->dev, dev);
    if (r < 0) {
        goto fail_notifiers;
    }

    r = vhost_dev_start(&net->dev, dev);
    if (r < 0) {
        goto fail_start;
    }

    if (net->nc->info->poll) {
        /* vhost polls the backend from now on; stop QEMU's own polling. */
        net->nc->info->poll(net->nc, false);
    }

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        /* Detach the fd from QEMU's main loop before handing it to vhost. */
        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
        file.fd = net->backend;
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            r = vhost_net_set_backend(&net->dev, &file);
            if (r < 0) {
                r = -errno;
                goto fail;
            }
        }
    }
    return 0;

fail:
    /* Detach (fd == -1) the queues that were attached above. */
    file.fd = -1;
    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        while (file.index-- > 0) {
            if (!virtio_queue_enabled(dev, net->dev.vq_index +
                                      file.index)) {
                /* Queue might not be ready for start */
                continue;
            }
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev);
fail_start:
    vhost_dev_disable_notifiers(&net->dev, dev);
fail_notifiers:
    return r;
}
/*
 * Stop a single vhost device: detach the backend fd from every queue
 * (tap only), hand polling back to QEMU, stop the device and release
 * the host notifiers.  Mirrors vhost_net_start_one() in reverse.
 */
static void vhost_net_stop_one(struct vhost_net *net,
                               VirtIODevice *dev)
{
    /* fd == -1 detaches the backend from a queue. */
    struct vhost_vring_file file = { .fd = -1 };

    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
            int r = vhost_net_set_backend(&net->dev, &file);
            assert(r >= 0);
        }
    }
    if (net->nc->info->poll) {
        net->nc->info->poll(net->nc, true);
    }
    vhost_dev_stop(&net->dev, dev);
    vhost_dev_disable_notifiers(&net->dev, dev);
}
  280. int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
  281. int data_queue_pairs, int cvq)
  282. {
  283. BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
  284. VirtioBusState *vbus = VIRTIO_BUS(qbus);
  285. VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
  286. int total_notifiers = data_queue_pairs * 2 + cvq;
  287. VirtIONet *n = VIRTIO_NET(dev);
  288. int nvhosts = data_queue_pairs + cvq;
  289. struct vhost_net *net;
  290. int r, e, i, index_end = data_queue_pairs * 2;
  291. NetClientState *peer;
  292. if (cvq) {
  293. index_end += 1;
  294. }
  295. if (!k->set_guest_notifiers) {
  296. error_report("binding does not support guest notifiers");
  297. return -ENOSYS;
  298. }
  299. for (i = 0; i < nvhosts; i++) {
  300. if (i < data_queue_pairs) {
  301. peer = qemu_get_peer(ncs, i);
  302. } else { /* Control Virtqueue */
  303. peer = qemu_get_peer(ncs, n->max_queue_pairs);
  304. }
  305. net = get_vhost_net(peer);
  306. vhost_net_set_vq_index(net, i * 2, index_end);
  307. /* Suppress the masking guest notifiers on vhost user
  308. * because vhost user doesn't interrupt masking/unmasking
  309. * properly.
  310. */
  311. if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
  312. dev->use_guest_notifier_mask = false;
  313. }
  314. }
  315. r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
  316. if (r < 0) {
  317. error_report("Error binding guest notifier: %d", -r);
  318. goto err;
  319. }
  320. for (i = 0; i < nvhosts; i++) {
  321. if (i < data_queue_pairs) {
  322. peer = qemu_get_peer(ncs, i);
  323. } else {
  324. peer = qemu_get_peer(ncs, n->max_queue_pairs);
  325. }
  326. r = vhost_net_start_one(get_vhost_net(peer), dev);
  327. if (r < 0) {
  328. goto err_start;
  329. }
  330. if (peer->vring_enable) {
  331. /* restore vring enable state */
  332. r = vhost_set_vring_enable(peer, peer->vring_enable);
  333. if (r < 0) {
  334. goto err_start;
  335. }
  336. }
  337. }
  338. return 0;
  339. err_start:
  340. while (--i >= 0) {
  341. peer = qemu_get_peer(ncs , i);
  342. vhost_net_stop_one(get_vhost_net(peer), dev);
  343. }
  344. e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
  345. if (e < 0) {
  346. fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
  347. fflush(stderr);
  348. }
  349. err:
  350. return r;
  351. }
/*
 * Stop vhost for all data queue pairs and, optionally, the control vq;
 * counterpart of vhost_net_start().  Also releases the guest notifiers.
 */
void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
                    int data_queue_pairs, int cvq)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *peer;
    int total_notifiers = data_queue_pairs * 2 + cvq;
    int nvhosts = data_queue_pairs + cvq;
    int i, r;

    for (i = 0; i < nvhosts; i++) {
        if (i < data_queue_pairs) {
            peer = qemu_get_peer(ncs, i);
        } else {
            /* Control virtqueue peer sits past the data queue pairs. */
            peer = qemu_get_peer(ncs, n->max_queue_pairs);
        }
        vhost_net_stop_one(get_vhost_net(peer), dev);
    }

    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    /* Notifier teardown failure leaves the device in an undefined state. */
    assert(r >= 0);
}
  378. void vhost_net_cleanup(struct vhost_net *net)
  379. {
  380. vhost_dev_cleanup(&net->dev);
  381. }
  382. int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
  383. {
  384. const VhostOps *vhost_ops = net->dev.vhost_ops;
  385. assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
  386. assert(vhost_ops->vhost_migration_done);
  387. return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
  388. }
  389. bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
  390. {
  391. return vhost_virtqueue_pending(&net->dev, idx);
  392. }
  393. void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
  394. int idx, bool mask)
  395. {
  396. vhost_virtqueue_mask(&net->dev, dev, idx, mask);
  397. }
  398. VHostNetState *get_vhost_net(NetClientState *nc)
  399. {
  400. VHostNetState *vhost_net = 0;
  401. if (!nc) {
  402. return 0;
  403. }
  404. switch (nc->info->type) {
  405. case NET_CLIENT_DRIVER_TAP:
  406. vhost_net = tap_get_vhost_net(nc);
  407. break;
  408. #ifdef CONFIG_VHOST_NET_USER
  409. case NET_CLIENT_DRIVER_VHOST_USER:
  410. vhost_net = vhost_user_get_vhost_net(nc);
  411. assert(vhost_net);
  412. break;
  413. #endif
  414. #ifdef CONFIG_VHOST_NET_VDPA
  415. case NET_CLIENT_DRIVER_VHOST_VDPA:
  416. vhost_net = vhost_vdpa_get_vhost_net(nc);
  417. assert(vhost_net);
  418. break;
  419. #endif
  420. default:
  421. break;
  422. }
  423. return vhost_net;
  424. }
  425. int vhost_set_vring_enable(NetClientState *nc, int enable)
  426. {
  427. VHostNetState *net = get_vhost_net(nc);
  428. const VhostOps *vhost_ops = net->dev.vhost_ops;
  429. nc->vring_enable = enable;
  430. if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
  431. return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
  432. }
  433. return 0;
  434. }
  435. int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
  436. {
  437. const VhostOps *vhost_ops = net->dev.vhost_ops;
  438. if (!vhost_ops->vhost_net_set_mtu) {
  439. return 0;
  440. }
  441. return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
  442. }