vhost_net.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /*
  2. * vhost-net support
  3. *
  4. * Copyright Red Hat, Inc. 2010
  5. *
  6. * Authors:
  7. * Michael S. Tsirkin <mst@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. * Contributions after 2012-01-13 are licensed under the terms of the
  13. * GNU GPL, version 2 or (at your option) any later version.
  14. */
  15. #include "qemu/osdep.h"
  16. #include "net/net.h"
  17. #include "net/tap.h"
  18. #include "net/vhost-user.h"
  19. #include "net/vhost-vdpa.h"
  20. #include "standard-headers/linux/vhost_types.h"
  21. #include "hw/virtio/virtio-net.h"
  22. #include "net/vhost_net.h"
  23. #include "qapi/error.h"
  24. #include "qemu/error-report.h"
  25. #include "qemu/main-loop.h"
  26. #include <sys/socket.h>
  27. #include <net/if.h>
  28. #include <netinet/in.h>
  29. #include "standard-headers/linux/virtio_ring.h"
  30. #include "hw/virtio/vhost.h"
  31. #include "hw/virtio/virtio-bus.h"
  32. #include "linux-headers/linux/vhost.h"
  /*
   * Features supported by host kernel.
   *
   * vhost_net_get_feature_bits() returns this table for
   * NET_CLIENT_DRIVER_TAP backends.  The final entry is
   * VHOST_INVALID_FEATURE_BIT (presumably the end-of-list marker expected
   * by the vhost core -- confirm against hw/virtio/vhost.c).
   */
  static const int kernel_feature_bits[] = {
      VIRTIO_F_NOTIFY_ON_EMPTY,
      VIRTIO_RING_F_INDIRECT_DESC,
      VIRTIO_RING_F_EVENT_IDX,
      VIRTIO_NET_F_MRG_RXBUF,
      VIRTIO_F_VERSION_1,
      VIRTIO_NET_F_MTU,
      VIRTIO_F_IOMMU_PLATFORM,
      VIRTIO_F_RING_PACKED,
      VIRTIO_F_RING_RESET,
      VIRTIO_NET_F_HASH_REPORT,
      VHOST_INVALID_FEATURE_BIT
  };
  /*
   * Features supported by others.
   *
   * vhost_net_get_feature_bits() returns this table for
   * NET_CLIENT_DRIVER_VHOST_USER backends.  The final entry is
   * VHOST_INVALID_FEATURE_BIT (presumably the end-of-list marker expected
   * by the vhost core -- confirm against hw/virtio/vhost.c).
   */
  static const int user_feature_bits[] = {
      VIRTIO_F_NOTIFY_ON_EMPTY,
      VIRTIO_RING_F_INDIRECT_DESC,
      VIRTIO_RING_F_EVENT_IDX,

      VIRTIO_F_ANY_LAYOUT,
      VIRTIO_F_VERSION_1,
      VIRTIO_NET_F_CSUM,
      VIRTIO_NET_F_GUEST_CSUM,
      VIRTIO_NET_F_GSO,
      VIRTIO_NET_F_GUEST_TSO4,
      VIRTIO_NET_F_GUEST_TSO6,
      VIRTIO_NET_F_GUEST_ECN,
      VIRTIO_NET_F_GUEST_UFO,
      VIRTIO_NET_F_HOST_TSO4,
      VIRTIO_NET_F_HOST_TSO6,
      VIRTIO_NET_F_HOST_ECN,
      VIRTIO_NET_F_HOST_UFO,
      VIRTIO_NET_F_MRG_RXBUF,
      VIRTIO_NET_F_MTU,
      VIRTIO_F_IOMMU_PLATFORM,
      VIRTIO_F_RING_PACKED,
      VIRTIO_F_RING_RESET,
      VIRTIO_NET_F_RSS,
      VIRTIO_NET_F_HASH_REPORT,

      /* This bit implies RARP isn't sent by QEMU out of band */
      VIRTIO_NET_F_GUEST_ANNOUNCE,

      VIRTIO_NET_F_MQ,

      VHOST_INVALID_FEATURE_BIT
  };
  77. static const int *vhost_net_get_feature_bits(struct vhost_net *net)
  78. {
  79. const int *feature_bits = 0;
  80. switch (net->nc->info->type) {
  81. case NET_CLIENT_DRIVER_TAP:
  82. feature_bits = kernel_feature_bits;
  83. break;
  84. case NET_CLIENT_DRIVER_VHOST_USER:
  85. feature_bits = user_feature_bits;
  86. break;
  87. #ifdef CONFIG_VHOST_NET_VDPA
  88. case NET_CLIENT_DRIVER_VHOST_VDPA:
  89. feature_bits = vdpa_feature_bits;
  90. break;
  91. #endif
  92. default:
  93. error_report("Feature bits not defined for this type: %d",
  94. net->nc->info->type);
  95. break;
  96. }
  97. return feature_bits;
  98. }
  99. uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
  100. {
  101. return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
  102. features);
  103. }
  104. int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
  105. uint32_t config_len)
  106. {
  107. return vhost_dev_get_config(&net->dev, config, config_len, NULL);
  108. }
  109. int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
  110. uint32_t offset, uint32_t size, uint32_t flags)
  111. {
  112. return vhost_dev_set_config(&net->dev, data, offset, size, flags);
  113. }
  114. void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
  115. {
  116. net->dev.acked_features = net->dev.backend_features;
  117. vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
  118. }
  119. uint64_t vhost_net_get_max_queues(VHostNetState *net)
  120. {
  121. return net->dev.max_queues;
  122. }
  123. uint64_t vhost_net_get_acked_features(VHostNetState *net)
  124. {
  125. return net->dev.acked_features;
  126. }
  127. void vhost_net_save_acked_features(NetClientState *nc)
  128. {
  129. #ifdef CONFIG_VHOST_NET_USER
  130. if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
  131. vhost_user_save_acked_features(nc);
  132. }
  133. #endif
  134. }
  135. static int vhost_net_get_fd(NetClientState *backend)
  136. {
  137. switch (backend->info->type) {
  138. case NET_CLIENT_DRIVER_TAP:
  139. return tap_get_fd(backend);
  140. default:
  141. fprintf(stderr, "vhost-net requires tap backend\n");
  142. return -ENOSYS;
  143. }
  144. }
  145. struct vhost_net *vhost_net_init(VhostNetOptions *options)
  146. {
  147. int r;
  148. bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
  149. struct vhost_net *net = g_new0(struct vhost_net, 1);
  150. uint64_t features = 0;
  151. Error *local_err = NULL;
  152. if (!options->net_backend) {
  153. fprintf(stderr, "vhost-net requires net backend to be setup\n");
  154. goto fail;
  155. }
  156. net->nc = options->net_backend;
  157. net->dev.nvqs = options->nvqs;
  158. net->dev.max_queues = 1;
  159. net->dev.vqs = net->vqs;
  160. if (backend_kernel) {
  161. r = vhost_net_get_fd(options->net_backend);
  162. if (r < 0) {
  163. goto fail;
  164. }
  165. net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
  166. ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
  167. net->backend = r;
  168. net->dev.protocol_features = 0;
  169. } else {
  170. net->dev.backend_features = 0;
  171. net->dev.protocol_features = 0;
  172. net->backend = -1;
  173. /* vhost-user needs vq_index to initiate a specific queue pair */
  174. net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
  175. }
  176. r = vhost_dev_init(&net->dev, options->opaque,
  177. options->backend_type, options->busyloop_timeout,
  178. &local_err);
  179. if (r < 0) {
  180. error_report_err(local_err);
  181. goto fail;
  182. }
  183. if (backend_kernel) {
  184. if (!qemu_has_vnet_hdr_len(options->net_backend,
  185. sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
  186. net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
  187. }
  188. if (~net->dev.features & net->dev.backend_features) {
  189. fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
  190. " for backend\n",
  191. (uint64_t)(~net->dev.features & net->dev.backend_features));
  192. goto fail;
  193. }
  194. }
  195. /* Set sane init value. Override when guest acks. */
  196. #ifdef CONFIG_VHOST_NET_USER
  197. if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
  198. features = vhost_user_get_acked_features(net->nc);
  199. if (~net->dev.features & features) {
  200. fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
  201. " for backend\n",
  202. (uint64_t)(~net->dev.features & features));
  203. goto fail;
  204. }
  205. }
  206. #endif
  207. vhost_net_ack_features(net, features);
  208. return net;
  209. fail:
  210. vhost_dev_cleanup(&net->dev);
  211. g_free(net);
  212. return NULL;
  213. }
  214. static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
  215. int vq_index_end)
  216. {
  217. net->dev.vq_index = vq_index;
  218. net->dev.vq_index_end = vq_index_end;
  219. }
  220. static int vhost_net_start_one(struct vhost_net *net,
  221. VirtIODevice *dev)
  222. {
  223. struct vhost_vring_file file = { };
  224. int r;
  225. if (net->nc->info->start) {
  226. r = net->nc->info->start(net->nc);
  227. if (r < 0) {
  228. return r;
  229. }
  230. }
  231. r = vhost_dev_enable_notifiers(&net->dev, dev);
  232. if (r < 0) {
  233. goto fail_notifiers;
  234. }
  235. r = vhost_dev_start(&net->dev, dev, false);
  236. if (r < 0) {
  237. goto fail_start;
  238. }
  239. if (net->nc->info->poll) {
  240. net->nc->info->poll(net->nc, false);
  241. }
  242. if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
  243. qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
  244. file.fd = net->backend;
  245. for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
  246. if (!virtio_queue_enabled(dev, net->dev.vq_index +
  247. file.index)) {
  248. /* Queue might not be ready for start */
  249. continue;
  250. }
  251. r = vhost_net_set_backend(&net->dev, &file);
  252. if (r < 0) {
  253. r = -errno;
  254. goto fail;
  255. }
  256. }
  257. }
  258. if (net->nc->info->load) {
  259. r = net->nc->info->load(net->nc);
  260. if (r < 0) {
  261. goto fail;
  262. }
  263. }
  264. return 0;
  265. fail:
  266. file.fd = -1;
  267. if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
  268. while (file.index-- > 0) {
  269. if (!virtio_queue_enabled(dev, net->dev.vq_index +
  270. file.index)) {
  271. /* Queue might not be ready for start */
  272. continue;
  273. }
  274. int r = vhost_net_set_backend(&net->dev, &file);
  275. assert(r >= 0);
  276. }
  277. }
  278. if (net->nc->info->poll) {
  279. net->nc->info->poll(net->nc, true);
  280. }
  281. vhost_dev_stop(&net->dev, dev, false);
  282. fail_start:
  283. vhost_dev_disable_notifiers(&net->dev, dev);
  284. fail_notifiers:
  285. return r;
  286. }
  287. static void vhost_net_stop_one(struct vhost_net *net,
  288. VirtIODevice *dev)
  289. {
  290. struct vhost_vring_file file = { .fd = -1 };
  291. if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
  292. for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
  293. int r = vhost_net_set_backend(&net->dev, &file);
  294. assert(r >= 0);
  295. }
  296. }
  297. if (net->nc->info->poll) {
  298. net->nc->info->poll(net->nc, true);
  299. }
  300. vhost_dev_stop(&net->dev, dev, false);
  301. if (net->nc->info->stop) {
  302. net->nc->info->stop(net->nc);
  303. }
  304. vhost_dev_disable_notifiers(&net->dev, dev);
  305. }
  306. int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
  307. int data_queue_pairs, int cvq)
  308. {
  309. BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
  310. VirtioBusState *vbus = VIRTIO_BUS(qbus);
  311. VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
  312. int total_notifiers = data_queue_pairs * 2 + cvq;
  313. VirtIONet *n = VIRTIO_NET(dev);
  314. int nvhosts = data_queue_pairs + cvq;
  315. struct vhost_net *net;
  316. int r, e, i, index_end = data_queue_pairs * 2;
  317. NetClientState *peer;
  318. if (cvq) {
  319. index_end += 1;
  320. }
  321. if (!k->set_guest_notifiers) {
  322. error_report("binding does not support guest notifiers");
  323. return -ENOSYS;
  324. }
  325. for (i = 0; i < nvhosts; i++) {
  326. if (i < data_queue_pairs) {
  327. peer = qemu_get_peer(ncs, i);
  328. } else { /* Control Virtqueue */
  329. peer = qemu_get_peer(ncs, n->max_queue_pairs);
  330. }
  331. net = get_vhost_net(peer);
  332. vhost_net_set_vq_index(net, i * 2, index_end);
  333. /* Suppress the masking guest notifiers on vhost user
  334. * because vhost user doesn't interrupt masking/unmasking
  335. * properly.
  336. */
  337. if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
  338. dev->use_guest_notifier_mask = false;
  339. }
  340. }
  341. r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
  342. if (r < 0) {
  343. error_report("Error binding guest notifier: %d", -r);
  344. goto err;
  345. }
  346. for (i = 0; i < nvhosts; i++) {
  347. if (i < data_queue_pairs) {
  348. peer = qemu_get_peer(ncs, i);
  349. } else {
  350. peer = qemu_get_peer(ncs, n->max_queue_pairs);
  351. }
  352. if (peer->vring_enable) {
  353. /* restore vring enable state */
  354. r = vhost_set_vring_enable(peer, peer->vring_enable);
  355. if (r < 0) {
  356. goto err_start;
  357. }
  358. }
  359. r = vhost_net_start_one(get_vhost_net(peer), dev);
  360. if (r < 0) {
  361. goto err_start;
  362. }
  363. }
  364. return 0;
  365. err_start:
  366. while (--i >= 0) {
  367. peer = qemu_get_peer(ncs, i < data_queue_pairs ?
  368. i : n->max_queue_pairs);
  369. vhost_net_stop_one(get_vhost_net(peer), dev);
  370. }
  371. e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
  372. if (e < 0) {
  373. fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
  374. fflush(stderr);
  375. }
  376. err:
  377. return r;
  378. }
  379. void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
  380. int data_queue_pairs, int cvq)
  381. {
  382. BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
  383. VirtioBusState *vbus = VIRTIO_BUS(qbus);
  384. VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
  385. VirtIONet *n = VIRTIO_NET(dev);
  386. NetClientState *peer;
  387. int total_notifiers = data_queue_pairs * 2 + cvq;
  388. int nvhosts = data_queue_pairs + cvq;
  389. int i, r;
  390. for (i = 0; i < nvhosts; i++) {
  391. if (i < data_queue_pairs) {
  392. peer = qemu_get_peer(ncs, i);
  393. } else {
  394. peer = qemu_get_peer(ncs, n->max_queue_pairs);
  395. }
  396. vhost_net_stop_one(get_vhost_net(peer), dev);
  397. }
  398. r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
  399. if (r < 0) {
  400. fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
  401. fflush(stderr);
  402. }
  403. assert(r >= 0);
  404. }
  405. void vhost_net_cleanup(struct vhost_net *net)
  406. {
  407. vhost_dev_cleanup(&net->dev);
  408. }
  409. int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
  410. {
  411. const VhostOps *vhost_ops = net->dev.vhost_ops;
  412. assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
  413. assert(vhost_ops->vhost_migration_done);
  414. return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
  415. }
  416. bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
  417. {
  418. return vhost_virtqueue_pending(&net->dev, idx);
  419. }
  420. void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
  421. int idx, bool mask)
  422. {
  423. vhost_virtqueue_mask(&net->dev, dev, idx, mask);
  424. }
  425. bool vhost_net_config_pending(VHostNetState *net)
  426. {
  427. return vhost_config_pending(&net->dev);
  428. }
  429. void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
  430. {
  431. vhost_config_mask(&net->dev, dev, mask);
  432. }
  433. VHostNetState *get_vhost_net(NetClientState *nc)
  434. {
  435. VHostNetState *vhost_net = 0;
  436. if (!nc) {
  437. return 0;
  438. }
  439. switch (nc->info->type) {
  440. case NET_CLIENT_DRIVER_TAP:
  441. vhost_net = tap_get_vhost_net(nc);
  442. break;
  443. #ifdef CONFIG_VHOST_NET_USER
  444. case NET_CLIENT_DRIVER_VHOST_USER:
  445. vhost_net = vhost_user_get_vhost_net(nc);
  446. assert(vhost_net);
  447. break;
  448. #endif
  449. #ifdef CONFIG_VHOST_NET_VDPA
  450. case NET_CLIENT_DRIVER_VHOST_VDPA:
  451. vhost_net = vhost_vdpa_get_vhost_net(nc);
  452. assert(vhost_net);
  453. break;
  454. #endif
  455. default:
  456. break;
  457. }
  458. return vhost_net;
  459. }
  460. int vhost_set_vring_enable(NetClientState *nc, int enable)
  461. {
  462. VHostNetState *net = get_vhost_net(nc);
  463. const VhostOps *vhost_ops = net->dev.vhost_ops;
  464. nc->vring_enable = enable;
  465. if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
  466. return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
  467. }
  468. return 0;
  469. }
  470. int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
  471. {
  472. const VhostOps *vhost_ops = net->dev.vhost_ops;
  473. if (!vhost_ops->vhost_net_set_mtu) {
  474. return 0;
  475. }
  476. return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
  477. }
  478. void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
  479. int vq_index)
  480. {
  481. VHostNetState *net = get_vhost_net(nc->peer);
  482. const VhostOps *vhost_ops = net->dev.vhost_ops;
  483. struct vhost_vring_file file = { .fd = -1 };
  484. int idx;
  485. /* should only be called after backend is connected */
  486. assert(vhost_ops);
  487. idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
  488. if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
  489. file.index = idx;
  490. int r = vhost_net_set_backend(&net->dev, &file);
  491. assert(r >= 0);
  492. }
  493. vhost_virtqueue_stop(&net->dev,
  494. vdev,
  495. net->dev.vqs + idx,
  496. net->dev.vq_index + idx);
  497. }
  498. int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
  499. int vq_index)
  500. {
  501. VHostNetState *net = get_vhost_net(nc->peer);
  502. const VhostOps *vhost_ops = net->dev.vhost_ops;
  503. struct vhost_vring_file file = { };
  504. int idx, r;
  505. if (!net->dev.started) {
  506. return -EBUSY;
  507. }
  508. /* should only be called after backend is connected */
  509. assert(vhost_ops);
  510. idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
  511. r = vhost_virtqueue_start(&net->dev,
  512. vdev,
  513. net->dev.vqs + idx,
  514. net->dev.vq_index + idx);
  515. if (r < 0) {
  516. goto err_start;
  517. }
  518. if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
  519. file.index = idx;
  520. file.fd = net->backend;
  521. r = vhost_net_set_backend(&net->dev, &file);
  522. if (r < 0) {
  523. r = -errno;
  524. goto err_start;
  525. }
  526. }
  527. return 0;
  528. err_start:
  529. error_report("Error when restarting the queue.");
  530. if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
  531. file.fd = VHOST_FILE_UNBIND;
  532. file.index = idx;
  533. int r = vhost_net_set_backend(&net->dev, &file);
  534. assert(r >= 0);
  535. }
  536. vhost_dev_stop(&net->dev, vdev, false);
  537. return r;
  538. }