2
0

virtio-net.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078
  1. /*
  2. * Virtio Network Device
  3. *
  4. * Copyright IBM, Corp. 2007
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. */
  13. #include "iov.h"
  14. #include "virtio.h"
  15. #include "net.h"
  16. #include "net/checksum.h"
  17. #include "net/tap.h"
  18. #include "qemu-error.h"
  19. #include "qemu-timer.h"
  20. #include "virtio-net.h"
  21. #include "vhost_net.h"
  22. #define VIRTIO_NET_VM_VERSION 11
  23. #define MAC_TABLE_ENTRIES 64
  24. #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
/* Per-device state for one virtio network device. */
typedef struct VirtIONet
{
    VirtIODevice vdev;          /* must be first: device pointers are cast to VirtIONet* */
    uint8_t mac[ETH_ALEN];      /* current station MAC address */
    uint16_t status;            /* VIRTIO_NET_S_* bits exposed in config space */
    VirtQueue *rx_vq;
    VirtQueue *tx_vq;
    VirtQueue *ctrl_vq;
    NICState *nic;
    /* Exactly one of tx_timer/tx_bh is non-NULL, selected by the
     * "tx=timer"/"tx=bh" device option at init time. */
    QEMUTimer *tx_timer;
    QEMUBH *tx_bh;
    uint32_t tx_timeout;        /* ns delay before the timer flushes TX */
    int32_t tx_burst;           /* max packets flushed per TX pass */
    int tx_waiting;             /* nonzero when a TX flush is pending */
    uint32_t has_vnet_hdr;      /* peer (tap) supports virtio_net_hdr prefix */
    uint8_t has_ufo;            /* peer supports UDP fragmentation offload */
    struct {
        VirtQueueElement elem;  /* in-flight async TX element (out_num != 0 when busy) */
        ssize_t len;
    } async_tx;
    int mergeable_rx_bufs;      /* guest negotiated VIRTIO_NET_F_MRG_RXBUF */
    /* RX filter flags set via the control virtqueue */
    uint8_t promisc;
    uint8_t allmulti;
    uint8_t alluni;
    uint8_t nomulti;
    uint8_t nouni;
    uint8_t nobcast;
    uint8_t vhost_started;      /* vhost backend currently owns the queues */
    struct {
        int in_use;             /* total entries in macs[] */
        int first_multi;        /* index of first multicast entry; [0, first_multi) are unicast */
        uint8_t multi_overflow; /* guest supplied more multicast MACs than fit */
        uint8_t uni_overflow;   /* guest supplied more unicast MACs than fit */
        uint8_t *macs;          /* MAC_TABLE_ENTRIES * ETH_ALEN bytes */
    } mac_table;
    uint32_t *vlans;            /* bitmap of MAX_VLAN bits: allowed VLAN IDs */
    DeviceState *qdev;
} VirtIONet;
  63. /* TODO
  64. * - we could suppress RX interrupt if we were so inclined.
  65. */
  66. static VirtIONet *to_virtio_net(VirtIODevice *vdev)
  67. {
  68. return (VirtIONet *)vdev;
  69. }
  70. static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
  71. {
  72. VirtIONet *n = to_virtio_net(vdev);
  73. struct virtio_net_config netcfg;
  74. stw_p(&netcfg.status, n->status);
  75. memcpy(netcfg.mac, n->mac, ETH_ALEN);
  76. memcpy(config, &netcfg, sizeof(netcfg));
  77. }
  78. static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
  79. {
  80. VirtIONet *n = to_virtio_net(vdev);
  81. struct virtio_net_config netcfg;
  82. memcpy(&netcfg, config, sizeof(netcfg));
  83. if (memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
  84. memcpy(n->mac, netcfg.mac, ETH_ALEN);
  85. qemu_format_nic_info_str(&n->nic->nc, n->mac);
  86. }
  87. }
  88. static bool virtio_net_started(VirtIONet *n, uint8_t status)
  89. {
  90. return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
  91. (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
  92. }
/* Synchronize the vhost backend with the desired device state: start
 * vhost when the device should run and it is not running, stop it in
 * the opposite case. No-op when the peer is not a tap with vhost. */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    /* No peer, or peer is not a tap device: nothing to drive. */
    if (!n->nic->nc.peer) {
        return;
    }
    if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }
    /* Already in the desired state (and link is up) — nothing to do.
     * NOTE(review): the !link_down term means a downed peer link falls
     * through to the start/stop logic below — presumably to force a
     * state change; confirm against callers. */
    if (!!n->vhost_started == virtio_net_started(n, status) &&
        !n->nic->nc.peer->link_down) {
        return;
    }
    if (!n->vhost_started) {
        int r;
        /* Ask the backend whether vhost can serve this device at all. */
        if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
            return;
        }
        r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        if (r < 0) {
            /* Not fatal: userspace virtio keeps working without vhost. */
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
        } else {
            n->vhost_started = 1;
        }
    } else {
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        n->vhost_started = 0;
    }
}
/* Status-change hook: keep vhost in sync, then re-arm or cancel any
 * pending TX flush depending on whether userspace TX should run. */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = to_virtio_net(vdev);

    virtio_net_vhost_status(n, status);

    /* Only a pending TX flush needs (re)scheduling or cancelling. */
    if (!n->tx_waiting) {
        return;
    }

    if (virtio_net_started(n, status) && !n->vhost_started) {
        /* Userspace TX path active: resume the deferred flush using
         * whichever mechanism (timer or bottom half) was configured. */
        if (n->tx_timer) {
            qemu_mod_timer(n->tx_timer,
                           qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        } else {
            qemu_bh_schedule(n->tx_bh);
        }
    } else {
        /* Stopped, or vhost took over: no userspace flush should fire. */
        if (n->tx_timer) {
            qemu_del_timer(n->tx_timer);
        } else {
            qemu_bh_cancel(n->tx_bh);
        }
    }
}
  147. static void virtio_net_set_link_status(NetClientState *nc)
  148. {
  149. VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
  150. uint16_t old_status = n->status;
  151. if (nc->link_down)
  152. n->status &= ~VIRTIO_NET_S_LINK_UP;
  153. else
  154. n->status |= VIRTIO_NET_S_LINK_UP;
  155. if (n->status != old_status)
  156. virtio_notify_config(&n->vdev);
  157. virtio_net_set_status(&n->vdev, n->vdev.status);
  158. }
  159. static void virtio_net_reset(VirtIODevice *vdev)
  160. {
  161. VirtIONet *n = to_virtio_net(vdev);
  162. /* Reset back to compatibility mode */
  163. n->promisc = 1;
  164. n->allmulti = 0;
  165. n->alluni = 0;
  166. n->nomulti = 0;
  167. n->nouni = 0;
  168. n->nobcast = 0;
  169. /* Flush any MAC and VLAN filter table state */
  170. n->mac_table.in_use = 0;
  171. n->mac_table.first_multi = 0;
  172. n->mac_table.multi_overflow = 0;
  173. n->mac_table.uni_overflow = 0;
  174. memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
  175. memset(n->vlans, 0, MAX_VLAN >> 3);
  176. }
  177. static int peer_has_vnet_hdr(VirtIONet *n)
  178. {
  179. if (!n->nic->nc.peer)
  180. return 0;
  181. if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP)
  182. return 0;
  183. n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
  184. return n->has_vnet_hdr;
  185. }
  186. static int peer_has_ufo(VirtIONet *n)
  187. {
  188. if (!peer_has_vnet_hdr(n))
  189. return 0;
  190. n->has_ufo = tap_has_ufo(n->nic->nc.peer);
  191. return n->has_ufo;
  192. }
/* Compute the host feature set offered to the guest: strip offload
 * features the backend cannot honor, then let vhost (if present)
 * further restrict the set. */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* We always have a MAC address to expose in config space. */
    features |= (1 << VIRTIO_NET_F_MAC);

    if (peer_has_vnet_hdr(n)) {
        tap_using_vnet_hdr(n->nic->nc.peer, 1);
    } else {
        /* Without a vnet header there is no way to carry checksum or
         * GSO metadata, in either direction. */
        features &= ~(0x1 << VIRTIO_NET_F_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_ECN);

        features &= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_ECN);
    }

    /* UFO additionally needs explicit backend support. */
    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_UFO);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
    }

    /* No tap peer, or tap without vhost: the mask above is final. */
    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return features;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return features;
    }
    /* vhost may clear features it does not implement. */
    return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer), features);
}
  222. static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
  223. {
  224. uint32_t features = 0;
  225. /* Linux kernel 2.6.25. It understood MAC (as everyone must),
  226. * but also these: */
  227. features |= (1 << VIRTIO_NET_F_MAC);
  228. features |= (1 << VIRTIO_NET_F_CSUM);
  229. features |= (1 << VIRTIO_NET_F_HOST_TSO4);
  230. features |= (1 << VIRTIO_NET_F_HOST_TSO6);
  231. features |= (1 << VIRTIO_NET_F_HOST_ECN);
  232. return features;
  233. }
/* Guest acknowledged its feature set: record mergeable-buffer mode,
 * program tap RX offloads to match, and forward the set to vhost. */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));

    if (n->has_vnet_hdr) {
        /* Enable on the tap exactly the guest-RX offloads negotiated. */
        tap_set_offload(n->nic->nc.peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }
    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }
    vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
}
  255. static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
  256. VirtQueueElement *elem)
  257. {
  258. uint8_t on;
  259. if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(on)) {
  260. error_report("virtio-net ctrl invalid rx mode command");
  261. exit(1);
  262. }
  263. on = ldub_p(elem->out_sg[1].iov_base);
  264. if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC)
  265. n->promisc = on;
  266. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI)
  267. n->allmulti = on;
  268. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI)
  269. n->alluni = on;
  270. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI)
  271. n->nomulti = on;
  272. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI)
  273. n->nouni = on;
  274. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST)
  275. n->nobcast = on;
  276. else
  277. return VIRTIO_NET_ERR;
  278. return VIRTIO_NET_OK;
  279. }
  280. static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
  281. VirtQueueElement *elem)
  282. {
  283. struct virtio_net_ctrl_mac mac_data;
  284. if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem->out_num != 3 ||
  285. elem->out_sg[1].iov_len < sizeof(mac_data) ||
  286. elem->out_sg[2].iov_len < sizeof(mac_data))
  287. return VIRTIO_NET_ERR;
  288. n->mac_table.in_use = 0;
  289. n->mac_table.first_multi = 0;
  290. n->mac_table.uni_overflow = 0;
  291. n->mac_table.multi_overflow = 0;
  292. memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
  293. mac_data.entries = ldl_p(elem->out_sg[1].iov_base);
  294. if (sizeof(mac_data.entries) +
  295. (mac_data.entries * ETH_ALEN) > elem->out_sg[1].iov_len)
  296. return VIRTIO_NET_ERR;
  297. if (mac_data.entries <= MAC_TABLE_ENTRIES) {
  298. memcpy(n->mac_table.macs, elem->out_sg[1].iov_base + sizeof(mac_data),
  299. mac_data.entries * ETH_ALEN);
  300. n->mac_table.in_use += mac_data.entries;
  301. } else {
  302. n->mac_table.uni_overflow = 1;
  303. }
  304. n->mac_table.first_multi = n->mac_table.in_use;
  305. mac_data.entries = ldl_p(elem->out_sg[2].iov_base);
  306. if (sizeof(mac_data.entries) +
  307. (mac_data.entries * ETH_ALEN) > elem->out_sg[2].iov_len)
  308. return VIRTIO_NET_ERR;
  309. if (mac_data.entries) {
  310. if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
  311. memcpy(n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
  312. elem->out_sg[2].iov_base + sizeof(mac_data),
  313. mac_data.entries * ETH_ALEN);
  314. n->mac_table.in_use += mac_data.entries;
  315. } else {
  316. n->mac_table.multi_overflow = 1;
  317. }
  318. }
  319. return VIRTIO_NET_OK;
  320. }
  321. static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
  322. VirtQueueElement *elem)
  323. {
  324. uint16_t vid;
  325. if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(vid)) {
  326. error_report("virtio-net ctrl invalid vlan command");
  327. return VIRTIO_NET_ERR;
  328. }
  329. vid = lduw_p(elem->out_sg[1].iov_base);
  330. if (vid >= MAX_VLAN)
  331. return VIRTIO_NET_ERR;
  332. if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
  333. n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
  334. else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
  335. n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
  336. else
  337. return VIRTIO_NET_ERR;
  338. return VIRTIO_NET_OK;
  339. }
/* Control virtqueue handler: pop each request, dispatch on the class
 * byte, write the one-byte status into the final in-descriptor, and
 * complete the element. */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement elem;

    while (virtqueue_pop(vq, &elem)) {
        /* Every request needs at least one out (header) and one in
         * (status) descriptor. */
        if ((elem.in_num < 1) || (elem.out_num < 1)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        if (elem.out_sg[0].iov_len < sizeof(ctrl) ||
            elem.in_sg[elem.in_num - 1].iov_len < sizeof(status)) {
            error_report("virtio-net ctrl header not in correct element");
            exit(1);
        }

        /* Header is two bytes: class then command. */
        ctrl.class = ldub_p(elem.out_sg[0].iov_base);
        ctrl.cmd = ldub_p(elem.out_sg[0].iov_base + sizeof(ctrl.class));

        /* Unknown classes fall through with status = VIRTIO_NET_ERR. */
        if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE)
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, &elem);
        else if (ctrl.class == VIRTIO_NET_CTRL_MAC)
            status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
        else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);

        stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}
  369. /* RX */
  370. static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
  371. {
  372. VirtIONet *n = to_virtio_net(vdev);
  373. qemu_flush_queued_packets(&n->nic->nc);
  374. }
  375. static int virtio_net_can_receive(NetClientState *nc)
  376. {
  377. VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
  378. if (!n->vdev.vm_running) {
  379. return 0;
  380. }
  381. if (!virtio_queue_ready(n->rx_vq) ||
  382. !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
  383. return 0;
  384. return 1;
  385. }
/* Return 1 when the RX queue can hold a packet of `bufsize` bytes.
 * On the slow path this also re-enables guest notification so the
 * next avail-buffer kick wakes us, and 0 is returned. */
static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
{
    if (virtio_queue_empty(n->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(n->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(n->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(n->rx_vq, bufsize, 0)))
            return 0;
    }

    /* Buffers available: suppress further kicks while we consume them. */
    virtio_queue_set_notification(n->rx_vq, 0);
    return 1;
}
  404. /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
  405. * it never finds out that the packets don't have valid checksums. This
  406. * causes dhclient to get upset. Fedora's carried a patch for ages to
  407. * fix this with Xen but it hasn't appeared in an upstream release of
  408. * dhclient yet.
  409. *
  410. * To avoid breaking existing guests, we catch udp packets and add
  411. * checksums. This is terrible but it's better than hacking the guest
  412. * kernels.
  413. *
  414. * N.B. if we introduce a zero-copy API, this operation is no longer free so
  415. * we should provide a mechanism to disable it to avoid polluting the host
  416. * cache.
  417. */
/* Compute the UDP checksum in-place for DHCP replies whose checksum
 * the backend deferred (VIRTIO_NET_HDR_F_NEEDS_CSUM), so that guests
 * running a broken dhclient still see a valid packet. `buf` points at
 * the Ethernet frame (no vnet header); the byte offsets below assume
 * an untagged IPv4 frame with a 20-byte IP header. */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        const uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        /* FIXME this cast is evil */
        net_checksum_calculate((uint8_t *)buf, size);
        /* Checksum is now valid; clear the flag so the guest trusts it. */
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
/* Prepare the virtio header in the guest's first RX buffer.
 * If the backend supplied a vnet header at the start of `buf`, copy it
 * through (after the dhclient checksum workaround); otherwise emit an
 * empty header. Advances iov[0] past the full guest header (hdr_len,
 * which may exceed sizeof(virtio_net_hdr) for mergeable buffers) and
 * returns how many bytes of `buf` were consumed. */
static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
                          const void *buf, size_t size, size_t hdr_len)
{
    struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)iov[0].iov_base;
    int offset = 0;

    /* Default: no flags, no GSO metadata. */
    hdr->flags = 0;
    hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (n->has_vnet_hdr) {
        memcpy(hdr, buf, sizeof(*hdr));
        offset = sizeof(*hdr);
        work_around_broken_dhclient(hdr, buf + offset, size - offset);
    }

    /* We only ever receive a struct virtio_net_hdr from the tapfd,
     * but we may be passing along a larger header to the guest.
     */
    iov[0].iov_base += hdr_len;
    iov[0].iov_len  -= hdr_len;

    return offset;
}
/* Apply the guest-programmed RX filter to an incoming frame.
 * Returns 1 to accept the packet, 0 to drop it. `buf` may begin with a
 * vnet header, which is skipped before looking at the Ethernet frame. */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    if (n->has_vnet_hdr) {
        ptr += sizeof(struct virtio_net_hdr);
    }

    /* 802.1Q tagged frame: drop it unless the VLAN id is in the bitmap. */
    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* Multicast entries occupy [first_multi, in_use). */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        /* Unicast entries occupy [0, first_multi). */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
/* Net-layer receive callback: copy `buf` into one or more guest RX
 * buffers. Returns `size` on success or drop, 0 to ask the net layer
 * to queue the packet (no room yet), or -1 when we cannot receive at
 * all and the first buffer pop failed. */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
    size_t guest_hdr_len, offset, i, host_hdr_len;

    if (!virtio_net_can_receive(&n->nic->nc))
        return -1;

    /* hdr_len refers to the header we supply to the guest */
    guest_hdr_len = n->mergeable_rx_bufs ?
        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);

    /* The backend prepends a header only when vnet_hdr is on. */
    host_hdr_len = n->has_vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
    if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len))
        return 0;

    /* Filtered packets are reported as consumed, not queued. */
    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    /* Copy the packet, spanning multiple elements when the guest
     * negotiated mergeable RX buffers. */
    while (offset < size) {
        VirtQueueElement elem;
        int len, total;
        struct iovec sg[VIRTQUEUE_MAX_SIZE];

        total = 0;

        if (virtqueue_pop(n->rx_vq, &elem) == 0) {
            /* Ran out before the first buffer: let the net layer queue
             * the packet. Running out mid-packet is a guest bug. */
            if (i == 0)
                return -1;
            error_report("virtio-net unexpected empty queue: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                         i, n->mergeable_rx_bufs, offset, size,
                         guest_hdr_len, host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != guest_hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /* Work on a local copy: receive_header adjusts sg[0] in place. */
        memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);

        if (i == 0) {
            /* Remember where num_buffers lives so we can patch it once
             * the final buffer count is known. */
            if (n->mergeable_rx_bufs)
                mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;

            offset += receive_header(n, sg, elem.in_num,
                                     buf + offset, size - offset, guest_hdr_len);
            total += guest_hdr_len;
        }

        /* copy in packet. ugh */
        len = iov_from_buf(sg, elem.in_num, 0,
                           buf + offset, size - offset);
        total += len;
        offset += len;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         i, n->mergeable_rx_bufs,
                         offset, size, guest_hdr_len, host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(n->rx_vq, &elem, total, i++);
    }

    if (mhdr) {
        /* Patch in how many descriptors this packet used. */
        stw_p(&mhdr->num_buffers, i);
    }

    virtqueue_flush(n->rx_vq, i);
    virtio_notify(&n->vdev, n->rx_vq);

    return size;
}
  570. static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
/* Completion callback for an async TX submitted by virtio_net_flush_tx:
 * finish the in-flight element, clear the async slot (out_num == 0
 * marks it free), re-enable TX notifications, and resume flushing. */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
    virtio_notify(&n->vdev, n->tx_vq);

    n->async_tx.elem.out_num = n->async_tx.len = 0;

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}
  580. /* TX */
/* TX */

/* Drain up to tx_burst packets from the TX queue into the net layer.
 * Returns the number of packets sent, or -EBUSY when a send went
 * asynchronous (completion resumes the flush via tx_complete). */
static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
    VirtQueueElement elem;
    int32_t num_packets = 0;

    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    assert(n->vdev.vm_running);

    /* An async send is still in flight: do not submit another; its
     * completion callback will flush again. */
    if (n->async_tx.elem.out_num) {
        virtio_queue_set_notification(n->tx_vq, 0);
        return num_packets;
    }

    while (virtqueue_pop(vq, &elem)) {
        ssize_t ret, len = 0;
        unsigned int out_num = elem.out_num;
        struct iovec *out_sg = &elem.out_sg[0];
        unsigned hdr_len;

        /* hdr_len refers to the header received from the guest */
        hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);

        if (out_num < 1 || out_sg->iov_len != hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /* ignore the header if GSO is not supported */
        if (!n->has_vnet_hdr) {
            /* Skip the header descriptor entirely; count it as sent. */
            out_num--;
            out_sg++;
            len += hdr_len;
        } else if (n->mergeable_rx_bufs) {
            /* tapfd expects a struct virtio_net_hdr */
            /* Shrink the first descriptor in place so only the plain
             * virtio_net_hdr (without num_buffers) reaches the tap. */
            hdr_len -= sizeof(struct virtio_net_hdr);
            out_sg->iov_len -= hdr_len;
            len += hdr_len;
        }

        ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
                                      virtio_net_tx_complete);
        if (ret == 0) {
            /* Backend queued the send: park the element and stop. */
            virtio_queue_set_notification(n->tx_vq, 0);
            n->async_tx.elem = elem;
            n->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(vq, &elem, len);
        virtio_notify(&n->vdev, vq);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
/* TX queue kick handler for the timer-based TX path. The first kick
 * arms the timer (batching subsequent packets); a second kick before
 * it fires flushes immediately. */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        /* Remember the kick; set_status reschedules when we resume. */
        n->tx_waiting = 1;
        return;
    }

    if (n->tx_waiting) {
        /* Guest is kicking faster than the timer: flush now. */
        virtio_queue_set_notification(vq, 1);
        qemu_del_timer(n->tx_timer);
        n->tx_waiting = 0;
        virtio_net_flush_tx(n, vq);
    } else {
        qemu_mod_timer(n->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        n->tx_waiting = 1;
        /* Suppress further kicks while the timer is pending. */
        virtio_queue_set_notification(vq, 0);
    }
}
/* TX queue kick handler for the bottom-half TX path: schedule the BH
 * once and suppress kicks until it runs. */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* A flush is already pending; nothing more to do. */
    if (unlikely(n->tx_waiting)) {
        return;
    }
    /* Mark pending before the vm_running check so a stopped VM still
     * gets its flush rescheduled by set_status on resume. */
    n->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(n->tx_bh);
}
  668. static void virtio_net_tx_timer(void *opaque)
  669. {
  670. VirtIONet *n = opaque;
  671. assert(n->vdev.vm_running);
  672. n->tx_waiting = 0;
  673. /* Just in case the driver is not ready on more */
  674. if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
  675. return;
  676. virtio_queue_set_notification(n->tx_vq, 1);
  677. virtio_net_flush_tx(n, n->tx_vq);
  678. }
/* Bottom half for the BH-based TX path: flush a burst, and either
 * reschedule (more work likely) or re-enable notifications. */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONet *n = opaque;
    int32_t ret;

    assert(n->vdev.vm_running);

    n->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
        return;

    ret = virtio_net_flush_tx(n, n->tx_vq);
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(n->tx_bh);
        n->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(n->tx_vq, 1);
    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
        virtio_queue_set_notification(n->tx_vq, 0);
        qemu_bh_schedule(n->tx_bh);
        n->tx_waiting = 1;
    }
}
/* Savevm handler. Field order and widths define the migration wire
 * format (VIRTIO_NET_VM_VERSION) and must stay in sync with
 * virtio_net_load — do not reorder. */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
    virtio_save(&n->vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
}
/* Loadvm handler: mirror of virtio_net_save, with per-version gates so
 * streams from older QEMUs (version >= 2) still load. Returns 0 on
 * success, negative errno / -1 on failure. */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int i;
    int ret;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
        return -EINVAL;

    ret = virtio_load(&n->vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->tx_waiting = qemu_get_be32(f);
    n->mergeable_rx_bufs = qemu_get_be32(f);

    if (version_id >= 3)
        n->status = qemu_get_be16(f);

    if (version_id >= 4) {
        /* Versions 4-7 stored these flags as 32-bit values. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        n->mac_table.in_use = qemu_get_be32(f);
        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
            qemu_get_buffer(f, n->mac_table.macs,
                            n->mac_table.in_use * ETH_ALEN);
        } else if (n->mac_table.in_use) {
            /* Table too large for us: skip the data and fall back to
             * overflow mode (accept everything). */
            qemu_fseek(f, n->mac_table.in_use * ETH_ALEN, SEEK_CUR);
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6)
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    if (version_id >= 7) {
        /* The source used a vnet header; refuse to load if we can't. */
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            /* Reprogram tap offloads from the migrated feature bits. */
            tap_using_vnet_hdr(n->nic->nc.peer, 1);
            tap_set_offload(n->nic->nc.peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;
    return 0;
}
  813. static void virtio_net_cleanup(NetClientState *nc)
  814. {
  815. VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
  816. n->nic = NULL;
  817. }
/* Callback table registered with the net layer for this NIC model. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};
/* Create and wire up a virtio-net device: virtqueues, TX mechanism
 * (timer or bottom half per net->tx), the backing NIC, and savevm
 * registration. Returns the embedded VirtIODevice. */
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                              virtio_net_conf *net)
{
    VirtIONet *n;

    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                        sizeof(struct virtio_net_config),
                                        sizeof(VirtIONet));

    /* Virtio callback table. */
    n->vdev.get_config = virtio_net_get_config;
    n->vdev.set_config = virtio_net_set_config;
    n->vdev.get_features = virtio_net_get_features;
    n->vdev.set_features = virtio_net_set_features;
    n->vdev.bad_features = virtio_net_bad_features;
    n->vdev.reset = virtio_net_reset;
    n->vdev.set_status = virtio_net_set_status;
    n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);

    /* Unknown tx= values warn and fall through to the "bh" default. */
    if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     net->tx);
        error_report("Defaulting to \"bh\"");
    }

    if (net->tx && !strcmp(net->tx, "timer")) {
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
        n->tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer, n);
        n->tx_timeout = net->txtimer;
    } else {
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
    }
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&conf->macaddr);
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);

    qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);

    n->tx_waiting = 0;
    n->tx_burst = net->txburst;
    n->mergeable_rx_bufs = 0;
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");

    return &n->vdev;
}
/* Tear down a virtio-net device: stop the backend, drop queued
 * packets, unregister savevm, and free all per-device resources. */
void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    qemu_purge_queued_packets(&n->nic->nc);

    unregister_savevm(n->qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* Exactly one of tx_timer/tx_bh was allocated at init. */
    if (n->tx_timer) {
        qemu_del_timer(n->tx_timer);
        qemu_free_timer(n->tx_timer);
    } else {
        qemu_bh_delete(n->tx_bh);
    }

    qemu_del_net_client(&n->nic->nc);

    virtio_cleanup(&n->vdev);
}