virtio-net.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078
  1. /*
  2. * Virtio Network Device
  3. *
  4. * Copyright IBM, Corp. 2007
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2. See
  10. * the COPYING file in the top-level directory.
  11. *
  12. */
  13. #include "iov.h"
  14. #include "virtio.h"
  15. #include "net.h"
  16. #include "net/checksum.h"
  17. #include "net/tap.h"
  18. #include "qemu-error.h"
  19. #include "qemu-timer.h"
  20. #include "virtio-net.h"
  21. #include "vhost_net.h"
  22. #define VIRTIO_NET_VM_VERSION 11
  23. #define MAC_TABLE_ENTRIES 64
  24. #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
/* Per-device state for one virtio network interface. */
typedef struct VirtIONet
{
    VirtIODevice vdev;          /* must stay first: to_virtio_net() casts
                                 * VirtIODevice* straight to VirtIONet* */
    uint8_t mac[ETH_ALEN];      /* current station MAC address */
    uint16_t status;            /* VIRTIO_NET_S_* bits exposed via config space */
    VirtQueue *rx_vq;
    VirtQueue *tx_vq;
    VirtQueue *ctrl_vq;
    NICState *nic;
    QEMUTimer *tx_timer;        /* set when tx=timer; NULL when tx=bh */
    QEMUBH *tx_bh;              /* set when tx=bh */
    uint32_t tx_timeout;        /* timer rearm interval (ns), from config */
    int32_t tx_burst;           /* max packets flushed per TX pass */
    int tx_waiting;             /* nonzero while a TX kick is pending */
    uint32_t has_vnet_hdr;      /* peer (tap) handles struct virtio_net_hdr */
    uint8_t has_ufo;            /* peer supports UFO offload */
    struct {
        VirtQueueElement elem;  /* element parked while an async send is in flight */
        ssize_t len;
    } async_tx;
    int mergeable_rx_bufs;      /* guest negotiated VIRTIO_NET_F_MRG_RXBUF */
    /* RX filter modes, toggled through the control virtqueue */
    uint8_t promisc;
    uint8_t allmulti;
    uint8_t alluni;
    uint8_t nomulti;
    uint8_t nouni;
    uint8_t nobcast;
    uint8_t vhost_started;      /* vhost-net backend currently active */
    struct {
        int in_use;             /* valid entries in macs[] */
        int first_multi;        /* entries [0, first_multi) are unicast,
                                 * [first_multi, in_use) are multicast */
        uint8_t multi_overflow; /* guest gave more multicast MACs than fit */
        uint8_t uni_overflow;   /* guest gave more unicast MACs than fit */
        uint8_t *macs;          /* MAC_TABLE_ENTRIES * ETH_ALEN filter table */
    } mac_table;
    uint32_t *vlans;            /* VLAN filter bitmap, MAX_VLAN bits */
    DeviceState *qdev;
} VirtIONet;
  63. /* TODO
  64. * - we could suppress RX interrupt if we were so inclined.
  65. */
/* Recover the VirtIONet from its embedded VirtIODevice.
 * The cast is valid because vdev is the first member of VirtIONet. */
static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    return (VirtIONet *)vdev;
}
/* Fill the guest-visible config space: link status (guest-endian
 * conversion via stw_p) followed by the MAC address. */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg;

    stw_p(&netcfg.status, n->status);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, sizeof(netcfg));
}
/* Guest wrote config space: pick up a changed MAC address and refresh
 * the NIC info string shown in the monitor.  The status field is
 * read-only from the guest side and is ignored here. */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg;

    memcpy(&netcfg, config, sizeof(netcfg));

    if (memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(&n->nic->nc, n->mac);
    }
}
  88. static bool virtio_net_started(VirtIONet *n, uint8_t status)
  89. {
  90. return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
  91. (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
  92. }
/* Reconcile the vhost-net backend with the desired device state:
 * start it when the device should run and it is stopped, stop it in
 * the opposite case.  No-op unless the peer is a tap with vhost. */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    /* vhost only applies to a tap backend */
    if (!n->nic->nc.peer) {
        return;
    }
    if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
        return;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }
    /* Already in the desired state (and link not down)? Nothing to do. */
    if (!!n->vhost_started == virtio_net_started(n, status) &&
        !n->nic->nc.peer->link_down) {
        return;
    }
    if (!n->vhost_started) {
        int r;
        if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
            return;
        }
        r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        if (r < 0) {
            /* Not fatal: userspace virtio keeps working without vhost */
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
        } else {
            n->vhost_started = 1;
        }
    } else {
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        n->vhost_started = 0;
    }
}
/* Device status change hook: first (re)sync vhost, then, if a TX kick
 * was pending, either resume the TX path (timer or bottom-half,
 * whichever this device was configured with) or park it. */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = to_virtio_net(vdev);

    virtio_net_vhost_status(n, status);

    if (!n->tx_waiting) {
        return;
    }

    if (virtio_net_started(n, status) && !n->vhost_started) {
        /* resume the deferred TX flush */
        if (n->tx_timer) {
            qemu_mod_timer(n->tx_timer,
                           qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        } else {
            qemu_bh_schedule(n->tx_bh);
        }
    } else {
        /* device stopped (or vhost took over): cancel pending TX work */
        if (n->tx_timer) {
            qemu_del_timer(n->tx_timer);
        } else {
            qemu_bh_cancel(n->tx_bh);
        }
    }
}
  147. static void virtio_net_set_link_status(VLANClientState *nc)
  148. {
  149. VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
  150. uint16_t old_status = n->status;
  151. if (nc->link_down)
  152. n->status &= ~VIRTIO_NET_S_LINK_UP;
  153. else
  154. n->status |= VIRTIO_NET_S_LINK_UP;
  155. if (n->status != old_status)
  156. virtio_notify_config(&n->vdev);
  157. virtio_net_set_status(&n->vdev, n->vdev.status);
  158. }
/* Device reset: return RX filtering to the pre-negotiation defaults
 * (promiscuous on, everything else off) and clear the MAC/VLAN tables. */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memset(n->vlans, 0, MAX_VLAN >> 3);
}
  177. static int peer_has_vnet_hdr(VirtIONet *n)
  178. {
  179. if (!n->nic->nc.peer)
  180. return 0;
  181. if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP)
  182. return 0;
  183. n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
  184. return n->has_vnet_hdr;
  185. }
  186. static int peer_has_ufo(VirtIONet *n)
  187. {
  188. if (!peer_has_vnet_hdr(n))
  189. return 0;
  190. n->has_ufo = tap_has_ufo(n->nic->nc.peer);
  191. return n->has_ufo;
  192. }
/* Compute the feature set offered to the guest: strip offload bits the
 * backend cannot honour, then let vhost (if present) mask further.
 * NOTE: peer_has_vnet_hdr() has the side effect of caching
 * n->has_vnet_hdr and is deliberately called on both paths below. */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (peer_has_vnet_hdr(n)) {
        tap_using_vnet_hdr(n->nic->nc.peer, 1);
    } else {
        /* no vnet header: no checksum/TSO offloads in either direction */
        features &= ~(0x1 << VIRTIO_NET_F_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_ECN);

        features &= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6);
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_ECN);
    }

    /* UFO additionally requires explicit backend support */
    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~(0x1 << VIRTIO_NET_F_GUEST_UFO);
        features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
    }

    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
        return features;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return features;
    }
    /* vhost gets the final say on what it can accelerate */
    return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer), features);
}
  222. static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
  223. {
  224. uint32_t features = 0;
  225. /* Linux kernel 2.6.25. It understood MAC (as everyone must),
  226. * but also these: */
  227. features |= (1 << VIRTIO_NET_F_MAC);
  228. features |= (1 << VIRTIO_NET_F_CSUM);
  229. features |= (1 << VIRTIO_NET_F_HOST_TSO4);
  230. features |= (1 << VIRTIO_NET_F_HOST_TSO6);
  231. features |= (1 << VIRTIO_NET_F_HOST_ECN);
  232. return features;
  233. }
/* Guest acked its feature set: record mergeable-RX, program the tap
 * offloads accordingly, and forward the ack to vhost if present. */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));

    if (n->has_vnet_hdr) {
        /* enable exactly the guest-RX offloads that were negotiated */
        tap_set_offload(n->nic->nc.peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN) & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO) & 1);
    }
    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
        return;
    }
    if (!tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }
    vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
}
  255. static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
  256. VirtQueueElement *elem)
  257. {
  258. uint8_t on;
  259. if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(on)) {
  260. error_report("virtio-net ctrl invalid rx mode command");
  261. exit(1);
  262. }
  263. on = ldub_p(elem->out_sg[1].iov_base);
  264. if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC)
  265. n->promisc = on;
  266. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI)
  267. n->allmulti = on;
  268. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI)
  269. n->alluni = on;
  270. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI)
  271. n->nomulti = on;
  272. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI)
  273. n->nouni = on;
  274. else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST)
  275. n->nobcast = on;
  276. else
  277. return VIRTIO_NET_ERR;
  278. return VIRTIO_NET_OK;
  279. }
  280. static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
  281. VirtQueueElement *elem)
  282. {
  283. struct virtio_net_ctrl_mac mac_data;
  284. if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem->out_num != 3 ||
  285. elem->out_sg[1].iov_len < sizeof(mac_data) ||
  286. elem->out_sg[2].iov_len < sizeof(mac_data))
  287. return VIRTIO_NET_ERR;
  288. n->mac_table.in_use = 0;
  289. n->mac_table.first_multi = 0;
  290. n->mac_table.uni_overflow = 0;
  291. n->mac_table.multi_overflow = 0;
  292. memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
  293. mac_data.entries = ldl_p(elem->out_sg[1].iov_base);
  294. if (sizeof(mac_data.entries) +
  295. (mac_data.entries * ETH_ALEN) > elem->out_sg[1].iov_len)
  296. return VIRTIO_NET_ERR;
  297. if (mac_data.entries <= MAC_TABLE_ENTRIES) {
  298. memcpy(n->mac_table.macs, elem->out_sg[1].iov_base + sizeof(mac_data),
  299. mac_data.entries * ETH_ALEN);
  300. n->mac_table.in_use += mac_data.entries;
  301. } else {
  302. n->mac_table.uni_overflow = 1;
  303. }
  304. n->mac_table.first_multi = n->mac_table.in_use;
  305. mac_data.entries = ldl_p(elem->out_sg[2].iov_base);
  306. if (sizeof(mac_data.entries) +
  307. (mac_data.entries * ETH_ALEN) > elem->out_sg[2].iov_len)
  308. return VIRTIO_NET_ERR;
  309. if (mac_data.entries) {
  310. if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
  311. memcpy(n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
  312. elem->out_sg[2].iov_base + sizeof(mac_data),
  313. mac_data.entries * ETH_ALEN);
  314. n->mac_table.in_use += mac_data.entries;
  315. } else {
  316. n->mac_table.multi_overflow = 1;
  317. }
  318. }
  319. return VIRTIO_NET_OK;
  320. }
  321. static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
  322. VirtQueueElement *elem)
  323. {
  324. uint16_t vid;
  325. if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(vid)) {
  326. error_report("virtio-net ctrl invalid vlan command");
  327. return VIRTIO_NET_ERR;
  328. }
  329. vid = lduw_p(elem->out_sg[1].iov_base);
  330. if (vid >= MAX_VLAN)
  331. return VIRTIO_NET_ERR;
  332. if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
  333. n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
  334. else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
  335. n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
  336. else
  337. return VIRTIO_NET_ERR;
  338. return VIRTIO_NET_OK;
  339. }
/* Control virtqueue handler: pop each request, dispatch on the command
 * class, write the one-byte status into the final in-buffer and push
 * the element back.  Layout: out_sg[0] carries the ctrl header, the
 * last in_sg carries the status byte. */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement elem;

    while (virtqueue_pop(vq, &elem)) {
        if ((elem.in_num < 1) || (elem.out_num < 1)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        if (elem.out_sg[0].iov_len < sizeof(ctrl) ||
            elem.in_sg[elem.in_num - 1].iov_len < sizeof(status)) {
            error_report("virtio-net ctrl header not in correct element");
            exit(1);
        }

        ctrl.class = ldub_p(elem.out_sg[0].iov_base);
        ctrl.cmd = ldub_p(elem.out_sg[0].iov_base + sizeof(ctrl.class));

        /* unknown classes fall through with status == VIRTIO_NET_ERR */
        if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE)
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, &elem);
        else if (ctrl.class == VIRTIO_NET_CTRL_MAC)
            status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
        else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);

        stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}
/* RX */
/* RX virtqueue kick: the guest posted fresh receive buffers, so drain
 * anything the net layer queued while we had none. */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    qemu_flush_queued_packets(&n->nic->nc);

    /* We now have RX buffers, signal to the IO thread to break out of the
     * select to re-poll the tap file descriptor */
    qemu_notify_event();
}
  378. static int virtio_net_can_receive(VLANClientState *nc)
  379. {
  380. VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
  381. if (!n->vdev.vm_running) {
  382. return 0;
  383. }
  384. if (!virtio_queue_ready(n->rx_vq) ||
  385. !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
  386. return 0;
  387. return 1;
  388. }
/* Check whether the RX queue can hold a packet of bufsize bytes.
 * On apparent shortage, re-enable guest notifications and check a
 * second time to close the race with the guest adding buffers; leaves
 * notifications disabled when buffers are available. */
static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
{
    if (virtio_queue_empty(n->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(n->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(n->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(n->rx_vq, bufsize, 0)))
            return 0;
    }

    virtio_queue_set_notification(n->rx_vq, 0);
    return 1;
}
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums. This
 * causes dhclient to get upset. Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums. This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        const uint8_t *buf, size_t size)
{
    /* Offsets below assume an untagged Ethernet frame with a 20-byte
     * IPv4 header (no options) — NOTE(review): frames with IP options
     * would not match the port check and are simply left alone. */
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        /* FIXME this cast is evil */
        net_checksum_calculate((uint8_t *)buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
/* Prepare the virtio_net_hdr at the start of the guest's first RX
 * buffer.  If the tap supplied a header it is copied (and the dhclient
 * workaround applied); otherwise a cleared header is synthesized.
 * Adjusts iov[0] to skip the full hdr_len the guest expects and
 * returns how many bytes of buf were consumed as header. */
static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
                          const void *buf, size_t size, size_t hdr_len)
{
    struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)iov[0].iov_base;
    int offset = 0;

    /* defaults, overwritten below when the tap provides a real header */
    hdr->flags = 0;
    hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (n->has_vnet_hdr) {
        memcpy(hdr, buf, sizeof(*hdr));
        offset = sizeof(*hdr);
        work_around_broken_dhclient(hdr, buf + offset, size - offset);
    }

    /* We only ever receive a struct virtio_net_hdr from the tapfd,
     * but we may be passing along a larger header to the guest.
     */
    iov[0].iov_base += hdr_len;
    iov[0].iov_len  -= hdr_len;

    return offset;
}
/* Apply the RX filters to an incoming frame.
 * Returns 1 to accept the packet, 0 to drop it.  Checks, in order:
 * promiscuous mode, the VLAN bitmap, then the multicast or unicast
 * rules and MAC table depending on the destination address. */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00}; /* 802.1Q TPID */
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    /* skip the vnet header to reach the Ethernet frame */
    if (n->has_vnet_hdr) {
        ptr += sizeof(struct virtio_net_hdr);
    }

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* multicast entries occupy [first_multi, in_use) */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        /* unicast entries occupy [0, first_multi) */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
/* Deliver one packet from the backend into the guest's RX queue,
 * possibly spreading it over several descriptors when mergeable RX
 * buffers were negotiated.  Returns the bytes consumed, 0 to ask the
 * net layer to queue the packet, or -1 on error/no buffers. */
static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
    size_t guest_hdr_len, offset, i, host_hdr_len;

    if (!virtio_net_can_receive(&n->nic->nc))
        return -1;

    /* hdr_len refers to the header we supply to the guest */
    guest_hdr_len = n->mergeable_rx_bufs ?
        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);

    host_hdr_len = n->has_vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
    if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len))
        return 0;

    if (!receive_filter(n, buf, size))
        return size; /* filtered out: report as consumed */

    offset = i = 0;

    /* one loop iteration per descriptor chain consumed */
    while (offset < size) {
        VirtQueueElement elem;
        int len, total;
        struct iovec sg[VIRTQUEUE_MAX_SIZE];

        total = 0;

        if (virtqueue_pop(n->rx_vq, &elem) == 0) {
            /* ran out of buffers mid-packet: fatal unless nothing
             * was written yet (then just drop/retry later) */
            if (i == 0)
                return -1;
            error_report("virtio-net unexpected empty queue: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                         i, n->mergeable_rx_bufs, offset, size,
                         guest_hdr_len, host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != guest_hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /* local copy: receive_header() adjusts the first iovec in place */
        memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);

        if (i == 0) {
            /* keep a pointer so num_buffers can be patched at the end */
            if (n->mergeable_rx_bufs)
                mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;

            offset += receive_header(n, sg, elem.in_num,
                                     buf + offset, size - offset, guest_hdr_len);
            total += guest_hdr_len;
        }

        /* copy in packet. ugh */
        len = iov_from_buf(sg, elem.in_num,
                           buf + offset, 0, size - offset);
        total += len;
        offset += len;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         i, n->mergeable_rx_bufs,
                         offset, size, guest_hdr_len, host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(n->rx_vq, &elem, total, i++);
    }

    if (mhdr) {
        stw_p(&mhdr->num_buffers, i);
    }

    virtqueue_flush(n->rx_vq, i);
    virtio_notify(&n->vdev, n->rx_vq);

    return size;
}
static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);

/* Async-send completion: retire the element parked in async_tx,
 * re-enable TX notifications and resume flushing the queue. */
static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
    virtio_notify(&n->vdev, n->tx_vq);

    /* out_num == 0 marks "no async send in flight" for flush_tx */
    n->async_tx.elem.out_num = n->async_tx.len = 0;

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}
/* TX */
/* Drain up to tx_burst packets from the TX queue into the backend.
 * Returns the number of packets sent, or -EBUSY when the backend
 * could not take a packet (completion resumes via tx_complete). */
static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
    VirtQueueElement elem;
    int32_t num_packets = 0;

    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    assert(n->vdev.vm_running);

    /* a previous async send is still in flight: wait for tx_complete */
    if (n->async_tx.elem.out_num) {
        virtio_queue_set_notification(n->tx_vq, 0);
        return num_packets;
    }

    while (virtqueue_pop(vq, &elem)) {
        ssize_t ret, len = 0;
        unsigned int out_num = elem.out_num;
        struct iovec *out_sg = &elem.out_sg[0];
        unsigned hdr_len;

        /* hdr_len refers to the header received from the guest */
        hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);

        if (out_num < 1 || out_sg->iov_len != hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /* ignore the header if GSO is not supported */
        if (!n->has_vnet_hdr) {
            out_num--;
            out_sg++;
            len += hdr_len;
        } else if (n->mergeable_rx_bufs) {
            /* tapfd expects a struct virtio_net_hdr */
            hdr_len -= sizeof(struct virtio_net_hdr);
            out_sg->iov_len -= hdr_len;
            len += hdr_len;
        }

        ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
                                      virtio_net_tx_complete);
        if (ret == 0) {
            /* backend busy: park the element and stop flushing */
            virtio_queue_set_notification(n->tx_vq, 0);
            n->async_tx.elem = elem;
            n->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(vq, &elem, len);
        virtio_notify(&n->vdev, vq);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
/* TX kick handler for tx=timer mode: the first kick arms the timer so
 * packets can batch; a second kick before it fires flushes at once. */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        /* remember the kick; set_status() replays it on resume */
        n->tx_waiting = 1;
        return;
    }

    if (n->tx_waiting) {
        /* guest is kicking fast: flush immediately instead of waiting */
        virtio_queue_set_notification(vq, 1);
        qemu_del_timer(n->tx_timer);
        n->tx_waiting = 0;
        virtio_net_flush_tx(n, vq);
    } else {
        qemu_mod_timer(n->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        n->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
/* TX kick handler for tx=bh mode: defer the flush to a bottom half,
 * disabling further notifications while the flush is pending. */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    if (unlikely(n->tx_waiting)) {
        return; /* flush already scheduled */
    }
    n->tx_waiting = 1;

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        /* tx_waiting stays set; set_status() reschedules on resume */
        return;
    }

    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(n->tx_bh);
}
/* Timer callback (tx=timer mode): batching window elapsed, flush the
 * TX queue now. */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONet *n = opaque;

    /* set_status() cancels the timer when the VM stops */
    assert(n->vdev.vm_running);

    n->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}
/* Bottom-half callback (tx=bh mode): flush the TX queue, rescheduling
 * itself while the guest keeps the queue busy. */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONet *n = opaque;
    int32_t ret;

    /* set_status() cancels the bh when the VM stops */
    assert(n->vdev.vm_running);

    n->tx_waiting = 0;

    /* Just in case the driver is not ready on more */
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
        return;

    ret = virtio_net_flush_tx(n, n->tx_vq);
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(n->tx_bh);
        n->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(n->tx_vq, 1);
    if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
        virtio_queue_set_notification(n->tx_vq, 0);
        qemu_bh_schedule(n->tx_bh);
        n->tx_waiting = 1;
    }
}
/* Migration save handler.  Field order and widths are the wire format
 * and must match virtio_net_load() exactly; new fields may only be
 * appended (guarded by VIRTIO_NET_VM_VERSION bumps on the load side). */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
    virtio_save(&n->vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
}
  737. static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
  738. {
  739. VirtIONet *n = opaque;
  740. int i;
  741. if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
  742. return -EINVAL;
  743. virtio_load(&n->vdev, f);
  744. qemu_get_buffer(f, n->mac, ETH_ALEN);
  745. n->tx_waiting = qemu_get_be32(f);
  746. n->mergeable_rx_bufs = qemu_get_be32(f);
  747. if (version_id >= 3)
  748. n->status = qemu_get_be16(f);
  749. if (version_id >= 4) {
  750. if (version_id < 8) {
  751. n->promisc = qemu_get_be32(f);
  752. n->allmulti = qemu_get_be32(f);
  753. } else {
  754. n->promisc = qemu_get_byte(f);
  755. n->allmulti = qemu_get_byte(f);
  756. }
  757. }
  758. if (version_id >= 5) {
  759. n->mac_table.in_use = qemu_get_be32(f);
  760. /* MAC_TABLE_ENTRIES may be different from the saved image */
  761. if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
  762. qemu_get_buffer(f, n->mac_table.macs,
  763. n->mac_table.in_use * ETH_ALEN);
  764. } else if (n->mac_table.in_use) {
  765. qemu_fseek(f, n->mac_table.in_use * ETH_ALEN, SEEK_CUR);
  766. n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
  767. n->mac_table.in_use = 0;
  768. }
  769. }
  770. if (version_id >= 6)
  771. qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
  772. if (version_id >= 7) {
  773. if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
  774. error_report("virtio-net: saved image requires vnet_hdr=on");
  775. return -1;
  776. }
  777. if (n->has_vnet_hdr) {
  778. tap_using_vnet_hdr(n->nic->nc.peer, 1);
  779. tap_set_offload(n->nic->nc.peer,
  780. (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
  781. (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
  782. (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
  783. (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN) & 1,
  784. (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO) & 1);
  785. }
  786. }
  787. if (version_id >= 9) {
  788. n->mac_table.multi_overflow = qemu_get_byte(f);
  789. n->mac_table.uni_overflow = qemu_get_byte(f);
  790. }
  791. if (version_id >= 10) {
  792. n->alluni = qemu_get_byte(f);
  793. n->nomulti = qemu_get_byte(f);
  794. n->nouni = qemu_get_byte(f);
  795. n->nobcast = qemu_get_byte(f);
  796. }
  797. if (version_id >= 11) {
  798. if (qemu_get_byte(f) && !peer_has_ufo(n)) {
  799. error_report("virtio-net: saved image requires TUN_F_UFO support");
  800. return -1;
  801. }
  802. }
  803. /* Find the first multicast entry in the saved MAC filter */
  804. for (i = 0; i < n->mac_table.in_use; i++) {
  805. if (n->mac_table.macs[i * ETH_ALEN] & 1) {
  806. break;
  807. }
  808. }
  809. n->mac_table.first_multi = i;
  810. return 0;
  811. }
/* Net-layer cleanup callback: the NICState is being torn down, so drop
 * our reference to it. */
static void virtio_net_cleanup(VLANClientState *nc)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    n->nic = NULL;
}
/* Callbacks this device registers with the net layer. */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_TYPE_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};
/* Create and wire up one virtio-net device: allocate the common virtio
 * state, install the callbacks, create the RX/TX/ctrl queues (TX in
 * timer or bottom-half mode per `net->tx`), register the NIC with the
 * net layer and hook up migration.  Returns the embedded VirtIODevice. */
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                              virtio_net_conf *net)
{
    VirtIONet *n;

    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                        sizeof(struct virtio_net_config),
                                        sizeof(VirtIONet));

    n->vdev.get_config = virtio_net_get_config;
    n->vdev.set_config = virtio_net_set_config;
    n->vdev.get_features = virtio_net_get_features;
    n->vdev.set_features = virtio_net_set_features;
    n->vdev.bad_features = virtio_net_bad_features;
    n->vdev.reset = virtio_net_reset;
    n->vdev.set_status = virtio_net_set_status;
    n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);

    /* unknown tx= values are reported but fall back to "bh" below */
    if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     net->tx);
        error_report("Defaulting to \"bh\"");
    }

    if (net->tx && !strcmp(net->tx, "timer")) {
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
        n->tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer, n);
        n->tx_timeout = net->txtimer;
    } else {
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
    }
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&conf->macaddr);
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, conf, dev->info->name, dev->id, n);

    qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);

    n->tx_waiting = 0;
    n->tx_burst = net->txburst;
    n->mergeable_rx_bufs = 0;
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");

    return &n->vdev;
}
/* Tear down a virtio-net device: stop the datapath, unhook migration,
 * free the filter tables and the TX timer/bh, and release the common
 * virtio state.  Inverse of virtio_net_init(). */
void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    qemu_purge_queued_packets(&n->nic->nc);

    unregister_savevm(n->qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* tx_timer and tx_bh are mutually exclusive (see virtio_net_init) */
    if (n->tx_timer) {
        qemu_del_timer(n->tx_timer);
        qemu_free_timer(n->tx_timer);
    } else {
        qemu_bh_delete(n->tx_bh);
    }

    qemu_del_vlan_client(&n->nic->nc);
    virtio_cleanup(&n->vdev);
}