2
0

tap.c 29 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075
  1. /*
  2. * QEMU System Emulator
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2009 Red Hat, Inc.
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. */
  25. #include "qemu/osdep.h"
  26. #include "tap_int.h"
  27. #include <sys/ioctl.h>
  28. #include <sys/wait.h>
  29. #include <sys/socket.h>
  30. #include <net/if.h>
  31. #include "net/eth.h"
  32. #include "net/net.h"
  33. #include "clients.h"
  34. #include "monitor/monitor.h"
  35. #include "sysemu/sysemu.h"
  36. #include "qapi/error.h"
  37. #include "qemu/cutils.h"
  38. #include "qemu/error-report.h"
  39. #include "qemu/main-loop.h"
  40. #include "qemu/sockets.h"
  41. #include "net/tap.h"
  42. #include "net/vhost_net.h"
  43. typedef struct TAPState {
  44. NetClientState nc;
  45. int fd;
  46. char down_script[1024];
  47. char down_script_arg[128];
  48. uint8_t buf[NET_BUFSIZE];
  49. bool read_poll;
  50. bool write_poll;
  51. bool using_vnet_hdr;
  52. bool has_ufo;
  53. bool enabled;
  54. VHostNetState *vhost_net;
  55. unsigned host_vnet_hdr_len;
  56. Notifier exit;
  57. } TAPState;
  58. static void launch_script(const char *setup_script, const char *ifname,
  59. int fd, Error **errp);
  60. static void tap_send(void *opaque);
  61. static void tap_writable(void *opaque);
  62. static void tap_update_fd_handler(TAPState *s)
  63. {
  64. qemu_set_fd_handler(s->fd,
  65. s->read_poll && s->enabled ? tap_send : NULL,
  66. s->write_poll && s->enabled ? tap_writable : NULL,
  67. s);
  68. }
  69. static void tap_read_poll(TAPState *s, bool enable)
  70. {
  71. s->read_poll = enable;
  72. tap_update_fd_handler(s);
  73. }
  74. static void tap_write_poll(TAPState *s, bool enable)
  75. {
  76. s->write_poll = enable;
  77. tap_update_fd_handler(s);
  78. }
  79. static void tap_writable(void *opaque)
  80. {
  81. TAPState *s = opaque;
  82. tap_write_poll(s, false);
  83. qemu_flush_queued_packets(&s->nc);
  84. }
  85. static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
  86. {
  87. ssize_t len;
  88. len = RETRY_ON_EINTR(writev(s->fd, iov, iovcnt));
  89. if (len == -1 && errno == EAGAIN) {
  90. tap_write_poll(s, true);
  91. return 0;
  92. }
  93. return len;
  94. }
  95. static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
  96. int iovcnt)
  97. {
  98. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  99. const struct iovec *iovp = iov;
  100. struct iovec iov_copy[iovcnt + 1];
  101. struct virtio_net_hdr_mrg_rxbuf hdr = { };
  102. if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
  103. iov_copy[0].iov_base = &hdr;
  104. iov_copy[0].iov_len = s->host_vnet_hdr_len;
  105. memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
  106. iovp = iov_copy;
  107. iovcnt++;
  108. }
  109. return tap_write_packet(s, iovp, iovcnt);
  110. }
  111. static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
  112. {
  113. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  114. struct iovec iov[2];
  115. int iovcnt = 0;
  116. struct virtio_net_hdr_mrg_rxbuf hdr = { };
  117. if (s->host_vnet_hdr_len) {
  118. iov[iovcnt].iov_base = &hdr;
  119. iov[iovcnt].iov_len = s->host_vnet_hdr_len;
  120. iovcnt++;
  121. }
  122. iov[iovcnt].iov_base = (char *)buf;
  123. iov[iovcnt].iov_len = size;
  124. iovcnt++;
  125. return tap_write_packet(s, iov, iovcnt);
  126. }
  127. static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
  128. {
  129. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  130. struct iovec iov[1];
  131. if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
  132. return tap_receive_raw(nc, buf, size);
  133. }
  134. iov[0].iov_base = (char *)buf;
  135. iov[0].iov_len = size;
  136. return tap_write_packet(s, iov, 1);
  137. }
  138. #ifndef __sun__
  139. ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
  140. {
  141. return read(tapfd, buf, maxlen);
  142. }
  143. #endif
  144. static void tap_send_completed(NetClientState *nc, ssize_t len)
  145. {
  146. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  147. tap_read_poll(s, true);
  148. }
  149. static void tap_send(void *opaque)
  150. {
  151. TAPState *s = opaque;
  152. int size;
  153. int packets = 0;
  154. while (true) {
  155. uint8_t *buf = s->buf;
  156. uint8_t min_pkt[ETH_ZLEN];
  157. size_t min_pktsz = sizeof(min_pkt);
  158. size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
  159. if (size <= 0) {
  160. break;
  161. }
  162. if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
  163. buf += s->host_vnet_hdr_len;
  164. size -= s->host_vnet_hdr_len;
  165. }
  166. if (net_peer_needs_padding(&s->nc)) {
  167. if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
  168. buf = min_pkt;
  169. size = min_pktsz;
  170. }
  171. }
  172. size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
  173. if (size == 0) {
  174. tap_read_poll(s, false);
  175. break;
  176. } else if (size < 0) {
  177. break;
  178. }
  179. /*
  180. * When the host keeps receiving more packets while tap_send() is
  181. * running we can hog the QEMU global mutex. Limit the number of
  182. * packets that are processed per tap_send() callback to prevent
  183. * stalling the guest.
  184. */
  185. packets++;
  186. if (packets >= 50) {
  187. break;
  188. }
  189. }
  190. }
  191. static bool tap_has_ufo(NetClientState *nc)
  192. {
  193. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  194. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  195. return s->has_ufo;
  196. }
  197. static bool tap_has_vnet_hdr(NetClientState *nc)
  198. {
  199. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  200. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  201. return !!s->host_vnet_hdr_len;
  202. }
  203. static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
  204. {
  205. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  206. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  207. return !!tap_probe_vnet_hdr_len(s->fd, len);
  208. }
  209. static int tap_get_vnet_hdr_len(NetClientState *nc)
  210. {
  211. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  212. return s->host_vnet_hdr_len;
  213. }
  214. static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
  215. {
  216. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  217. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  218. assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
  219. len == sizeof(struct virtio_net_hdr) ||
  220. len == sizeof(struct virtio_net_hdr_v1_hash));
  221. tap_fd_set_vnet_hdr_len(s->fd, len);
  222. s->host_vnet_hdr_len = len;
  223. }
  224. static bool tap_get_using_vnet_hdr(NetClientState *nc)
  225. {
  226. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  227. return s->using_vnet_hdr;
  228. }
  229. static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
  230. {
  231. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  232. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  233. assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
  234. s->using_vnet_hdr = using_vnet_hdr;
  235. }
  236. static int tap_set_vnet_le(NetClientState *nc, bool is_le)
  237. {
  238. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  239. return tap_fd_set_vnet_le(s->fd, is_le);
  240. }
  241. static int tap_set_vnet_be(NetClientState *nc, bool is_be)
  242. {
  243. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  244. return tap_fd_set_vnet_be(s->fd, is_be);
  245. }
  246. static void tap_set_offload(NetClientState *nc, int csum, int tso4,
  247. int tso6, int ecn, int ufo)
  248. {
  249. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  250. if (s->fd < 0) {
  251. return;
  252. }
  253. tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
  254. }
  255. static void tap_exit_notify(Notifier *notifier, void *data)
  256. {
  257. TAPState *s = container_of(notifier, TAPState, exit);
  258. Error *err = NULL;
  259. if (s->down_script[0]) {
  260. launch_script(s->down_script, s->down_script_arg, s->fd, &err);
  261. if (err) {
  262. error_report_err(err);
  263. }
  264. }
  265. }
  266. static void tap_cleanup(NetClientState *nc)
  267. {
  268. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  269. if (s->vhost_net) {
  270. vhost_net_cleanup(s->vhost_net);
  271. g_free(s->vhost_net);
  272. s->vhost_net = NULL;
  273. }
  274. qemu_purge_queued_packets(nc);
  275. tap_exit_notify(&s->exit, NULL);
  276. qemu_remove_exit_notifier(&s->exit);
  277. tap_read_poll(s, false);
  278. tap_write_poll(s, false);
  279. close(s->fd);
  280. s->fd = -1;
  281. }
  282. static void tap_poll(NetClientState *nc, bool enable)
  283. {
  284. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  285. tap_read_poll(s, enable);
  286. tap_write_poll(s, enable);
  287. }
  288. static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
  289. {
  290. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  291. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  292. return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
  293. }
  294. int tap_get_fd(NetClientState *nc)
  295. {
  296. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  297. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  298. return s->fd;
  299. }
  300. /* fd support */
  301. static NetClientInfo net_tap_info = {
  302. .type = NET_CLIENT_DRIVER_TAP,
  303. .size = sizeof(TAPState),
  304. .receive = tap_receive,
  305. .receive_raw = tap_receive_raw,
  306. .receive_iov = tap_receive_iov,
  307. .poll = tap_poll,
  308. .cleanup = tap_cleanup,
  309. .has_ufo = tap_has_ufo,
  310. .has_vnet_hdr = tap_has_vnet_hdr,
  311. .has_vnet_hdr_len = tap_has_vnet_hdr_len,
  312. .get_using_vnet_hdr = tap_get_using_vnet_hdr,
  313. .using_vnet_hdr = tap_using_vnet_hdr,
  314. .set_offload = tap_set_offload,
  315. .get_vnet_hdr_len = tap_get_vnet_hdr_len,
  316. .set_vnet_hdr_len = tap_set_vnet_hdr_len,
  317. .set_vnet_le = tap_set_vnet_le,
  318. .set_vnet_be = tap_set_vnet_be,
  319. .set_steering_ebpf = tap_set_steering_ebpf,
  320. };
  321. static TAPState *net_tap_fd_init(NetClientState *peer,
  322. const char *model,
  323. const char *name,
  324. int fd,
  325. int vnet_hdr)
  326. {
  327. NetClientState *nc;
  328. TAPState *s;
  329. nc = qemu_new_net_client(&net_tap_info, peer, model, name);
  330. s = DO_UPCAST(TAPState, nc, nc);
  331. s->fd = fd;
  332. s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
  333. s->using_vnet_hdr = false;
  334. s->has_ufo = tap_probe_has_ufo(s->fd);
  335. s->enabled = true;
  336. tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
  337. /*
  338. * Make sure host header length is set correctly in tap:
  339. * it might have been modified by another instance of qemu.
  340. */
  341. if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
  342. tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
  343. }
  344. tap_read_poll(s, true);
  345. s->vhost_net = NULL;
  346. s->exit.notify = tap_exit_notify;
  347. qemu_add_exit_notifier(&s->exit);
  348. return s;
  349. }
  350. static void launch_script(const char *setup_script, const char *ifname,
  351. int fd, Error **errp)
  352. {
  353. int pid, status;
  354. char *args[3];
  355. char **parg;
  356. /* try to launch network script */
  357. pid = fork();
  358. if (pid < 0) {
  359. error_setg_errno(errp, errno, "could not launch network script %s",
  360. setup_script);
  361. return;
  362. }
  363. if (pid == 0) {
  364. int open_max = sysconf(_SC_OPEN_MAX), i;
  365. for (i = 3; i < open_max; i++) {
  366. if (i != fd) {
  367. close(i);
  368. }
  369. }
  370. parg = args;
  371. *parg++ = (char *)setup_script;
  372. *parg++ = (char *)ifname;
  373. *parg = NULL;
  374. execv(setup_script, args);
  375. _exit(1);
  376. } else {
  377. while (waitpid(pid, &status, 0) != pid) {
  378. /* loop */
  379. }
  380. if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
  381. return;
  382. }
  383. error_setg(errp, "network script %s failed with status %d",
  384. setup_script, status);
  385. }
  386. }
  387. static int recv_fd(int c)
  388. {
  389. int fd;
  390. uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
  391. struct msghdr msg = {
  392. .msg_control = msgbuf,
  393. .msg_controllen = sizeof(msgbuf),
  394. };
  395. struct cmsghdr *cmsg;
  396. struct iovec iov;
  397. uint8_t req[1];
  398. ssize_t len;
  399. cmsg = CMSG_FIRSTHDR(&msg);
  400. cmsg->cmsg_level = SOL_SOCKET;
  401. cmsg->cmsg_type = SCM_RIGHTS;
  402. cmsg->cmsg_len = CMSG_LEN(sizeof(fd));
  403. msg.msg_controllen = cmsg->cmsg_len;
  404. iov.iov_base = req;
  405. iov.iov_len = sizeof(req);
  406. msg.msg_iov = &iov;
  407. msg.msg_iovlen = 1;
  408. len = recvmsg(c, &msg, 0);
  409. if (len > 0) {
  410. memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
  411. return fd;
  412. }
  413. return len;
  414. }
  415. static int net_bridge_run_helper(const char *helper, const char *bridge,
  416. Error **errp)
  417. {
  418. sigset_t oldmask, mask;
  419. g_autofree char *default_helper = NULL;
  420. int pid, status;
  421. char *args[5];
  422. char **parg;
  423. int sv[2];
  424. sigemptyset(&mask);
  425. sigaddset(&mask, SIGCHLD);
  426. sigprocmask(SIG_BLOCK, &mask, &oldmask);
  427. if (!helper) {
  428. helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER);
  429. }
  430. if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
  431. error_setg_errno(errp, errno, "socketpair() failed");
  432. return -1;
  433. }
  434. /* try to launch bridge helper */
  435. pid = fork();
  436. if (pid < 0) {
  437. error_setg_errno(errp, errno, "Can't fork bridge helper");
  438. return -1;
  439. }
  440. if (pid == 0) {
  441. int open_max = sysconf(_SC_OPEN_MAX), i;
  442. char *fd_buf = NULL;
  443. char *br_buf = NULL;
  444. char *helper_cmd = NULL;
  445. for (i = 3; i < open_max; i++) {
  446. if (i != sv[1]) {
  447. close(i);
  448. }
  449. }
  450. fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
  451. if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
  452. /* assume helper is a command */
  453. if (strstr(helper, "--br=") == NULL) {
  454. br_buf = g_strdup_printf("%s%s", "--br=", bridge);
  455. }
  456. helper_cmd = g_strdup_printf("%s %s %s %s", helper,
  457. "--use-vnet", fd_buf, br_buf ? br_buf : "");
  458. parg = args;
  459. *parg++ = (char *)"sh";
  460. *parg++ = (char *)"-c";
  461. *parg++ = helper_cmd;
  462. *parg++ = NULL;
  463. execv("/bin/sh", args);
  464. g_free(helper_cmd);
  465. } else {
  466. /* assume helper is just the executable path name */
  467. br_buf = g_strdup_printf("%s%s", "--br=", bridge);
  468. parg = args;
  469. *parg++ = (char *)helper;
  470. *parg++ = (char *)"--use-vnet";
  471. *parg++ = fd_buf;
  472. *parg++ = br_buf;
  473. *parg++ = NULL;
  474. execv(helper, args);
  475. }
  476. g_free(fd_buf);
  477. g_free(br_buf);
  478. _exit(1);
  479. } else {
  480. int fd;
  481. int saved_errno;
  482. close(sv[1]);
  483. fd = RETRY_ON_EINTR(recv_fd(sv[0]));
  484. saved_errno = errno;
  485. close(sv[0]);
  486. while (waitpid(pid, &status, 0) != pid) {
  487. /* loop */
  488. }
  489. sigprocmask(SIG_SETMASK, &oldmask, NULL);
  490. if (fd < 0) {
  491. error_setg_errno(errp, saved_errno,
  492. "failed to recv file descriptor");
  493. return -1;
  494. }
  495. if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
  496. error_setg(errp, "bridge helper failed");
  497. return -1;
  498. }
  499. return fd;
  500. }
  501. }
  502. int net_init_bridge(const Netdev *netdev, const char *name,
  503. NetClientState *peer, Error **errp)
  504. {
  505. const NetdevBridgeOptions *bridge;
  506. const char *helper, *br;
  507. TAPState *s;
  508. int fd, vnet_hdr;
  509. assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE);
  510. bridge = &netdev->u.bridge;
  511. helper = bridge->helper;
  512. br = bridge->br ?: DEFAULT_BRIDGE_INTERFACE;
  513. fd = net_bridge_run_helper(helper, br, errp);
  514. if (fd == -1) {
  515. return -1;
  516. }
  517. if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
  518. error_setg_errno(errp, errno, "Failed to set FD nonblocking");
  519. return -1;
  520. }
  521. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  522. if (vnet_hdr < 0) {
  523. close(fd);
  524. return -1;
  525. }
  526. s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
  527. qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br);
  528. return 0;
  529. }
  530. static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
  531. const char *setup_script, char *ifname,
  532. size_t ifname_sz, int mq_required, Error **errp)
  533. {
  534. Error *err = NULL;
  535. int fd, vnet_hdr_required;
  536. if (tap->has_vnet_hdr) {
  537. *vnet_hdr = tap->vnet_hdr;
  538. vnet_hdr_required = *vnet_hdr;
  539. } else {
  540. *vnet_hdr = 1;
  541. vnet_hdr_required = 0;
  542. }
  543. fd = RETRY_ON_EINTR(tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
  544. mq_required, errp));
  545. if (fd < 0) {
  546. return -1;
  547. }
  548. if (setup_script &&
  549. setup_script[0] != '\0' &&
  550. strcmp(setup_script, "no") != 0) {
  551. launch_script(setup_script, ifname, fd, &err);
  552. if (err) {
  553. error_propagate(errp, err);
  554. close(fd);
  555. return -1;
  556. }
  557. }
  558. return fd;
  559. }
  560. #define MAX_TAP_QUEUES 1024
  561. static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
  562. const char *model, const char *name,
  563. const char *ifname, const char *script,
  564. const char *downscript, const char *vhostfdname,
  565. int vnet_hdr, int fd, Error **errp)
  566. {
  567. Error *err = NULL;
  568. TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
  569. int vhostfd;
  570. tap_set_sndbuf(s->fd, tap, &err);
  571. if (err) {
  572. error_propagate(errp, err);
  573. goto failed;
  574. }
  575. if (tap->fd || tap->fds) {
  576. qemu_set_info_str(&s->nc, "fd=%d", fd);
  577. } else if (tap->helper) {
  578. qemu_set_info_str(&s->nc, "helper=%s", tap->helper);
  579. } else {
  580. qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname,
  581. script, downscript);
  582. if (strcmp(downscript, "no") != 0) {
  583. snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
  584. snprintf(s->down_script_arg, sizeof(s->down_script_arg),
  585. "%s", ifname);
  586. }
  587. }
  588. if (tap->has_vhost ? tap->vhost :
  589. vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
  590. VhostNetOptions options;
  591. options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
  592. options.net_backend = &s->nc;
  593. if (tap->has_poll_us) {
  594. options.busyloop_timeout = tap->poll_us;
  595. } else {
  596. options.busyloop_timeout = 0;
  597. }
  598. if (vhostfdname) {
  599. vhostfd = monitor_fd_param(monitor_cur(), vhostfdname, &err);
  600. if (vhostfd == -1) {
  601. if (tap->has_vhostforce && tap->vhostforce) {
  602. error_propagate(errp, err);
  603. } else {
  604. warn_report_err(err);
  605. }
  606. goto failed;
  607. }
  608. if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
  609. error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
  610. name, fd);
  611. goto failed;
  612. }
  613. } else {
  614. vhostfd = open("/dev/vhost-net", O_RDWR);
  615. if (vhostfd < 0) {
  616. if (tap->has_vhostforce && tap->vhostforce) {
  617. error_setg_errno(errp, errno,
  618. "tap: open vhost char device failed");
  619. } else {
  620. warn_report("tap: open vhost char device failed: %s",
  621. strerror(errno));
  622. }
  623. goto failed;
  624. }
  625. if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
  626. error_setg_errno(errp, errno, "Failed to set FD nonblocking");
  627. goto failed;
  628. }
  629. }
  630. options.opaque = (void *)(uintptr_t)vhostfd;
  631. options.nvqs = 2;
  632. s->vhost_net = vhost_net_init(&options);
  633. if (!s->vhost_net) {
  634. if (tap->has_vhostforce && tap->vhostforce) {
  635. error_setg(errp, VHOST_NET_INIT_FAILED);
  636. } else {
  637. warn_report(VHOST_NET_INIT_FAILED);
  638. }
  639. goto failed;
  640. }
  641. } else if (vhostfdname) {
  642. error_setg(errp, "vhostfd(s)= is not valid without vhost");
  643. goto failed;
  644. }
  645. return;
  646. failed:
  647. qemu_del_net_client(&s->nc);
  648. }
  649. static int get_fds(char *str, char *fds[], int max)
  650. {
  651. char *ptr = str, *this;
  652. size_t len = strlen(str);
  653. int i = 0;
  654. while (i < max && ptr < str + len) {
  655. this = strchr(ptr, ':');
  656. if (this == NULL) {
  657. fds[i] = g_strdup(ptr);
  658. } else {
  659. fds[i] = g_strndup(ptr, this - ptr);
  660. }
  661. i++;
  662. if (this == NULL) {
  663. break;
  664. } else {
  665. ptr = this + 1;
  666. }
  667. }
  668. return i;
  669. }
  670. int net_init_tap(const Netdev *netdev, const char *name,
  671. NetClientState *peer, Error **errp)
  672. {
  673. const NetdevTapOptions *tap;
  674. int fd, vnet_hdr = 0, i = 0, queues;
  675. /* for the no-fd, no-helper case */
  676. const char *script;
  677. const char *downscript;
  678. Error *err = NULL;
  679. const char *vhostfdname;
  680. char ifname[128];
  681. int ret = 0;
  682. assert(netdev->type == NET_CLIENT_DRIVER_TAP);
  683. tap = &netdev->u.tap;
  684. queues = tap->has_queues ? tap->queues : 1;
  685. vhostfdname = tap->vhostfd;
  686. script = tap->script;
  687. downscript = tap->downscript;
  688. /* QEMU hubs do not support multiqueue tap, in this case peer is set.
  689. * For -netdev, peer is always NULL. */
  690. if (peer && (tap->has_queues || tap->fds || tap->vhostfds)) {
  691. error_setg(errp, "Multiqueue tap cannot be used with hubs");
  692. return -1;
  693. }
  694. if (tap->fd) {
  695. if (tap->ifname || tap->script || tap->downscript ||
  696. tap->has_vnet_hdr || tap->helper || tap->has_queues ||
  697. tap->fds || tap->vhostfds) {
  698. error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
  699. "helper=, queues=, fds=, and vhostfds= "
  700. "are invalid with fd=");
  701. return -1;
  702. }
  703. fd = monitor_fd_param(monitor_cur(), tap->fd, errp);
  704. if (fd == -1) {
  705. return -1;
  706. }
  707. if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
  708. error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
  709. name, fd);
  710. close(fd);
  711. return -1;
  712. }
  713. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  714. if (vnet_hdr < 0) {
  715. close(fd);
  716. return -1;
  717. }
  718. net_init_tap_one(tap, peer, "tap", name, NULL,
  719. script, downscript,
  720. vhostfdname, vnet_hdr, fd, &err);
  721. if (err) {
  722. error_propagate(errp, err);
  723. close(fd);
  724. return -1;
  725. }
  726. } else if (tap->fds) {
  727. char **fds;
  728. char **vhost_fds;
  729. int nfds = 0, nvhosts = 0;
  730. if (tap->ifname || tap->script || tap->downscript ||
  731. tap->has_vnet_hdr || tap->helper || tap->has_queues ||
  732. tap->vhostfd) {
  733. error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
  734. "helper=, queues=, and vhostfd= "
  735. "are invalid with fds=");
  736. return -1;
  737. }
  738. fds = g_new0(char *, MAX_TAP_QUEUES);
  739. vhost_fds = g_new0(char *, MAX_TAP_QUEUES);
  740. nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
  741. if (tap->vhostfds) {
  742. nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
  743. if (nfds != nvhosts) {
  744. error_setg(errp, "The number of fds passed does not match "
  745. "the number of vhostfds passed");
  746. ret = -1;
  747. goto free_fail;
  748. }
  749. }
  750. for (i = 0; i < nfds; i++) {
  751. fd = monitor_fd_param(monitor_cur(), fds[i], errp);
  752. if (fd == -1) {
  753. ret = -1;
  754. goto free_fail;
  755. }
  756. ret = g_unix_set_fd_nonblocking(fd, true, NULL);
  757. if (!ret) {
  758. error_setg_errno(errp, errno, "%s: Can't use file descriptor %d",
  759. name, fd);
  760. goto free_fail;
  761. }
  762. if (i == 0) {
  763. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  764. if (vnet_hdr < 0) {
  765. ret = -1;
  766. goto free_fail;
  767. }
  768. } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
  769. error_setg(errp,
  770. "vnet_hdr not consistent across given tap fds");
  771. ret = -1;
  772. goto free_fail;
  773. }
  774. net_init_tap_one(tap, peer, "tap", name, ifname,
  775. script, downscript,
  776. tap->vhostfds ? vhost_fds[i] : NULL,
  777. vnet_hdr, fd, &err);
  778. if (err) {
  779. error_propagate(errp, err);
  780. ret = -1;
  781. goto free_fail;
  782. }
  783. }
  784. free_fail:
  785. for (i = 0; i < nvhosts; i++) {
  786. g_free(vhost_fds[i]);
  787. }
  788. for (i = 0; i < nfds; i++) {
  789. g_free(fds[i]);
  790. }
  791. g_free(fds);
  792. g_free(vhost_fds);
  793. return ret;
  794. } else if (tap->helper) {
  795. if (tap->ifname || tap->script || tap->downscript ||
  796. tap->has_vnet_hdr || tap->has_queues || tap->vhostfds) {
  797. error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
  798. "queues=, and vhostfds= are invalid with helper=");
  799. return -1;
  800. }
  801. fd = net_bridge_run_helper(tap->helper,
  802. tap->br ?: DEFAULT_BRIDGE_INTERFACE,
  803. errp);
  804. if (fd == -1) {
  805. return -1;
  806. }
  807. if (!g_unix_set_fd_nonblocking(fd, true, NULL)) {
  808. error_setg_errno(errp, errno, "Failed to set FD nonblocking");
  809. return -1;
  810. }
  811. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  812. if (vnet_hdr < 0) {
  813. close(fd);
  814. return -1;
  815. }
  816. net_init_tap_one(tap, peer, "bridge", name, ifname,
  817. script, downscript, vhostfdname,
  818. vnet_hdr, fd, &err);
  819. if (err) {
  820. error_propagate(errp, err);
  821. close(fd);
  822. return -1;
  823. }
  824. } else {
  825. g_autofree char *default_script = NULL;
  826. g_autofree char *default_downscript = NULL;
  827. if (tap->vhostfds) {
  828. error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
  829. return -1;
  830. }
  831. if (!script) {
  832. script = default_script = get_relocated_path(DEFAULT_NETWORK_SCRIPT);
  833. }
  834. if (!downscript) {
  835. downscript = default_downscript =
  836. get_relocated_path(DEFAULT_NETWORK_DOWN_SCRIPT);
  837. }
  838. if (tap->ifname) {
  839. pstrcpy(ifname, sizeof ifname, tap->ifname);
  840. } else {
  841. ifname[0] = '\0';
  842. }
  843. for (i = 0; i < queues; i++) {
  844. fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
  845. ifname, sizeof ifname, queues > 1, errp);
  846. if (fd == -1) {
  847. return -1;
  848. }
  849. if (queues > 1 && i == 0 && !tap->ifname) {
  850. if (tap_fd_get_ifname(fd, ifname)) {
  851. error_setg(errp, "Fail to get ifname");
  852. close(fd);
  853. return -1;
  854. }
  855. }
  856. net_init_tap_one(tap, peer, "tap", name, ifname,
  857. i >= 1 ? "no" : script,
  858. i >= 1 ? "no" : downscript,
  859. vhostfdname, vnet_hdr, fd, &err);
  860. if (err) {
  861. error_propagate(errp, err);
  862. close(fd);
  863. return -1;
  864. }
  865. }
  866. }
  867. return 0;
  868. }
  869. VHostNetState *tap_get_vhost_net(NetClientState *nc)
  870. {
  871. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  872. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  873. return s->vhost_net;
  874. }
  875. int tap_enable(NetClientState *nc)
  876. {
  877. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  878. int ret;
  879. if (s->enabled) {
  880. return 0;
  881. } else {
  882. ret = tap_fd_enable(s->fd);
  883. if (ret == 0) {
  884. s->enabled = true;
  885. tap_update_fd_handler(s);
  886. }
  887. return ret;
  888. }
  889. }
  890. int tap_disable(NetClientState *nc)
  891. {
  892. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  893. int ret;
  894. if (s->enabled == 0) {
  895. return 0;
  896. } else {
  897. ret = tap_fd_disable(s->fd);
  898. if (ret == 0) {
  899. qemu_purge_queued_packets(nc);
  900. s->enabled = false;
  901. tap_update_fd_handler(s);
  902. }
  903. return ret;
  904. }
  905. }