tap.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021
  1. /*
  2. * QEMU System Emulator
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. * Copyright (c) 2009 Red Hat, Inc.
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy
  8. * of this software and associated documentation files (the "Software"), to deal
  9. * in the Software without restriction, including without limitation the rights
  10. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. * copies of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. * THE SOFTWARE.
  24. */
  25. #include "qemu/osdep.h"
  26. #include "tap_int.h"
  27. #include <sys/ioctl.h>
  28. #include <sys/wait.h>
  29. #include <sys/socket.h>
  30. #include <net/if.h>
  31. #include "net/net.h"
  32. #include "clients.h"
  33. #include "monitor/monitor.h"
  34. #include "sysemu/sysemu.h"
  35. #include "qapi/error.h"
  36. #include "qemu-common.h"
  37. #include "qemu/cutils.h"
  38. #include "qemu/error-report.h"
  39. #include "qemu/main-loop.h"
  40. #include "qemu/sockets.h"
  41. #include "net/tap.h"
  42. #include "net/vhost_net.h"
  43. typedef struct TAPState {
  44. NetClientState nc;
  45. int fd;
  46. char down_script[1024];
  47. char down_script_arg[128];
  48. uint8_t buf[NET_BUFSIZE];
  49. bool read_poll;
  50. bool write_poll;
  51. bool using_vnet_hdr;
  52. bool has_ufo;
  53. bool enabled;
  54. VHostNetState *vhost_net;
  55. unsigned host_vnet_hdr_len;
  56. Notifier exit;
  57. } TAPState;
  58. static void launch_script(const char *setup_script, const char *ifname,
  59. int fd, Error **errp);
  60. static void tap_send(void *opaque);
  61. static void tap_writable(void *opaque);
  62. static void tap_update_fd_handler(TAPState *s)
  63. {
  64. qemu_set_fd_handler(s->fd,
  65. s->read_poll && s->enabled ? tap_send : NULL,
  66. s->write_poll && s->enabled ? tap_writable : NULL,
  67. s);
  68. }
  69. static void tap_read_poll(TAPState *s, bool enable)
  70. {
  71. s->read_poll = enable;
  72. tap_update_fd_handler(s);
  73. }
  74. static void tap_write_poll(TAPState *s, bool enable)
  75. {
  76. s->write_poll = enable;
  77. tap_update_fd_handler(s);
  78. }
  79. static void tap_writable(void *opaque)
  80. {
  81. TAPState *s = opaque;
  82. tap_write_poll(s, false);
  83. qemu_flush_queued_packets(&s->nc);
  84. }
  85. static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
  86. {
  87. ssize_t len;
  88. do {
  89. len = writev(s->fd, iov, iovcnt);
  90. } while (len == -1 && errno == EINTR);
  91. if (len == -1 && errno == EAGAIN) {
  92. tap_write_poll(s, true);
  93. return 0;
  94. }
  95. return len;
  96. }
  97. static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
  98. int iovcnt)
  99. {
  100. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  101. const struct iovec *iovp = iov;
  102. struct iovec iov_copy[iovcnt + 1];
  103. struct virtio_net_hdr_mrg_rxbuf hdr = { };
  104. if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
  105. iov_copy[0].iov_base = &hdr;
  106. iov_copy[0].iov_len = s->host_vnet_hdr_len;
  107. memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
  108. iovp = iov_copy;
  109. iovcnt++;
  110. }
  111. return tap_write_packet(s, iovp, iovcnt);
  112. }
  113. static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
  114. {
  115. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  116. struct iovec iov[2];
  117. int iovcnt = 0;
  118. struct virtio_net_hdr_mrg_rxbuf hdr = { };
  119. if (s->host_vnet_hdr_len) {
  120. iov[iovcnt].iov_base = &hdr;
  121. iov[iovcnt].iov_len = s->host_vnet_hdr_len;
  122. iovcnt++;
  123. }
  124. iov[iovcnt].iov_base = (char *)buf;
  125. iov[iovcnt].iov_len = size;
  126. iovcnt++;
  127. return tap_write_packet(s, iov, iovcnt);
  128. }
  129. static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
  130. {
  131. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  132. struct iovec iov[1];
  133. if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
  134. return tap_receive_raw(nc, buf, size);
  135. }
  136. iov[0].iov_base = (char *)buf;
  137. iov[0].iov_len = size;
  138. return tap_write_packet(s, iov, 1);
  139. }
  #ifndef __sun__
  /*
   * Read one packet from tapfd into buf (at most maxlen bytes).
   * Returns whatever read() returns.  Not compiled when __sun__ is defined.
   */
  ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
  {
      ssize_t nread;

      nread = read(tapfd, buf, maxlen);
      return nread;
  }
  #endif
  146. static void tap_send_completed(NetClientState *nc, ssize_t len)
  147. {
  148. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  149. tap_read_poll(s, true);
  150. }
  151. static void tap_send(void *opaque)
  152. {
  153. TAPState *s = opaque;
  154. int size;
  155. int packets = 0;
  156. while (true) {
  157. uint8_t *buf = s->buf;
  158. size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
  159. if (size <= 0) {
  160. break;
  161. }
  162. if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
  163. buf += s->host_vnet_hdr_len;
  164. size -= s->host_vnet_hdr_len;
  165. }
  166. size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
  167. if (size == 0) {
  168. tap_read_poll(s, false);
  169. break;
  170. } else if (size < 0) {
  171. break;
  172. }
  173. /*
  174. * When the host keeps receiving more packets while tap_send() is
  175. * running we can hog the QEMU global mutex. Limit the number of
  176. * packets that are processed per tap_send() callback to prevent
  177. * stalling the guest.
  178. */
  179. packets++;
  180. if (packets >= 50) {
  181. break;
  182. }
  183. }
  184. }
  185. static bool tap_has_ufo(NetClientState *nc)
  186. {
  187. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  188. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  189. return s->has_ufo;
  190. }
  191. static bool tap_has_vnet_hdr(NetClientState *nc)
  192. {
  193. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  194. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  195. return !!s->host_vnet_hdr_len;
  196. }
  197. static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
  198. {
  199. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  200. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  201. return !!tap_probe_vnet_hdr_len(s->fd, len);
  202. }
  203. static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
  204. {
  205. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  206. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  207. assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
  208. len == sizeof(struct virtio_net_hdr) ||
  209. len == sizeof(struct virtio_net_hdr_v1_hash));
  210. tap_fd_set_vnet_hdr_len(s->fd, len);
  211. s->host_vnet_hdr_len = len;
  212. }
  213. static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
  214. {
  215. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  216. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  217. assert(!!s->host_vnet_hdr_len == using_vnet_hdr);
  218. s->using_vnet_hdr = using_vnet_hdr;
  219. }
  220. static int tap_set_vnet_le(NetClientState *nc, bool is_le)
  221. {
  222. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  223. return tap_fd_set_vnet_le(s->fd, is_le);
  224. }
  225. static int tap_set_vnet_be(NetClientState *nc, bool is_be)
  226. {
  227. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  228. return tap_fd_set_vnet_be(s->fd, is_be);
  229. }
  230. static void tap_set_offload(NetClientState *nc, int csum, int tso4,
  231. int tso6, int ecn, int ufo)
  232. {
  233. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  234. if (s->fd < 0) {
  235. return;
  236. }
  237. tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
  238. }
  239. static void tap_exit_notify(Notifier *notifier, void *data)
  240. {
  241. TAPState *s = container_of(notifier, TAPState, exit);
  242. Error *err = NULL;
  243. if (s->down_script[0]) {
  244. launch_script(s->down_script, s->down_script_arg, s->fd, &err);
  245. if (err) {
  246. error_report_err(err);
  247. }
  248. }
  249. }
  250. static void tap_cleanup(NetClientState *nc)
  251. {
  252. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  253. if (s->vhost_net) {
  254. vhost_net_cleanup(s->vhost_net);
  255. g_free(s->vhost_net);
  256. s->vhost_net = NULL;
  257. }
  258. qemu_purge_queued_packets(nc);
  259. tap_exit_notify(&s->exit, NULL);
  260. qemu_remove_exit_notifier(&s->exit);
  261. tap_read_poll(s, false);
  262. tap_write_poll(s, false);
  263. close(s->fd);
  264. s->fd = -1;
  265. }
  266. static void tap_poll(NetClientState *nc, bool enable)
  267. {
  268. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  269. tap_read_poll(s, enable);
  270. tap_write_poll(s, enable);
  271. }
  272. int tap_get_fd(NetClientState *nc)
  273. {
  274. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  275. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  276. return s->fd;
  277. }
  278. /* fd support */
  279. static NetClientInfo net_tap_info = {
  280. .type = NET_CLIENT_DRIVER_TAP,
  281. .size = sizeof(TAPState),
  282. .receive = tap_receive,
  283. .receive_raw = tap_receive_raw,
  284. .receive_iov = tap_receive_iov,
  285. .poll = tap_poll,
  286. .cleanup = tap_cleanup,
  287. .has_ufo = tap_has_ufo,
  288. .has_vnet_hdr = tap_has_vnet_hdr,
  289. .has_vnet_hdr_len = tap_has_vnet_hdr_len,
  290. .using_vnet_hdr = tap_using_vnet_hdr,
  291. .set_offload = tap_set_offload,
  292. .set_vnet_hdr_len = tap_set_vnet_hdr_len,
  293. .set_vnet_le = tap_set_vnet_le,
  294. .set_vnet_be = tap_set_vnet_be,
  295. };
  296. static TAPState *net_tap_fd_init(NetClientState *peer,
  297. const char *model,
  298. const char *name,
  299. int fd,
  300. int vnet_hdr)
  301. {
  302. NetClientState *nc;
  303. TAPState *s;
  304. nc = qemu_new_net_client(&net_tap_info, peer, model, name);
  305. s = DO_UPCAST(TAPState, nc, nc);
  306. s->fd = fd;
  307. s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
  308. s->using_vnet_hdr = false;
  309. s->has_ufo = tap_probe_has_ufo(s->fd);
  310. s->enabled = true;
  311. tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
  312. /*
  313. * Make sure host header length is set correctly in tap:
  314. * it might have been modified by another instance of qemu.
  315. */
  316. if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
  317. tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
  318. }
  319. tap_read_poll(s, true);
  320. s->vhost_net = NULL;
  321. s->exit.notify = tap_exit_notify;
  322. qemu_add_exit_notifier(&s->exit);
  323. return s;
  324. }
  325. static void launch_script(const char *setup_script, const char *ifname,
  326. int fd, Error **errp)
  327. {
  328. int pid, status;
  329. char *args[3];
  330. char **parg;
  331. /* try to launch network script */
  332. pid = fork();
  333. if (pid < 0) {
  334. error_setg_errno(errp, errno, "could not launch network script %s",
  335. setup_script);
  336. return;
  337. }
  338. if (pid == 0) {
  339. int open_max = sysconf(_SC_OPEN_MAX), i;
  340. for (i = 3; i < open_max; i++) {
  341. if (i != fd) {
  342. close(i);
  343. }
  344. }
  345. parg = args;
  346. *parg++ = (char *)setup_script;
  347. *parg++ = (char *)ifname;
  348. *parg = NULL;
  349. execv(setup_script, args);
  350. _exit(1);
  351. } else {
  352. while (waitpid(pid, &status, 0) != pid) {
  353. /* loop */
  354. }
  355. if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
  356. return;
  357. }
  358. error_setg(errp, "network script %s failed with status %d",
  359. setup_script, status);
  360. }
  361. }
  /*
   * Receive one file descriptor sent as SCM_RIGHTS ancillary data over the
   * UNIX socket c.
   *
   * Returns the received descriptor (>= 0) on success.  Returns -1 with
   * errno set on failure:
   *   - recvmsg() failed: errno is left as recvmsg() set it (the caller
   *     retries on EINTR);
   *   - the peer closed the socket without sending anything: ECONNRESET;
   *   - data arrived without a well-formed single-descriptor SCM_RIGHTS
   *     control message: EINVAL.
   *
   * EOF and missing control data are reported as errors so that the caller
   * never mistakes 0 or uninitialized stack contents for a descriptor.
   */
  static int recv_fd(int c)
  {
      int fd;
      /* The union guarantees cmsghdr alignment for the control buffer. */
      union {
          struct cmsghdr align;
          uint8_t buf[CMSG_SPACE(sizeof(fd))];
      } control;
      struct msghdr msg = {
          .msg_control = control.buf,
          /* Room for exactly one descriptor, not the padded CMSG_SPACE. */
          .msg_controllen = CMSG_LEN(sizeof(fd)),
      };
      struct cmsghdr *cmsg;
      struct iovec iov;
      uint8_t req[1];
      ssize_t len;

      iov.iov_base = req;
      iov.iov_len = sizeof(req);
      msg.msg_iov = &iov;
      msg.msg_iovlen = 1;

      len = recvmsg(c, &msg, 0);
      if (len < 0) {
          return -1;
      }
      if (len == 0) {
          errno = ECONNRESET;
          return -1;
      }

      cmsg = CMSG_FIRSTHDR(&msg);
      if (cmsg == NULL ||
          cmsg->cmsg_level != SOL_SOCKET ||
          cmsg->cmsg_type != SCM_RIGHTS ||
          cmsg->cmsg_len != CMSG_LEN(sizeof(fd))) {
          errno = EINVAL;
          return -1;
      }

      memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
      return fd;
  }
  390. static int net_bridge_run_helper(const char *helper, const char *bridge,
  391. Error **errp)
  392. {
  393. sigset_t oldmask, mask;
  394. int pid, status;
  395. char *args[5];
  396. char **parg;
  397. int sv[2];
  398. sigemptyset(&mask);
  399. sigaddset(&mask, SIGCHLD);
  400. sigprocmask(SIG_BLOCK, &mask, &oldmask);
  401. if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
  402. error_setg_errno(errp, errno, "socketpair() failed");
  403. return -1;
  404. }
  405. /* try to launch bridge helper */
  406. pid = fork();
  407. if (pid < 0) {
  408. error_setg_errno(errp, errno, "Can't fork bridge helper");
  409. return -1;
  410. }
  411. if (pid == 0) {
  412. int open_max = sysconf(_SC_OPEN_MAX), i;
  413. char *fd_buf = NULL;
  414. char *br_buf = NULL;
  415. char *helper_cmd = NULL;
  416. for (i = 3; i < open_max; i++) {
  417. if (i != sv[1]) {
  418. close(i);
  419. }
  420. }
  421. fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]);
  422. if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
  423. /* assume helper is a command */
  424. if (strstr(helper, "--br=") == NULL) {
  425. br_buf = g_strdup_printf("%s%s", "--br=", bridge);
  426. }
  427. helper_cmd = g_strdup_printf("%s %s %s %s", helper,
  428. "--use-vnet", fd_buf, br_buf ? br_buf : "");
  429. parg = args;
  430. *parg++ = (char *)"sh";
  431. *parg++ = (char *)"-c";
  432. *parg++ = helper_cmd;
  433. *parg++ = NULL;
  434. execv("/bin/sh", args);
  435. g_free(helper_cmd);
  436. } else {
  437. /* assume helper is just the executable path name */
  438. br_buf = g_strdup_printf("%s%s", "--br=", bridge);
  439. parg = args;
  440. *parg++ = (char *)helper;
  441. *parg++ = (char *)"--use-vnet";
  442. *parg++ = fd_buf;
  443. *parg++ = br_buf;
  444. *parg++ = NULL;
  445. execv(helper, args);
  446. }
  447. g_free(fd_buf);
  448. g_free(br_buf);
  449. _exit(1);
  450. } else {
  451. int fd;
  452. int saved_errno;
  453. close(sv[1]);
  454. do {
  455. fd = recv_fd(sv[0]);
  456. } while (fd == -1 && errno == EINTR);
  457. saved_errno = errno;
  458. close(sv[0]);
  459. while (waitpid(pid, &status, 0) != pid) {
  460. /* loop */
  461. }
  462. sigprocmask(SIG_SETMASK, &oldmask, NULL);
  463. if (fd < 0) {
  464. error_setg_errno(errp, saved_errno,
  465. "failed to recv file descriptor");
  466. return -1;
  467. }
  468. if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
  469. error_setg(errp, "bridge helper failed");
  470. return -1;
  471. }
  472. return fd;
  473. }
  474. }
  475. int net_init_bridge(const Netdev *netdev, const char *name,
  476. NetClientState *peer, Error **errp)
  477. {
  478. const NetdevBridgeOptions *bridge;
  479. const char *helper, *br;
  480. TAPState *s;
  481. int fd, vnet_hdr;
  482. assert(netdev->type == NET_CLIENT_DRIVER_BRIDGE);
  483. bridge = &netdev->u.bridge;
  484. helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
  485. br = bridge->has_br ? bridge->br : DEFAULT_BRIDGE_INTERFACE;
  486. fd = net_bridge_run_helper(helper, br, errp);
  487. if (fd == -1) {
  488. return -1;
  489. }
  490. qemu_set_nonblock(fd);
  491. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  492. if (vnet_hdr < 0) {
  493. close(fd);
  494. return -1;
  495. }
  496. s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
  497. snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
  498. br);
  499. return 0;
  500. }
  501. static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
  502. const char *setup_script, char *ifname,
  503. size_t ifname_sz, int mq_required, Error **errp)
  504. {
  505. Error *err = NULL;
  506. int fd, vnet_hdr_required;
  507. if (tap->has_vnet_hdr) {
  508. *vnet_hdr = tap->vnet_hdr;
  509. vnet_hdr_required = *vnet_hdr;
  510. } else {
  511. *vnet_hdr = 1;
  512. vnet_hdr_required = 0;
  513. }
  514. TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
  515. mq_required, errp));
  516. if (fd < 0) {
  517. return -1;
  518. }
  519. if (setup_script &&
  520. setup_script[0] != '\0' &&
  521. strcmp(setup_script, "no") != 0) {
  522. launch_script(setup_script, ifname, fd, &err);
  523. if (err) {
  524. error_propagate(errp, err);
  525. close(fd);
  526. return -1;
  527. }
  528. }
  529. return fd;
  530. }
  531. #define MAX_TAP_QUEUES 1024
  532. static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
  533. const char *model, const char *name,
  534. const char *ifname, const char *script,
  535. const char *downscript, const char *vhostfdname,
  536. int vnet_hdr, int fd, Error **errp)
  537. {
  538. Error *err = NULL;
  539. TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
  540. int vhostfd;
  541. tap_set_sndbuf(s->fd, tap, &err);
  542. if (err) {
  543. error_propagate(errp, err);
  544. return;
  545. }
  546. if (tap->has_fd || tap->has_fds) {
  547. snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
  548. } else if (tap->has_helper) {
  549. snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
  550. tap->helper);
  551. } else {
  552. snprintf(s->nc.info_str, sizeof(s->nc.info_str),
  553. "ifname=%s,script=%s,downscript=%s", ifname, script,
  554. downscript);
  555. if (strcmp(downscript, "no") != 0) {
  556. snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
  557. snprintf(s->down_script_arg, sizeof(s->down_script_arg),
  558. "%s", ifname);
  559. }
  560. }
  561. if (tap->has_vhost ? tap->vhost :
  562. vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
  563. VhostNetOptions options;
  564. options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
  565. options.net_backend = &s->nc;
  566. if (tap->has_poll_us) {
  567. options.busyloop_timeout = tap->poll_us;
  568. } else {
  569. options.busyloop_timeout = 0;
  570. }
  571. if (vhostfdname) {
  572. int ret;
  573. vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
  574. if (vhostfd == -1) {
  575. if (tap->has_vhostforce && tap->vhostforce) {
  576. error_propagate(errp, err);
  577. } else {
  578. warn_report_err(err);
  579. }
  580. return;
  581. }
  582. ret = qemu_try_set_nonblock(vhostfd);
  583. if (ret < 0) {
  584. error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
  585. name, fd);
  586. return;
  587. }
  588. } else {
  589. vhostfd = open("/dev/vhost-net", O_RDWR);
  590. if (vhostfd < 0) {
  591. if (tap->has_vhostforce && tap->vhostforce) {
  592. error_setg_errno(errp, errno,
  593. "tap: open vhost char device failed");
  594. } else {
  595. warn_report("tap: open vhost char device failed: %s",
  596. strerror(errno));
  597. }
  598. return;
  599. }
  600. qemu_set_nonblock(vhostfd);
  601. }
  602. options.opaque = (void *)(uintptr_t)vhostfd;
  603. s->vhost_net = vhost_net_init(&options);
  604. if (!s->vhost_net) {
  605. if (tap->has_vhostforce && tap->vhostforce) {
  606. error_setg(errp, VHOST_NET_INIT_FAILED);
  607. } else {
  608. warn_report(VHOST_NET_INIT_FAILED);
  609. }
  610. return;
  611. }
  612. } else if (vhostfdname) {
  613. error_setg(errp, "vhostfd(s)= is not valid without vhost");
  614. }
  615. }
  /*
   * Split the ':'-separated list in str into at most max newly allocated
   * strings stored in fds[].  Parsing stops at the end of the string, after
   * the last separator-free token, or once max entries were produced.
   *
   * Returns the number of entries stored; the caller frees each of them.
   */
  static int get_fds(char *str, char *fds[], int max)
  {
      const char *end = str + strlen(str);
      char *cursor = str;
      int count = 0;

      while (count < max && cursor < end) {
          char *sep = strchr(cursor, ':');

          if (sep == NULL) {
              /* Last token: take the rest of the string. */
              fds[count++] = g_strdup(cursor);
              break;
          }

          fds[count++] = g_strndup(cursor, sep - cursor);
          cursor = sep + 1;
      }

      return count;
  }
  637. int net_init_tap(const Netdev *netdev, const char *name,
  638. NetClientState *peer, Error **errp)
  639. {
  640. const NetdevTapOptions *tap;
  641. int fd, vnet_hdr = 0, i = 0, queues;
  642. /* for the no-fd, no-helper case */
  643. const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
  644. const char *downscript = NULL;
  645. Error *err = NULL;
  646. const char *vhostfdname;
  647. char ifname[128];
  648. int ret = 0;
  649. assert(netdev->type == NET_CLIENT_DRIVER_TAP);
  650. tap = &netdev->u.tap;
  651. queues = tap->has_queues ? tap->queues : 1;
  652. vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
  653. /* QEMU hubs do not support multiqueue tap, in this case peer is set.
  654. * For -netdev, peer is always NULL. */
  655. if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
  656. error_setg(errp, "Multiqueue tap cannot be used with hubs");
  657. return -1;
  658. }
  659. if (tap->has_fd) {
  660. if (tap->has_ifname || tap->has_script || tap->has_downscript ||
  661. tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
  662. tap->has_fds || tap->has_vhostfds) {
  663. error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
  664. "helper=, queues=, fds=, and vhostfds= "
  665. "are invalid with fd=");
  666. return -1;
  667. }
  668. fd = monitor_fd_param(cur_mon, tap->fd, errp);
  669. if (fd == -1) {
  670. return -1;
  671. }
  672. ret = qemu_try_set_nonblock(fd);
  673. if (ret < 0) {
  674. error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
  675. name, fd);
  676. return -1;
  677. }
  678. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  679. if (vnet_hdr < 0) {
  680. close(fd);
  681. return -1;
  682. }
  683. net_init_tap_one(tap, peer, "tap", name, NULL,
  684. script, downscript,
  685. vhostfdname, vnet_hdr, fd, &err);
  686. if (err) {
  687. error_propagate(errp, err);
  688. return -1;
  689. }
  690. } else if (tap->has_fds) {
  691. char **fds;
  692. char **vhost_fds;
  693. int nfds = 0, nvhosts = 0;
  694. if (tap->has_ifname || tap->has_script || tap->has_downscript ||
  695. tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
  696. tap->has_vhostfd) {
  697. error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
  698. "helper=, queues=, and vhostfd= "
  699. "are invalid with fds=");
  700. return -1;
  701. }
  702. fds = g_new0(char *, MAX_TAP_QUEUES);
  703. vhost_fds = g_new0(char *, MAX_TAP_QUEUES);
  704. nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
  705. if (tap->has_vhostfds) {
  706. nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
  707. if (nfds != nvhosts) {
  708. error_setg(errp, "The number of fds passed does not match "
  709. "the number of vhostfds passed");
  710. ret = -1;
  711. goto free_fail;
  712. }
  713. }
  714. for (i = 0; i < nfds; i++) {
  715. fd = monitor_fd_param(cur_mon, fds[i], errp);
  716. if (fd == -1) {
  717. ret = -1;
  718. goto free_fail;
  719. }
  720. ret = qemu_try_set_nonblock(fd);
  721. if (ret < 0) {
  722. error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
  723. name, fd);
  724. goto free_fail;
  725. }
  726. if (i == 0) {
  727. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  728. if (vnet_hdr < 0) {
  729. goto free_fail;
  730. }
  731. } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
  732. error_setg(errp,
  733. "vnet_hdr not consistent across given tap fds");
  734. ret = -1;
  735. goto free_fail;
  736. }
  737. net_init_tap_one(tap, peer, "tap", name, ifname,
  738. script, downscript,
  739. tap->has_vhostfds ? vhost_fds[i] : NULL,
  740. vnet_hdr, fd, &err);
  741. if (err) {
  742. error_propagate(errp, err);
  743. ret = -1;
  744. goto free_fail;
  745. }
  746. }
  747. free_fail:
  748. for (i = 0; i < nvhosts; i++) {
  749. g_free(vhost_fds[i]);
  750. }
  751. for (i = 0; i < nfds; i++) {
  752. g_free(fds[i]);
  753. }
  754. g_free(fds);
  755. g_free(vhost_fds);
  756. return ret;
  757. } else if (tap->has_helper) {
  758. if (tap->has_ifname || tap->has_script || tap->has_downscript ||
  759. tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
  760. error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
  761. "queues=, and vhostfds= are invalid with helper=");
  762. return -1;
  763. }
  764. fd = net_bridge_run_helper(tap->helper,
  765. tap->has_br ?
  766. tap->br : DEFAULT_BRIDGE_INTERFACE,
  767. errp);
  768. if (fd == -1) {
  769. return -1;
  770. }
  771. qemu_set_nonblock(fd);
  772. vnet_hdr = tap_probe_vnet_hdr(fd, errp);
  773. if (vnet_hdr < 0) {
  774. close(fd);
  775. return -1;
  776. }
  777. net_init_tap_one(tap, peer, "bridge", name, ifname,
  778. script, downscript, vhostfdname,
  779. vnet_hdr, fd, &err);
  780. if (err) {
  781. error_propagate(errp, err);
  782. close(fd);
  783. return -1;
  784. }
  785. } else {
  786. if (tap->has_vhostfds) {
  787. error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
  788. return -1;
  789. }
  790. script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
  791. downscript = tap->has_downscript ? tap->downscript :
  792. DEFAULT_NETWORK_DOWN_SCRIPT;
  793. if (tap->has_ifname) {
  794. pstrcpy(ifname, sizeof ifname, tap->ifname);
  795. } else {
  796. ifname[0] = '\0';
  797. }
  798. for (i = 0; i < queues; i++) {
  799. fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
  800. ifname, sizeof ifname, queues > 1, errp);
  801. if (fd == -1) {
  802. return -1;
  803. }
  804. if (queues > 1 && i == 0 && !tap->has_ifname) {
  805. if (tap_fd_get_ifname(fd, ifname)) {
  806. error_setg(errp, "Fail to get ifname");
  807. close(fd);
  808. return -1;
  809. }
  810. }
  811. net_init_tap_one(tap, peer, "tap", name, ifname,
  812. i >= 1 ? "no" : script,
  813. i >= 1 ? "no" : downscript,
  814. vhostfdname, vnet_hdr, fd, &err);
  815. if (err) {
  816. error_propagate(errp, err);
  817. close(fd);
  818. return -1;
  819. }
  820. }
  821. }
  822. return 0;
  823. }
  824. VHostNetState *tap_get_vhost_net(NetClientState *nc)
  825. {
  826. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  827. assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
  828. return s->vhost_net;
  829. }
  830. int tap_enable(NetClientState *nc)
  831. {
  832. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  833. int ret;
  834. if (s->enabled) {
  835. return 0;
  836. } else {
  837. ret = tap_fd_enable(s->fd);
  838. if (ret == 0) {
  839. s->enabled = true;
  840. tap_update_fd_handler(s);
  841. }
  842. return ret;
  843. }
  844. }
  845. int tap_disable(NetClientState *nc)
  846. {
  847. TAPState *s = DO_UPCAST(TAPState, nc, nc);
  848. int ret;
  849. if (s->enabled == 0) {
  850. return 0;
  851. } else {
  852. ret = tap_fd_disable(s->fd);
  853. if (ret == 0) {
  854. qemu_purge_queued_packets(nc);
  855. s->enabled = false;
  856. tap_update_fd_handler(s);
  857. }
  858. return ret;
  859. }
  860. }