12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731 |
- /*
- * QEMU System Emulator
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- * Copyright (c) 2012-2014 Cisco Systems
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
- #include "qemu/osdep.h"
- #include <linux/ip.h>
- #include <netdb.h>
- #include "net/net.h"
- #include "clients.h"
- #include "qapi/error.h"
- #include "qemu/error-report.h"
- #include "qemu/option.h"
- #include "qemu/sockets.h"
- #include "qemu/iov.h"
- #include "qemu/main-loop.h"
- #include "qemu/memalign.h"
/*
 * Receive buffer geometry.
 *
 * The payload buffer size has not been tuned: both the optimum value and
 * the best way of paging the buffers in on different systems still need
 * to be investigated.  The current size is chosen to be large enough for
 * one packet plus some headers.
 */
#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
#define BUFFER_SIZE (16 * 1024)

/* iovec entries per message */
#define IOVSIZE 2

/* Number of message slots, and the resulting total number of iovecs */
#define MAX_L2TPV3_MSGCNT 64
#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)

/* A leading header word of 0x30000 marks a data packet */
#define L2TPV3_DATA_PACKET 0x30000

/* IP protocol number assigned by IANA to L2TPv3 */
#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif
- typedef struct NetL2TPV3State {
- NetClientState nc;
- int fd;
- /*
- * these are used for xmit - that happens packet a time
- * and for first sign of life packet (easier to parse that once)
- */
- uint8_t *header_buf;
- struct iovec *vec;
- /*
- * these are used for receive - try to "eat" up to 32 packets at a time
- */
- struct mmsghdr *msgvec;
- /*
- * peer address
- */
- struct sockaddr_storage *dgram_dst;
- uint32_t dst_size;
- /*
- * L2TPv3 parameters
- */
- uint64_t rx_cookie;
- uint64_t tx_cookie;
- uint32_t rx_session;
- uint32_t tx_session;
- uint32_t header_size;
- uint32_t counter;
- /*
- * DOS avoidance in error handling
- */
- bool header_mismatch;
- /*
- * Ring buffer handling
- */
- int queue_head;
- int queue_tail;
- int queue_depth;
- /*
- * Precomputed offsets
- */
- uint32_t offset;
- uint32_t cookie_offset;
- uint32_t counter_offset;
- uint32_t session_offset;
- /* Poll Control */
- bool read_poll;
- bool write_poll;
- /* Flags */
- bool ipv6;
- bool udp;
- bool has_counter;
- bool pin_counter;
- bool cookie;
- bool cookie_is_64;
- } NetL2TPV3State;
- static void net_l2tpv3_send(void *opaque);
- static void l2tpv3_writable(void *opaque);
- static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
- {
- qemu_set_fd_handler(s->fd,
- s->read_poll ? net_l2tpv3_send : NULL,
- s->write_poll ? l2tpv3_writable : NULL,
- s);
- }
- static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
- {
- if (s->read_poll != enable) {
- s->read_poll = enable;
- l2tpv3_update_fd_handler(s);
- }
- }
- static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
- {
- if (s->write_poll != enable) {
- s->write_poll = enable;
- l2tpv3_update_fd_handler(s);
- }
- }
- static void l2tpv3_writable(void *opaque)
- {
- NetL2TPV3State *s = opaque;
- l2tpv3_write_poll(s, false);
- qemu_flush_queued_packets(&s->nc);
- }
- static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
- {
- NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
- l2tpv3_read_poll(s, true);
- }
- static void l2tpv3_poll(NetClientState *nc, bool enable)
- {
- NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
- l2tpv3_write_poll(s, enable);
- l2tpv3_read_poll(s, enable);
- }
- static void l2tpv3_form_header(NetL2TPV3State *s)
- {
- uint32_t *counter;
- if (s->udp) {
- stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
- }
- stl_be_p(
- (uint32_t *) (s->header_buf + s->session_offset),
- s->tx_session
- );
- if (s->cookie) {
- if (s->cookie_is_64) {
- stq_be_p(
- (uint64_t *)(s->header_buf + s->cookie_offset),
- s->tx_cookie
- );
- } else {
- stl_be_p(
- (uint32_t *) (s->header_buf + s->cookie_offset),
- s->tx_cookie
- );
- }
- }
- if (s->has_counter) {
- counter = (uint32_t *)(s->header_buf + s->counter_offset);
- if (s->pin_counter) {
- *counter = 0;
- } else {
- stl_be_p(counter, ++s->counter);
- }
- }
- }
- static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
- const struct iovec *iov,
- int iovcnt)
- {
- NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
- struct msghdr message;
- int ret;
- if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
- error_report(
- "iovec too long %d > %d, change l2tpv3.h",
- iovcnt, MAX_L2TPV3_IOVCNT
- );
- return -1;
- }
- l2tpv3_form_header(s);
- memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
- s->vec->iov_base = s->header_buf;
- s->vec->iov_len = s->offset;
- message.msg_name = s->dgram_dst;
- message.msg_namelen = s->dst_size;
- message.msg_iov = s->vec;
- message.msg_iovlen = iovcnt + 1;
- message.msg_control = NULL;
- message.msg_controllen = 0;
- message.msg_flags = 0;
- ret = RETRY_ON_EINTR(sendmsg(s->fd, &message, 0));
- if (ret > 0) {
- ret -= s->offset;
- } else if (ret == 0) {
- /* belt and braces - should not occur on DGRAM
- * we should get an error and never a 0 send
- */
- ret = iov_size(iov, iovcnt);
- } else {
- /* signal upper layer that socket buffer is full */
- ret = -errno;
- if (ret == -EAGAIN || ret == -ENOBUFS) {
- l2tpv3_write_poll(s, true);
- ret = 0;
- }
- }
- return ret;
- }
- static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
- const uint8_t *buf,
- size_t size)
- {
- NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
- struct iovec *vec;
- struct msghdr message;
- ssize_t ret = 0;
- l2tpv3_form_header(s);
- vec = s->vec;
- vec->iov_base = s->header_buf;
- vec->iov_len = s->offset;
- vec++;
- vec->iov_base = (void *) buf;
- vec->iov_len = size;
- message.msg_name = s->dgram_dst;
- message.msg_namelen = s->dst_size;
- message.msg_iov = s->vec;
- message.msg_iovlen = 2;
- message.msg_control = NULL;
- message.msg_controllen = 0;
- message.msg_flags = 0;
- ret = RETRY_ON_EINTR(sendmsg(s->fd, &message, 0));
- if (ret > 0) {
- ret -= s->offset;
- } else if (ret == 0) {
- /* belt and braces - should not occur on DGRAM
- * we should get an error and never a 0 send
- */
- ret = size;
- } else {
- ret = -errno;
- if (ret == -EAGAIN || ret == -ENOBUFS) {
- /* signal upper layer that socket buffer is full */
- l2tpv3_write_poll(s, true);
- ret = 0;
- }
- }
- return ret;
- }
- static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
- {
- uint32_t *session;
- uint64_t cookie;
- if ((!s->udp) && (!s->ipv6)) {
- buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
- }
- /* we do not do a strict check for "data" packets as per
- * the RFC spec because the pure IP spec does not have
- * that anyway.
- */
- if (s->cookie) {
- if (s->cookie_is_64) {
- cookie = ldq_be_p(buf + s->cookie_offset);
- } else {
- cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
- }
- if (cookie != s->rx_cookie) {
- if (!s->header_mismatch) {
- error_report("unknown cookie id");
- }
- return -1;
- }
- }
- session = (uint32_t *) (buf + s->session_offset);
- if (ldl_be_p(session) != s->rx_session) {
- if (!s->header_mismatch) {
- error_report("session mismatch");
- }
- return -1;
- }
- return 0;
- }
- static void net_l2tpv3_process_queue(NetL2TPV3State *s)
- {
- int size = 0;
- struct iovec *vec;
- bool bad_read;
- int data_size;
- struct mmsghdr *msgvec;
- /* go into ring mode only if there is a "pending" tail */
- if (s->queue_depth > 0) {
- do {
- msgvec = s->msgvec + s->queue_tail;
- if (msgvec->msg_len > 0) {
- data_size = msgvec->msg_len - s->header_size;
- vec = msgvec->msg_hdr.msg_iov;
- if ((data_size > 0) &&
- (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
- vec++;
- /* Use the legacy delivery for now, we will
- * switch to using our own ring as a queueing mechanism
- * at a later date
- */
- size = qemu_send_packet_async(
- &s->nc,
- vec->iov_base,
- data_size,
- l2tpv3_send_completed
- );
- if (size == 0) {
- l2tpv3_read_poll(s, false);
- }
- bad_read = false;
- } else {
- bad_read = true;
- if (!s->header_mismatch) {
- /* report error only once */
- error_report("l2tpv3 header verification failed");
- s->header_mismatch = true;
- }
- }
- } else {
- bad_read = true;
- }
- s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
- s->queue_depth--;
- } while (
- (s->queue_depth > 0) &&
- qemu_can_send_packet(&s->nc) &&
- ((size > 0) || bad_read)
- );
- }
- }
- static void net_l2tpv3_send(void *opaque)
- {
- NetL2TPV3State *s = opaque;
- int target_count, count;
- struct mmsghdr *msgvec;
- /* go into ring mode only if there is a "pending" tail */
- if (s->queue_depth) {
- /* The ring buffer we use has variable intake
- * count of how much we can read varies - adjust accordingly
- */
- target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
- /* Ensure we do not overrun the ring when we have
- * a lot of enqueued packets
- */
- if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
- target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
- }
- } else {
- /* we do not have any pending packets - we can use
- * the whole message vector linearly instead of using
- * it as a ring
- */
- s->queue_head = 0;
- s->queue_tail = 0;
- target_count = MAX_L2TPV3_MSGCNT;
- }
- msgvec = s->msgvec + s->queue_head;
- if (target_count > 0) {
- count = RETRY_ON_EINTR(
- recvmmsg(s->fd, msgvec, target_count, MSG_DONTWAIT, NULL)
- );
- if (count < 0) {
- /* Recv error - we still need to flush packets here,
- * (re)set queue head to current position
- */
- count = 0;
- }
- s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
- s->queue_depth += count;
- }
- net_l2tpv3_process_queue(s);
- }
- static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
- {
- int i, j;
- struct iovec *iov;
- struct mmsghdr *cleanup = msgvec;
- if (cleanup) {
- for (i = 0; i < count; i++) {
- if (cleanup->msg_hdr.msg_iov) {
- iov = cleanup->msg_hdr.msg_iov;
- for (j = 0; j < iovcount; j++) {
- g_free(iov->iov_base);
- iov++;
- }
- g_free(cleanup->msg_hdr.msg_iov);
- }
- cleanup++;
- }
- g_free(msgvec);
- }
- }
- static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
- {
- int i;
- struct iovec *iov;
- struct mmsghdr *msgvec, *result;
- msgvec = g_new(struct mmsghdr, count);
- result = msgvec;
- for (i = 0; i < count ; i++) {
- msgvec->msg_hdr.msg_name = NULL;
- msgvec->msg_hdr.msg_namelen = 0;
- iov = g_new(struct iovec, IOVSIZE);
- msgvec->msg_hdr.msg_iov = iov;
- iov->iov_base = g_malloc(s->header_size);
- iov->iov_len = s->header_size;
- iov++ ;
- iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
- iov->iov_len = BUFFER_SIZE;
- msgvec->msg_hdr.msg_iovlen = 2;
- msgvec->msg_hdr.msg_control = NULL;
- msgvec->msg_hdr.msg_controllen = 0;
- msgvec->msg_hdr.msg_flags = 0;
- msgvec++;
- }
- return result;
- }
- static void net_l2tpv3_cleanup(NetClientState *nc)
- {
- NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
- qemu_purge_queued_packets(nc);
- l2tpv3_read_poll(s, false);
- l2tpv3_write_poll(s, false);
- if (s->fd >= 0) {
- close(s->fd);
- }
- destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
- g_free(s->vec);
- g_free(s->header_buf);
- g_free(s->dgram_dst);
- }
- static NetClientInfo net_l2tpv3_info = {
- .type = NET_CLIENT_DRIVER_L2TPV3,
- .size = sizeof(NetL2TPV3State),
- .receive = net_l2tpv3_receive_dgram,
- .receive_iov = net_l2tpv3_receive_dgram_iov,
- .poll = l2tpv3_poll,
- .cleanup = net_l2tpv3_cleanup,
- };
- int net_init_l2tpv3(const Netdev *netdev,
- const char *name,
- NetClientState *peer, Error **errp)
- {
- const NetdevL2TPv3Options *l2tpv3;
- NetL2TPV3State *s;
- NetClientState *nc;
- int fd = -1, gairet;
- struct addrinfo hints;
- struct addrinfo *result = NULL;
- char *srcport, *dstport;
- nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
- s = DO_UPCAST(NetL2TPV3State, nc, nc);
- s->queue_head = 0;
- s->queue_tail = 0;
- s->header_mismatch = false;
- assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3);
- l2tpv3 = &netdev->u.l2tpv3;
- if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
- s->ipv6 = l2tpv3->ipv6;
- } else {
- s->ipv6 = false;
- }
- if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
- error_setg(errp, "offset must be less than 256 bytes");
- goto outerr;
- }
- if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
- if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
- s->cookie = true;
- } else {
- error_setg(errp,
- "require both 'rxcookie' and 'txcookie' or neither");
- goto outerr;
- }
- } else {
- s->cookie = false;
- }
- if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
- s->cookie_is_64 = true;
- } else {
- s->cookie_is_64 = false;
- }
- if (l2tpv3->has_udp && l2tpv3->udp) {
- s->udp = true;
- if (!(l2tpv3->srcport && l2tpv3->dstport)) {
- error_setg(errp, "need both src and dst port for udp");
- goto outerr;
- } else {
- srcport = l2tpv3->srcport;
- dstport = l2tpv3->dstport;
- }
- } else {
- s->udp = false;
- srcport = NULL;
- dstport = NULL;
- }
- s->offset = 4;
- s->session_offset = 0;
- s->cookie_offset = 4;
- s->counter_offset = 4;
- s->tx_session = l2tpv3->txsession;
- if (l2tpv3->has_rxsession) {
- s->rx_session = l2tpv3->rxsession;
- } else {
- s->rx_session = s->tx_session;
- }
- if (s->cookie) {
- s->rx_cookie = l2tpv3->rxcookie;
- s->tx_cookie = l2tpv3->txcookie;
- if (s->cookie_is_64 == true) {
- /* 64 bit cookie */
- s->offset += 8;
- s->counter_offset += 8;
- } else {
- /* 32 bit cookie */
- s->offset += 4;
- s->counter_offset += 4;
- }
- }
- memset(&hints, 0, sizeof(hints));
- if (s->ipv6) {
- hints.ai_family = AF_INET6;
- } else {
- hints.ai_family = AF_INET;
- }
- if (s->udp) {
- hints.ai_socktype = SOCK_DGRAM;
- hints.ai_protocol = 0;
- s->offset += 4;
- s->counter_offset += 4;
- s->session_offset += 4;
- s->cookie_offset += 4;
- } else {
- hints.ai_socktype = SOCK_RAW;
- hints.ai_protocol = IPPROTO_L2TP;
- }
- gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
- if ((gairet != 0) || (result == NULL)) {
- error_setg(errp, "could not resolve src, errno = %s",
- gai_strerror(gairet));
- goto outerr;
- }
- fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
- if (fd == -1) {
- fd = -errno;
- error_setg(errp, "socket creation failed, errno = %d",
- -fd);
- goto outerr;
- }
- if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
- error_setg(errp, "could not bind socket err=%i", errno);
- goto outerr;
- }
- freeaddrinfo(result);
- memset(&hints, 0, sizeof(hints));
- if (s->ipv6) {
- hints.ai_family = AF_INET6;
- } else {
- hints.ai_family = AF_INET;
- }
- if (s->udp) {
- hints.ai_socktype = SOCK_DGRAM;
- hints.ai_protocol = 0;
- } else {
- hints.ai_socktype = SOCK_RAW;
- hints.ai_protocol = IPPROTO_L2TP;
- }
- result = NULL;
- gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
- if ((gairet != 0) || (result == NULL)) {
- error_setg(errp, "could not resolve dst, error = %s",
- gai_strerror(gairet));
- goto outerr;
- }
- s->dgram_dst = g_new0(struct sockaddr_storage, 1);
- memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
- s->dst_size = result->ai_addrlen;
- freeaddrinfo(result);
- if (l2tpv3->has_counter && l2tpv3->counter) {
- s->has_counter = true;
- s->offset += 4;
- } else {
- s->has_counter = false;
- }
- if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
- s->has_counter = true; /* pin counter implies that there is counter */
- s->pin_counter = true;
- } else {
- s->pin_counter = false;
- }
- if (l2tpv3->has_offset) {
- /* extra offset */
- s->offset += l2tpv3->offset;
- }
- if ((s->ipv6) || (s->udp)) {
- s->header_size = s->offset;
- } else {
- s->header_size = s->offset + sizeof(struct iphdr);
- }
- s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
- s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
- s->header_buf = g_malloc(s->header_size);
- qemu_socket_set_nonblock(fd);
- s->fd = fd;
- s->counter = 0;
- l2tpv3_read_poll(s, true);
- qemu_set_info_str(&s->nc, "l2tpv3: connected");
- return 0;
- outerr:
- qemu_del_net_client(nc);
- if (fd >= 0) {
- close(fd);
- }
- if (result) {
- freeaddrinfo(result);
- }
- return -1;
- }
|