/* eth.c — QEMU Ethernet/IP header helper functions */
/*
 * QEMU network structures definitions and helper functions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"
  22. void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
  23. uint16_t vlan_tag, uint16_t vlan_ethtype)
  24. {
  25. struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
  26. memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN);
  27. vhdr->h_tci = cpu_to_be16(vlan_tag);
  28. vhdr->h_proto = ehdr->h_proto;
  29. ehdr->h_proto = cpu_to_be16(vlan_ethtype);
  30. *ehdr_size += sizeof(*vhdr);
  31. }
  32. uint8_t
  33. eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
  34. {
  35. uint8_t ecn_state = 0;
  36. if (l3_proto == ETH_P_IP) {
  37. struct ip_header *iphdr = (struct ip_header *) l3_hdr;
  38. if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
  39. if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
  40. ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  41. }
  42. if (l4proto == IP_PROTO_TCP) {
  43. return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
  44. } else if (l4proto == IP_PROTO_UDP) {
  45. return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
  46. }
  47. }
  48. } else if (l3_proto == ETH_P_IPV6) {
  49. struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
  50. if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
  51. ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  52. }
  53. if (l4proto == IP_PROTO_TCP) {
  54. return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
  55. }
  56. }
  57. qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
  58. "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
  59. return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
  60. }
  61. uint16_t
  62. eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
  63. {
  64. uint16_t proto;
  65. size_t copied;
  66. size_t size = iov_size(l2hdr_iov, iovcnt);
  67. size_t proto_offset = l2hdr_len - sizeof(proto);
  68. if (size < proto_offset) {
  69. return ETH_P_UNKNOWN;
  70. }
  71. copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
  72. &proto, sizeof(proto));
  73. return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
  74. }
  75. static bool
  76. _eth_copy_chunk(size_t input_size,
  77. const struct iovec *iov, int iovcnt,
  78. size_t offset, size_t length,
  79. void *buffer)
  80. {
  81. size_t copied;
  82. if (input_size < offset) {
  83. return false;
  84. }
  85. copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
  86. if (copied < length) {
  87. return false;
  88. }
  89. return true;
  90. }
  91. static bool
  92. _eth_tcp_has_data(bool is_ip4,
  93. const struct ip_header *ip4_hdr,
  94. const struct ip6_header *ip6_hdr,
  95. size_t full_ip6hdr_len,
  96. const struct tcp_header *tcp)
  97. {
  98. uint32_t l4len;
  99. if (is_ip4) {
  100. l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
  101. } else {
  102. size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
  103. l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
  104. }
  105. return l4len > TCP_HEADER_DATA_OFFSET(tcp);
  106. }
/*
 * Parse the L2/L3/L4 headers of a frame held in a scattered buffer.
 *
 * @iov/@iovcnt: frame data; @iovoff: offset of the Ethernet header.
 * @hasip4/@hasip6: set to true when a valid IPv4/IPv6 header was parsed.
 * @l3hdr_off: offset of the L3 header (always written).
 * @l4hdr_off: offset of the L4 header (written only after the L3 header
 *             parsed successfully).
 * @l5hdr_off: offset of the L4 payload (written only for complete TCP/UDP
 *             headers).
 * @ip6hdr_info/@ip4hdr_info/@l4hdr_info: filled with copies of, and details
 *             about, the respective headers.
 *
 * On any truncation or parse failure the function returns early, leaving
 * l4hdr_info->proto == ETH_L4_HDR_PROTO_INVALID.
 */
void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
                       bool *hasip4, bool *hasip6,
                       size_t *l3hdr_off,
                       size_t *l4hdr_off,
                       size_t *l5hdr_off,
                       eth_ip6_hdr_info *ip6hdr_info,
                       eth_ip4_hdr_info *ip4hdr_info,
                       eth_l4_hdr_info *l4hdr_info)
{
    int proto;
    bool fragment = false;
    size_t input_size = iov_size(iov, iovcnt);
    size_t copied;
    uint8_t ip_p;

    /* Defaults for the early-return (parse failure) paths. */
    *hasip4 = *hasip6 = false;
    *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
    l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;

    proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);

    if (proto == ETH_P_IP) {
        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;

        if (input_size < *l3hdr_off) {
            return;
        }

        copied = iov_to_buf(iov, iovcnt, *l3hdr_off, iphdr, sizeof(*iphdr));

        /* A short copy or a non-v4 version field means no usable header. */
        if (copied < sizeof(*iphdr) ||
            IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
            return;
        }

        *hasip4 = true;
        ip_p = iphdr->ip_p;
        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
        *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(iphdr);

        fragment = ip4hdr_info->fragment;
    } else if (proto == ETH_P_IPV6) {
        /* Parses the base header plus any extension headers. */
        if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
            return;
        }

        *hasip6 = true;
        ip_p = ip6hdr_info->l4proto;
        *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
        fragment = ip6hdr_info->fragment;
    } else {
        /* Neither IPv4 nor IPv6: nothing more to parse. */
        return;
    }

    /* Only the first fragment contains the L4 header; skip the rest. */
    if (fragment) {
        return;
    }

    switch (ip_p) {
    case IP_PROTO_TCP:
        if (_eth_copy_chunk(input_size,
                            iov, iovcnt,
                            *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
                            &l4hdr_info->hdr.tcp)) {
            l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
            /* Payload starts after the TCP header incl. options. */
            *l5hdr_off = *l4hdr_off +
                TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

            l4hdr_info->has_tcp_data =
                _eth_tcp_has_data(proto == ETH_P_IP,
                                  &ip4hdr_info->ip4_hdr,
                                  &ip6hdr_info->ip6_hdr,
                                  *l4hdr_off - *l3hdr_off,
                                  &l4hdr_info->hdr.tcp);
        }
        break;

    case IP_PROTO_UDP:
        if (_eth_copy_chunk(input_size,
                            iov, iovcnt,
                            *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
                            &l4hdr_info->hdr.udp)) {
            l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
        }
        break;

    case IP_PROTO_SCTP:
        /* SCTP is recognized but its header is not copied out. */
        l4hdr_info->proto = ETH_L4_HDR_PROTO_SCTP;
        break;
    }
}
/*
 * Copy an Ethernet header out of @iov with its outermost VLAN tag removed.
 *
 * @new_ehdr_buf receives the de-tagged header: the Ethernet header with
 * the inner ethertype pulled up, plus — for double-tagged (QinQ) frames —
 * the inner VLAN tag as well.
 * @tci receives the outer tag's TCI in host order.
 * @payload_offset receives the offset (within @iov) of the data that
 * follows the stripped tag(s).
 *
 * Returns the number of bytes written to @new_ehdr_buf, or 0 when the
 * frame is not VLAN-tagged or is too short.
 */
size_t
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
               void *new_ehdr_buf,
               uint16_t *payload_offset, uint16_t *tci)
{
    struct vlan_header vlan_hdr;
    struct eth_header *new_ehdr = new_ehdr_buf;

    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
                               new_ehdr, sizeof(*new_ehdr));

    if (copied < sizeof(*new_ehdr)) {
        return 0;
    }

    switch (be16_to_cpu(new_ehdr->h_proto)) {
    case ETH_P_VLAN:
    case ETH_P_DVLAN:
        /* Read the outer tag that follows the Ethernet header. */
        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
                            &vlan_hdr, sizeof(vlan_hdr));

        if (copied < sizeof(vlan_hdr)) {
            return 0;
        }

        /* Promote the encapsulated ethertype into the Ethernet header. */
        new_ehdr->h_proto = vlan_hdr.h_proto;

        *tci = be16_to_cpu(vlan_hdr.h_tci);
        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);

        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
            /* QinQ: keep the inner tag in the output header. */
            copied = iov_to_buf(iov, iovcnt, *payload_offset,
                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));

            if (copied < sizeof(vlan_hdr)) {
                return 0;
            }

            *payload_offset += sizeof(vlan_hdr);

            return sizeof(struct eth_header) + sizeof(struct vlan_header);
        } else {
            return sizeof(struct eth_header);
        }

    default:
        /* Not a VLAN-tagged frame. */
        return 0;
    }
}
/*
 * Strip one VLAN tag with a caller-specified ethertype (VET).
 *
 * @index selects which tag to strip:
 *   0 — strip the outermost tag; its ethertype must equal @vet.
 *   1 — the outer ethertype must equal @vet_ext; strip the second tag,
 *       whose ethertype must equal @vet. Both the Ethernet header and
 *       the outer tag are kept in @new_ehdr_buf.
 * Any other index is rejected.
 *
 * On success the stripped tag's encapsulated ethertype is written into
 * the copied header, @tci receives the tag's TCI in host order,
 * @payload_offset the offset of the following data, and the number of
 * header bytes stored in @new_ehdr_buf is returned. Returns 0 on any
 * mismatch or truncation.
 */
size_t
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
                  uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
                  uint16_t *payload_offset, uint16_t *tci)
{
    struct vlan_header vlan_hdr;
    uint16_t *new_ehdr_proto;  /* points at the proto field to rewrite */
    size_t new_ehdr_size;
    size_t copied;

    switch (index) {
    case 0:
        new_ehdr_proto = &PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto;
        new_ehdr_size = sizeof(struct eth_header);
        copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
        break;

    case 1:
        new_ehdr_proto = &PKT_GET_VLAN_HDR(new_ehdr_buf)->h_proto;
        new_ehdr_size = sizeof(struct eth_header) + sizeof(struct vlan_header);
        copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
        /* The outer ethertype must match the extended VET. */
        if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto) != vet_ext) {
            return 0;
        }
        break;

    default:
        return 0;
    }

    if (copied < new_ehdr_size || be16_to_cpu(*new_ehdr_proto) != vet) {
        return 0;
    }

    /* Read the tag being stripped, located right after the copied header. */
    copied = iov_to_buf(iov, iovcnt, iovoff + new_ehdr_size,
                        &vlan_hdr, sizeof(vlan_hdr));
    if (copied < sizeof(vlan_hdr)) {
        return 0;
    }

    *new_ehdr_proto = vlan_hdr.h_proto;
    *payload_offset = iovoff + new_ehdr_size + sizeof(vlan_hdr);
    *tci = be16_to_cpu(vlan_hdr.h_tci);

    return new_ehdr_size;
}
  262. void
  263. eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
  264. {
  265. struct ip_header *iphdr = (struct ip_header *) l3hdr;
  266. iphdr->ip_sum = 0;
  267. iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
  268. }
  269. uint32_t
  270. eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
  271. uint16_t csl,
  272. uint32_t *cso)
  273. {
  274. struct ip_pseudo_header ipph;
  275. ipph.ip_src = iphdr->ip_src;
  276. ipph.ip_dst = iphdr->ip_dst;
  277. ipph.ip_payload = cpu_to_be16(csl);
  278. ipph.ip_proto = iphdr->ip_p;
  279. ipph.zeros = 0;
  280. *cso = sizeof(ipph);
  281. return net_checksum_add(*cso, (uint8_t *) &ipph);
  282. }
  283. uint32_t
  284. eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
  285. uint16_t csl,
  286. uint8_t l4_proto,
  287. uint32_t *cso)
  288. {
  289. struct ip6_pseudo_header ipph;
  290. ipph.ip6_src = iphdr->ip6_src;
  291. ipph.ip6_dst = iphdr->ip6_dst;
  292. ipph.len = cpu_to_be16(csl);
  293. ipph.zero[0] = 0;
  294. ipph.zero[1] = 0;
  295. ipph.zero[2] = 0;
  296. ipph.next_hdr = l4_proto;
  297. *cso = sizeof(ipph);
  298. return net_checksum_add(*cso, (uint8_t *)&ipph);
  299. }
  300. static bool
  301. eth_is_ip6_extension_header_type(uint8_t hdr_type)
  302. {
  303. switch (hdr_type) {
  304. case IP6_HOP_BY_HOP:
  305. case IP6_ROUTING:
  306. case IP6_FRAGMENT:
  307. case IP6_AUTHENTICATION:
  308. case IP6_DESTINATON:
  309. case IP6_MOBILITY:
  310. return true;
  311. default:
  312. return false;
  313. }
  314. }
  315. static bool
  316. _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
  317. size_t ext_hdr_offset,
  318. struct ip6_ext_hdr *ext_hdr,
  319. struct in6_address *dst_addr)
  320. {
  321. struct ip6_ext_hdr_routing rt_hdr;
  322. size_t input_size = iov_size(pkt, pkt_frags);
  323. size_t bytes_read;
  324. if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
  325. return false;
  326. }
  327. bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
  328. &rt_hdr, sizeof(rt_hdr));
  329. assert(bytes_read == sizeof(rt_hdr));
  330. if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
  331. return false;
  332. }
  333. bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
  334. dst_addr, sizeof(*dst_addr));
  335. assert(bytes_read == sizeof(*dst_addr));
  336. return true;
  337. }
  338. static bool
  339. _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
  340. size_t dsthdr_offset,
  341. struct ip6_ext_hdr *ext_hdr,
  342. struct in6_address *src_addr)
  343. {
  344. size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
  345. struct ip6_option_hdr opthdr;
  346. size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
  347. while (bytes_left > sizeof(opthdr)) {
  348. size_t input_size = iov_size(pkt, pkt_frags);
  349. size_t bytes_read, optlen;
  350. if (input_size < opt_offset) {
  351. return false;
  352. }
  353. bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
  354. &opthdr, sizeof(opthdr));
  355. if (bytes_read != sizeof(opthdr)) {
  356. return false;
  357. }
  358. optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
  359. : (opthdr.len + sizeof(opthdr));
  360. if (optlen > bytes_left) {
  361. return false;
  362. }
  363. if (opthdr.type == IP6_OPT_HOME) {
  364. if (input_size < opt_offset + sizeof(opthdr)) {
  365. return false;
  366. }
  367. bytes_read = iov_to_buf(pkt, pkt_frags,
  368. opt_offset + sizeof(opthdr),
  369. src_addr, sizeof(*src_addr));
  370. return bytes_read == sizeof(*src_addr);
  371. }
  372. opt_offset += optlen;
  373. bytes_left -= optlen;
  374. }
  375. return false;
  376. }
/*
 * Parse an IPv6 header starting at @ip6hdr_off in @pkt, walking any
 * extension headers.
 *
 * Fills @info with a copy of the base header, the cumulative header
 * length, the final L4 protocol number, whether extension headers were
 * present, whether the packet is a fragment, and any RSS-relevant
 * addresses found in routing / destination-options headers.
 *
 * Returns false when the data is too short to parse.
 */
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
    struct ip6_ext_hdr ext_hdr;
    size_t bytes_read;
    uint8_t curr_ext_hdr_type;
    size_t input_size = iov_size(pkt, pkt_frags);

    info->rss_ex_dst_valid = false;
    info->rss_ex_src_valid = false;
    info->fragment = false;

    if (input_size < ip6hdr_off) {
        return false;
    }

    bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
                            &info->ip6_hdr, sizeof(info->ip6_hdr));
    if (bytes_read < sizeof(info->ip6_hdr)) {
        return false;
    }

    info->full_hdr_len = sizeof(struct ip6_header);

    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;

    /* Fast path: no extension headers at all. */
    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
        info->l4proto = info->ip6_hdr.ip6_nxt;
        info->has_ext_hdrs = false;
        return true;
    }

    info->has_ext_hdrs = true;

    do {
        if (input_size < ip6hdr_off + info->full_hdr_len) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
                                &ext_hdr, sizeof(ext_hdr));
        if (bytes_read < sizeof(ext_hdr)) {
            return false;
        }

        if (curr_ext_hdr_type == IP6_ROUTING) {
            /* Only a routing header sized for exactly one address can be
             * the Type 2 (Mobile IPv6) form carrying the RSS address. */
            if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
                info->rss_ex_dst_valid =
                    _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
                                             ip6hdr_off + info->full_hdr_len,
                                             &ext_hdr, &info->rss_ex_dst);
            }
        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
            info->rss_ex_src_valid =
                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
                                         ip6hdr_off + info->full_hdr_len,
                                         &ext_hdr, &info->rss_ex_src);
        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
            info->fragment = true;
        }

        /* Extension header length is in 8-octet units incl. the first. */
        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));

    info->l4proto = ext_hdr.ip6r_nxt;
    return true;
}
  433. bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
  434. const void *pkt, size_t pkt_size)
  435. {
  436. assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
  437. if (pkt_size >= ETH_ZLEN) {
  438. return false;
  439. }
  440. /* pad to minimum Ethernet frame length */
  441. memcpy(padded_pkt, pkt, pkt_size);
  442. memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
  443. *padded_buflen = ETH_ZLEN;
  444. return true;
  445. }