eth.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. /*
  2. * QEMU network structures definitions and helper functions
  3. *
  4. * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
  5. *
  6. * Developed by Daynix Computing LTD (http://www.daynix.com)
  7. *
  8. * Authors:
  9. * Dmitry Fleytman <dmitry@daynix.com>
  10. * Tamir Shomer <tamirs@daynix.com>
  11. * Yan Vugenfirer <yan@daynix.com>
  12. *
  13. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  14. * See the COPYING file in the top-level directory.
  15. *
  16. */
  17. #include "qemu/osdep.h"
  18. #include "qemu/log.h"
  19. #include "net/eth.h"
  20. #include "net/checksum.h"
  21. #include "net/tap.h"
  22. void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
  23. uint16_t vlan_ethtype, bool *is_new)
  24. {
  25. struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
  26. switch (be16_to_cpu(ehdr->h_proto)) {
  27. case ETH_P_VLAN:
  28. case ETH_P_DVLAN:
  29. /* vlan hdr exists */
  30. *is_new = false;
  31. break;
  32. default:
  33. /* No VLAN header, put a new one */
  34. vhdr->h_proto = ehdr->h_proto;
  35. ehdr->h_proto = cpu_to_be16(vlan_ethtype);
  36. *is_new = true;
  37. break;
  38. }
  39. vhdr->h_tci = cpu_to_be16(vlan_tag);
  40. }
  41. uint8_t
  42. eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
  43. {
  44. uint8_t ecn_state = 0;
  45. if (l3_proto == ETH_P_IP) {
  46. struct ip_header *iphdr = (struct ip_header *) l3_hdr;
  47. if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
  48. if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
  49. ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  50. }
  51. if (l4proto == IP_PROTO_TCP) {
  52. return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
  53. } else if (l4proto == IP_PROTO_UDP) {
  54. return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
  55. }
  56. }
  57. } else if (l3_proto == ETH_P_IPV6) {
  58. struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
  59. if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
  60. ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  61. }
  62. if (l4proto == IP_PROTO_TCP) {
  63. return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
  64. }
  65. }
  66. qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, "
  67. "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto);
  68. return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
  69. }
  70. uint16_t
  71. eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
  72. {
  73. uint16_t proto;
  74. size_t copied;
  75. size_t size = iov_size(l2hdr_iov, iovcnt);
  76. size_t proto_offset = l2hdr_len - sizeof(proto);
  77. if (size < proto_offset) {
  78. return ETH_P_UNKNOWN;
  79. }
  80. copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
  81. &proto, sizeof(proto));
  82. return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
  83. }
  /*
   * Copy @length bytes starting at @offset of the vector into @buffer.
   *
   * @input_size: total size of the vector, as computed by the caller
   *
   * Returns true only if @offset lies within @input_size and all @length
   * bytes were copied. On a short copy @buffer may hold partial data.
   */
  static bool
  _eth_copy_chunk(size_t input_size,
                  const struct iovec *iov, int iovcnt,
                  size_t offset, size_t length,
                  void *buffer)
  {
      return input_size >= offset &&
             iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
  }
  100. static bool
  101. _eth_tcp_has_data(bool is_ip4,
  102. const struct ip_header *ip4_hdr,
  103. const struct ip6_header *ip6_hdr,
  104. size_t full_ip6hdr_len,
  105. const struct tcp_header *tcp)
  106. {
  107. uint32_t l4len;
  108. if (is_ip4) {
  109. l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
  110. } else {
  111. size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
  112. l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
  113. }
  114. return l4len > TCP_HEADER_DATA_OFFSET(tcp);
  115. }
  116. void eth_get_protocols(const struct iovec *iov, int iovcnt,
  117. bool *hasip4, bool *hasip6,
  118. size_t *l3hdr_off,
  119. size_t *l4hdr_off,
  120. size_t *l5hdr_off,
  121. eth_ip6_hdr_info *ip6hdr_info,
  122. eth_ip4_hdr_info *ip4hdr_info,
  123. eth_l4_hdr_info *l4hdr_info)
  124. {
  125. int proto;
  126. bool fragment = false;
  127. size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
  128. size_t input_size = iov_size(iov, iovcnt);
  129. size_t copied;
  130. uint8_t ip_p;
  131. *hasip4 = *hasip6 = false;
  132. l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;
  133. proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
  134. *l3hdr_off = l2hdr_len;
  135. if (proto == ETH_P_IP) {
  136. struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
  137. if (input_size < l2hdr_len) {
  138. return;
  139. }
  140. copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
  141. if (copied < sizeof(*iphdr) ||
  142. IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
  143. return;
  144. }
  145. *hasip4 = true;
  146. ip_p = iphdr->ip_p;
  147. ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
  148. *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
  149. fragment = ip4hdr_info->fragment;
  150. } else if (proto == ETH_P_IPV6) {
  151. if (!eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, ip6hdr_info)) {
  152. return;
  153. }
  154. *hasip6 = true;
  155. ip_p = ip6hdr_info->l4proto;
  156. *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
  157. fragment = ip6hdr_info->fragment;
  158. } else {
  159. return;
  160. }
  161. if (fragment) {
  162. return;
  163. }
  164. switch (ip_p) {
  165. case IP_PROTO_TCP:
  166. if (_eth_copy_chunk(input_size,
  167. iov, iovcnt,
  168. *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
  169. &l4hdr_info->hdr.tcp)) {
  170. l4hdr_info->proto = ETH_L4_HDR_PROTO_TCP;
  171. *l5hdr_off = *l4hdr_off +
  172. TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
  173. l4hdr_info->has_tcp_data =
  174. _eth_tcp_has_data(proto == ETH_P_IP,
  175. &ip4hdr_info->ip4_hdr,
  176. &ip6hdr_info->ip6_hdr,
  177. *l4hdr_off - *l3hdr_off,
  178. &l4hdr_info->hdr.tcp);
  179. }
  180. break;
  181. case IP_PROTO_UDP:
  182. if (_eth_copy_chunk(input_size,
  183. iov, iovcnt,
  184. *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
  185. &l4hdr_info->hdr.udp)) {
  186. l4hdr_info->proto = ETH_L4_HDR_PROTO_UDP;
  187. *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
  188. }
  189. break;
  190. }
  191. }
  192. size_t
  193. eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
  194. uint8_t *new_ehdr_buf,
  195. uint16_t *payload_offset, uint16_t *tci)
  196. {
  197. struct vlan_header vlan_hdr;
  198. struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
  199. size_t copied = iov_to_buf(iov, iovcnt, iovoff,
  200. new_ehdr, sizeof(*new_ehdr));
  201. if (copied < sizeof(*new_ehdr)) {
  202. return 0;
  203. }
  204. switch (be16_to_cpu(new_ehdr->h_proto)) {
  205. case ETH_P_VLAN:
  206. case ETH_P_DVLAN:
  207. copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
  208. &vlan_hdr, sizeof(vlan_hdr));
  209. if (copied < sizeof(vlan_hdr)) {
  210. return 0;
  211. }
  212. new_ehdr->h_proto = vlan_hdr.h_proto;
  213. *tci = be16_to_cpu(vlan_hdr.h_tci);
  214. *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
  215. if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
  216. copied = iov_to_buf(iov, iovcnt, *payload_offset,
  217. PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
  218. if (copied < sizeof(vlan_hdr)) {
  219. return 0;
  220. }
  221. *payload_offset += sizeof(vlan_hdr);
  222. return sizeof(struct eth_header) + sizeof(struct vlan_header);
  223. } else {
  224. return sizeof(struct eth_header);
  225. }
  226. default:
  227. return 0;
  228. }
  229. }
  230. size_t
  231. eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
  232. uint16_t vet, uint8_t *new_ehdr_buf,
  233. uint16_t *payload_offset, uint16_t *tci)
  234. {
  235. struct vlan_header vlan_hdr;
  236. struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
  237. size_t copied = iov_to_buf(iov, iovcnt, iovoff,
  238. new_ehdr, sizeof(*new_ehdr));
  239. if (copied < sizeof(*new_ehdr)) {
  240. return 0;
  241. }
  242. if (be16_to_cpu(new_ehdr->h_proto) == vet) {
  243. copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
  244. &vlan_hdr, sizeof(vlan_hdr));
  245. if (copied < sizeof(vlan_hdr)) {
  246. return 0;
  247. }
  248. new_ehdr->h_proto = vlan_hdr.h_proto;
  249. *tci = be16_to_cpu(vlan_hdr.h_tci);
  250. *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
  251. return sizeof(struct eth_header);
  252. }
  253. return 0;
  254. }
  255. void
  256. eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
  257. {
  258. struct ip_header *iphdr = (struct ip_header *) l3hdr;
  259. iphdr->ip_sum = 0;
  260. iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
  261. }
  262. uint32_t
  263. eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
  264. uint16_t csl,
  265. uint32_t *cso)
  266. {
  267. struct ip_pseudo_header ipph;
  268. ipph.ip_src = iphdr->ip_src;
  269. ipph.ip_dst = iphdr->ip_dst;
  270. ipph.ip_payload = cpu_to_be16(csl);
  271. ipph.ip_proto = iphdr->ip_p;
  272. ipph.zeros = 0;
  273. *cso = sizeof(ipph);
  274. return net_checksum_add(*cso, (uint8_t *) &ipph);
  275. }
  276. uint32_t
  277. eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
  278. uint16_t csl,
  279. uint8_t l4_proto,
  280. uint32_t *cso)
  281. {
  282. struct ip6_pseudo_header ipph;
  283. ipph.ip6_src = iphdr->ip6_src;
  284. ipph.ip6_dst = iphdr->ip6_dst;
  285. ipph.len = cpu_to_be16(csl);
  286. ipph.zero[0] = 0;
  287. ipph.zero[1] = 0;
  288. ipph.zero[2] = 0;
  289. ipph.next_hdr = l4_proto;
  290. *cso = sizeof(ipph);
  291. return net_checksum_add(*cso, (uint8_t *)&ipph);
  292. }
  293. static bool
  294. eth_is_ip6_extension_header_type(uint8_t hdr_type)
  295. {
  296. switch (hdr_type) {
  297. case IP6_HOP_BY_HOP:
  298. case IP6_ROUTING:
  299. case IP6_FRAGMENT:
  300. case IP6_AUTHENTICATION:
  301. case IP6_DESTINATON:
  302. case IP6_MOBILITY:
  303. return true;
  304. default:
  305. return false;
  306. }
  307. }
  308. static bool
  309. _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
  310. size_t ext_hdr_offset,
  311. struct ip6_ext_hdr *ext_hdr,
  312. struct in6_address *dst_addr)
  313. {
  314. struct ip6_ext_hdr_routing rt_hdr;
  315. size_t input_size = iov_size(pkt, pkt_frags);
  316. size_t bytes_read;
  317. if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
  318. return false;
  319. }
  320. bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
  321. &rt_hdr, sizeof(rt_hdr));
  322. assert(bytes_read == sizeof(rt_hdr));
  323. if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
  324. return false;
  325. }
  326. bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
  327. dst_addr, sizeof(*dst_addr));
  328. assert(bytes_read == sizeof(*dst_addr));
  329. return true;
  330. }
  331. static bool
  332. _eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
  333. size_t dsthdr_offset,
  334. struct ip6_ext_hdr *ext_hdr,
  335. struct in6_address *src_addr)
  336. {
  337. size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
  338. struct ip6_option_hdr opthdr;
  339. size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
  340. while (bytes_left > sizeof(opthdr)) {
  341. size_t input_size = iov_size(pkt, pkt_frags);
  342. size_t bytes_read, optlen;
  343. if (input_size < opt_offset) {
  344. return false;
  345. }
  346. bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
  347. &opthdr, sizeof(opthdr));
  348. if (bytes_read != sizeof(opthdr)) {
  349. return false;
  350. }
  351. optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
  352. : (opthdr.len + sizeof(opthdr));
  353. if (optlen > bytes_left) {
  354. return false;
  355. }
  356. if (opthdr.type == IP6_OPT_HOME) {
  357. size_t input_size = iov_size(pkt, pkt_frags);
  358. if (input_size < opt_offset + sizeof(opthdr)) {
  359. return false;
  360. }
  361. bytes_read = iov_to_buf(pkt, pkt_frags,
  362. opt_offset + sizeof(opthdr),
  363. src_addr, sizeof(*src_addr));
  364. return bytes_read == sizeof(*src_addr);
  365. }
  366. opt_offset += optlen;
  367. bytes_left -= optlen;
  368. }
  369. return false;
  370. }
  371. bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
  372. size_t ip6hdr_off, eth_ip6_hdr_info *info)
  373. {
  374. struct ip6_ext_hdr ext_hdr;
  375. size_t bytes_read;
  376. uint8_t curr_ext_hdr_type;
  377. size_t input_size = iov_size(pkt, pkt_frags);
  378. info->rss_ex_dst_valid = false;
  379. info->rss_ex_src_valid = false;
  380. info->fragment = false;
  381. if (input_size < ip6hdr_off) {
  382. return false;
  383. }
  384. bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
  385. &info->ip6_hdr, sizeof(info->ip6_hdr));
  386. if (bytes_read < sizeof(info->ip6_hdr)) {
  387. return false;
  388. }
  389. info->full_hdr_len = sizeof(struct ip6_header);
  390. curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
  391. if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
  392. info->l4proto = info->ip6_hdr.ip6_nxt;
  393. info->has_ext_hdrs = false;
  394. return true;
  395. }
  396. info->has_ext_hdrs = true;
  397. do {
  398. if (input_size < ip6hdr_off + info->full_hdr_len) {
  399. return false;
  400. }
  401. bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
  402. &ext_hdr, sizeof(ext_hdr));
  403. if (bytes_read < sizeof(ext_hdr)) {
  404. return false;
  405. }
  406. if (curr_ext_hdr_type == IP6_ROUTING) {
  407. if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
  408. info->rss_ex_dst_valid =
  409. _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
  410. ip6hdr_off + info->full_hdr_len,
  411. &ext_hdr, &info->rss_ex_dst);
  412. }
  413. } else if (curr_ext_hdr_type == IP6_DESTINATON) {
  414. info->rss_ex_src_valid =
  415. _eth_get_rss_ex_src_addr(pkt, pkt_frags,
  416. ip6hdr_off + info->full_hdr_len,
  417. &ext_hdr, &info->rss_ex_src);
  418. } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
  419. info->fragment = true;
  420. }
  421. info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
  422. curr_ext_hdr_type = ext_hdr.ip6r_nxt;
  423. } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
  424. info->l4proto = ext_hdr.ip6r_nxt;
  425. return true;
  426. }
  427. bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
  428. const void *pkt, size_t pkt_size)
  429. {
  430. assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
  431. if (pkt_size >= ETH_ZLEN) {
  432. return false;
  433. }
  434. /* pad to minimum Ethernet frame length */
  435. memcpy(padded_pkt, pkt, pkt_size);
  436. memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
  437. *padded_buflen = ETH_ZLEN;
  438. return true;
  439. }