/* eth.c */
  1. /*
  2. * QEMU network structures definitions and helper functions
  3. *
  4. * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
  5. *
  6. * Developed by Daynix Computing LTD (http://www.daynix.com)
  7. *
  8. * Authors:
  9. * Dmitry Fleytman <dmitry@daynix.com>
  10. * Tamir Shomer <tamirs@daynix.com>
  11. * Yan Vugenfirer <yan@daynix.com>
  12. *
  13. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  14. * See the COPYING file in the top-level directory.
  15. *
  16. */
  17. #include "qemu/osdep.h"
  18. #include "net/eth.h"
  19. #include "net/checksum.h"
  20. #include "net/tap.h"
  21. void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
  22. uint16_t vlan_ethtype, bool *is_new)
  23. {
  24. struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
  25. switch (be16_to_cpu(ehdr->h_proto)) {
  26. case ETH_P_VLAN:
  27. case ETH_P_DVLAN:
  28. /* vlan hdr exists */
  29. *is_new = false;
  30. break;
  31. default:
  32. /* No VLAN header, put a new one */
  33. vhdr->h_proto = ehdr->h_proto;
  34. ehdr->h_proto = cpu_to_be16(vlan_ethtype);
  35. *is_new = true;
  36. break;
  37. }
  38. vhdr->h_tci = cpu_to_be16(vlan_tag);
  39. }
  40. uint8_t
  41. eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
  42. {
  43. uint8_t ecn_state = 0;
  44. if (l3_proto == ETH_P_IP) {
  45. struct ip_header *iphdr = (struct ip_header *) l3_hdr;
  46. if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
  47. if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) {
  48. ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  49. }
  50. if (l4proto == IP_PROTO_TCP) {
  51. return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
  52. } else if (l4proto == IP_PROTO_UDP) {
  53. return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
  54. }
  55. }
  56. } else if (l3_proto == ETH_P_IPV6) {
  57. struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
  58. if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) {
  59. ecn_state = VIRTIO_NET_HDR_GSO_ECN;
  60. }
  61. if (l4proto == IP_PROTO_TCP) {
  62. return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
  63. }
  64. }
  65. /* Unsupported offload */
  66. g_assert_not_reached();
  67. return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
  68. }
  69. uint16_t
  70. eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
  71. {
  72. uint16_t proto;
  73. size_t copied;
  74. size_t size = iov_size(l2hdr_iov, iovcnt);
  75. size_t proto_offset = l2hdr_len - sizeof(proto);
  76. if (size < proto_offset) {
  77. return ETH_P_UNKNOWN;
  78. }
  79. copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
  80. &proto, sizeof(proto));
  81. return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
  82. }
  83. static bool
  84. _eth_copy_chunk(size_t input_size,
  85. const struct iovec *iov, int iovcnt,
  86. size_t offset, size_t length,
  87. void *buffer)
  88. {
  89. size_t copied;
  90. if (input_size < offset) {
  91. return false;
  92. }
  93. copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
  94. if (copied < length) {
  95. return false;
  96. }
  97. return true;
  98. }
  99. static bool
  100. _eth_tcp_has_data(bool is_ip4,
  101. const struct ip_header *ip4_hdr,
  102. const struct ip6_header *ip6_hdr,
  103. size_t full_ip6hdr_len,
  104. const struct tcp_header *tcp)
  105. {
  106. uint32_t l4len;
  107. if (is_ip4) {
  108. l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
  109. } else {
  110. size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
  111. l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
  112. }
  113. return l4len > TCP_HEADER_DATA_OFFSET(tcp);
  114. }
void eth_get_protocols(const struct iovec *iov, int iovcnt,
                       bool *isip4, bool *isip6,
                       bool *isudp, bool *istcp,
                       size_t *l3hdr_off,
                       size_t *l4hdr_off,
                       size_t *l5hdr_off,
                       eth_ip6_hdr_info *ip6hdr_info,
                       eth_ip4_hdr_info *ip4hdr_info,
                       eth_l4_hdr_info *l4hdr_info)
{
    /*
     * Parse the L2/L3/L4 headers of the packet in 'iov' and report
     * which protocols were found ('isip4'/'isip6'/'isudp'/'istcp'),
     * the byte offsets of the L3, L4 and L5 headers, and the parsed
     * header contents in the *_info out-structures.  Flags for layers
     * that could not be parsed (short packet, fragment, unknown
     * protocol) are left false.
     */
    int proto;
    bool fragment = false;
    size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
    size_t input_size = iov_size(iov, iovcnt);
    size_t copied;

    *isip4 = *isip6 = *isudp = *istcp = false;

    proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);

    *l3hdr_off = l2hdr_len;

    if (proto == ETH_P_IP) {
        struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;

        if (input_size < l2hdr_len) {
            return; /* packet is shorter than its own L2 header */
        }

        copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));

        /* isip4 reflects the ethertype even if the IP header turns out
         * to be truncated or not version 4. */
        *isip4 = true;

        if (copied < sizeof(*iphdr)) {
            return;
        }

        if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
            if (iphdr->ip_p == IP_PROTO_TCP) {
                *istcp = true;
            } else if (iphdr->ip_p == IP_PROTO_UDP) {
                *isudp = true;
            }
        }

        ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
        *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);

        fragment = ip4hdr_info->fragment;
    } else if (proto == ETH_P_IPV6) {

        *isip6 = true;
        if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
                               ip6hdr_info)) {
            if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
                *istcp = true;
            } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
                *isudp = true;
            }
        } else {
            return; /* truncated or malformed IPv6 header chain */
        }

        *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
        fragment = ip6hdr_info->fragment;
    }

    /* Only non-fragments (or first fragments, flag false) carry a
     * parseable L4 header. */
    if (!fragment) {
        if (*istcp) {
            *istcp = _eth_copy_chunk(input_size,
                                     iov, iovcnt,
                                     *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
                                     &l4hdr_info->hdr.tcp);

            if (*istcp) {
                /* L5 starts after the TCP header incl. options. */
                *l5hdr_off = *l4hdr_off +
                    TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);

                l4hdr_info->has_tcp_data =
                    _eth_tcp_has_data(proto == ETH_P_IP,
                                      &ip4hdr_info->ip4_hdr,
                                      &ip6hdr_info->ip6_hdr,
                                      *l4hdr_off - *l3hdr_off,
                                      &l4hdr_info->hdr.tcp);
            }
        } else if (*isudp) {
            *isudp = _eth_copy_chunk(input_size,
                                     iov, iovcnt,
                                     *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
                                     &l4hdr_info->hdr.udp);
            *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
        }
    }
}
size_t
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
               uint8_t *new_ehdr_buf,
               uint16_t *payload_offset, uint16_t *tci)
{
    /*
     * Strip the outer VLAN tag from the frame in 'iov' at 'iovoff'.
     * On success the de-tagged Ethernet header is written to
     * 'new_ehdr_buf' (for a double-tagged frame the inner VLAN header
     * is preserved right after it), '*tci' receives the stripped tag's
     * TCI in host order, '*payload_offset' the offset of the remaining
     * payload, and the number of header bytes placed in 'new_ehdr_buf'
     * is returned.  Returns 0 for untagged or truncated frames.
     */
    struct vlan_header vlan_hdr;
    struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;

    size_t copied = iov_to_buf(iov, iovcnt, iovoff,
                               new_ehdr, sizeof(*new_ehdr));

    if (copied < sizeof(*new_ehdr)) {
        return 0; /* truncated Ethernet header */
    }

    switch (be16_to_cpu(new_ehdr->h_proto)) {
    case ETH_P_VLAN:
    case ETH_P_DVLAN:
        /* Read the outer VLAN header that follows the MAC addresses. */
        copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
                            &vlan_hdr, sizeof(vlan_hdr));

        if (copied < sizeof(vlan_hdr)) {
            return 0;
        }

        /* Drop the tag: the inner ethertype replaces the VLAN one. */
        new_ehdr->h_proto = vlan_hdr.h_proto;

        *tci = be16_to_cpu(vlan_hdr.h_tci);
        *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);

        if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
            /*
             * Double-tagged frame: keep the inner VLAN header by
             * copying it directly after the Ethernet header.
             * NOTE(review): this presumes 'new_ehdr_buf' has room for
             * eth_header + vlan_header — confirm at the callers.
             */
            copied = iov_to_buf(iov, iovcnt, *payload_offset,
                                PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));

            if (copied < sizeof(vlan_hdr)) {
                return 0;
            }

            *payload_offset += sizeof(vlan_hdr);

            return sizeof(struct eth_header) + sizeof(struct vlan_header);
        } else {
            return sizeof(struct eth_header);
        }
    default:
        /* Not a VLAN-tagged frame. */
        return 0;
    }
}
  231. size_t
  232. eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
  233. uint16_t vet, uint8_t *new_ehdr_buf,
  234. uint16_t *payload_offset, uint16_t *tci)
  235. {
  236. struct vlan_header vlan_hdr;
  237. struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
  238. size_t copied = iov_to_buf(iov, iovcnt, iovoff,
  239. new_ehdr, sizeof(*new_ehdr));
  240. if (copied < sizeof(*new_ehdr)) {
  241. return 0;
  242. }
  243. if (be16_to_cpu(new_ehdr->h_proto) == vet) {
  244. copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
  245. &vlan_hdr, sizeof(vlan_hdr));
  246. if (copied < sizeof(vlan_hdr)) {
  247. return 0;
  248. }
  249. new_ehdr->h_proto = vlan_hdr.h_proto;
  250. *tci = be16_to_cpu(vlan_hdr.h_tci);
  251. *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
  252. return sizeof(struct eth_header);
  253. }
  254. return 0;
  255. }
  256. void
  257. eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
  258. void *l3hdr, size_t l3hdr_len,
  259. size_t l3payload_len,
  260. size_t frag_offset, bool more_frags)
  261. {
  262. const struct iovec l2vec = {
  263. .iov_base = (void *) l2hdr,
  264. .iov_len = l2hdr_len
  265. };
  266. if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
  267. uint16_t orig_flags;
  268. struct ip_header *iphdr = (struct ip_header *) l3hdr;
  269. uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
  270. uint16_t new_ip_off;
  271. assert(frag_offset % IP_FRAG_UNIT_SIZE == 0);
  272. assert((frag_off_units & ~IP_OFFMASK) == 0);
  273. orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF);
  274. new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
  275. iphdr->ip_off = cpu_to_be16(new_ip_off);
  276. iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
  277. }
  278. }
  279. void
  280. eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
  281. {
  282. struct ip_header *iphdr = (struct ip_header *) l3hdr;
  283. iphdr->ip_sum = 0;
  284. iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len));
  285. }
  286. uint32_t
  287. eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
  288. uint16_t csl,
  289. uint32_t *cso)
  290. {
  291. struct ip_pseudo_header ipph;
  292. ipph.ip_src = iphdr->ip_src;
  293. ipph.ip_dst = iphdr->ip_dst;
  294. ipph.ip_payload = cpu_to_be16(csl);
  295. ipph.ip_proto = iphdr->ip_p;
  296. ipph.zeros = 0;
  297. *cso = sizeof(ipph);
  298. return net_checksum_add(*cso, (uint8_t *) &ipph);
  299. }
  300. uint32_t
  301. eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
  302. uint16_t csl,
  303. uint8_t l4_proto,
  304. uint32_t *cso)
  305. {
  306. struct ip6_pseudo_header ipph;
  307. ipph.ip6_src = iphdr->ip6_src;
  308. ipph.ip6_dst = iphdr->ip6_dst;
  309. ipph.len = cpu_to_be16(csl);
  310. ipph.zero[0] = 0;
  311. ipph.zero[1] = 0;
  312. ipph.zero[2] = 0;
  313. ipph.next_hdr = l4_proto;
  314. *cso = sizeof(ipph);
  315. return net_checksum_add(*cso, (uint8_t *)&ipph);
  316. }
  317. static bool
  318. eth_is_ip6_extension_header_type(uint8_t hdr_type)
  319. {
  320. switch (hdr_type) {
  321. case IP6_HOP_BY_HOP:
  322. case IP6_ROUTING:
  323. case IP6_FRAGMENT:
  324. case IP6_ESP:
  325. case IP6_AUTHENTICATION:
  326. case IP6_DESTINATON:
  327. case IP6_MOBILITY:
  328. return true;
  329. default:
  330. return false;
  331. }
  332. }
static bool
_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
                         size_t rthdr_offset,
                         struct ip6_ext_hdr *ext_hdr,
                         struct in6_address *dst_addr)
{
    /*
     * For a type-2 routing extension header with exactly one segment
     * left and a length matching a single IPv6 address, extract the
     * address that follows the routing header into '*dst_addr'.
     * Returns true on success, false for any other routing header
     * shape or a truncated packet.
     */
    struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;

    /* 'len' counts 8-byte units after the first; one IPv6 address is
     * sizeof(struct in6_address) / 8 units. */
    if ((rthdr->rtype == 2) &&
        (rthdr->len == sizeof(struct in6_address) / 8) &&
        (rthdr->segleft == 1)) {

        size_t input_size = iov_size(pkt, pkt_frags);
        size_t bytes_read;

        if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
            return false; /* address would start past the packet end */
        }

        bytes_read = iov_to_buf(pkt, pkt_frags,
                                rthdr_offset + sizeof(*ext_hdr),
                                dst_addr, sizeof(*dst_addr));

        return bytes_read == sizeof(*dst_addr);
    }

    return false;
}
static bool
_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
                         size_t dsthdr_offset,
                         struct ip6_ext_hdr *ext_hdr,
                         struct in6_address *src_addr)
{
    /*
     * Walk the options inside a destination-options extension header
     * looking for a home-address option (IP6_OPT_HOME); if found, copy
     * the address it carries into '*src_addr' and return true.
     * Returns false on truncation, malformed options, or if no
     * home-address option is present.
     */
    /* Option bytes in this extension header: ip6r_len counts 8-byte
     * units beyond the first 8 bytes, which hold the header itself. */
    size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
    struct ip6_option_hdr opthdr;
    size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);

    while (bytes_left > sizeof(opthdr)) {
        size_t input_size = iov_size(pkt, pkt_frags);
        size_t bytes_read, optlen;

        if (input_size < opt_offset) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
                                &opthdr, sizeof(opthdr));

        if (bytes_read != sizeof(opthdr)) {
            return false;
        }

        /* Pad1 is a lone padding byte with no length field. */
        optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
                 : (opthdr.len + sizeof(opthdr));

        if (optlen > bytes_left) {
            return false; /* malformed: option overruns the ext header */
        }

        if (opthdr.type == IP6_OPT_HOME) {
            size_t input_size = iov_size(pkt, pkt_frags);

            if (input_size < opt_offset + sizeof(opthdr)) {
                return false;
            }

            bytes_read = iov_to_buf(pkt, pkt_frags,
                                    opt_offset + sizeof(opthdr),
                                    src_addr, sizeof(*src_addr));

            return bytes_read == sizeof(*src_addr);
        }

        opt_offset += optlen;
        bytes_left -= optlen;
    }

    return false;
}
bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
                        size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
    /*
     * Parse the IPv6 header at 'ip6hdr_off' plus any chained extension
     * headers.  Fills 'info' with the fixed header, the total header
     * length (fixed + extensions), the final L4 protocol number,
     * whether the packet is a fragment, and any RSS-relevant addresses
     * found in routing/destination extension headers.  Returns false
     * if any header is truncated.
     */
    struct ip6_ext_hdr ext_hdr;
    size_t bytes_read;
    uint8_t curr_ext_hdr_type;
    size_t input_size = iov_size(pkt, pkt_frags);

    info->rss_ex_dst_valid = false;
    info->rss_ex_src_valid = false;
    info->fragment = false;

    if (input_size < ip6hdr_off) {
        return false;
    }

    bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
                            &info->ip6_hdr, sizeof(info->ip6_hdr));
    if (bytes_read < sizeof(info->ip6_hdr)) {
        return false; /* truncated fixed header */
    }

    info->full_hdr_len = sizeof(struct ip6_header);

    curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;

    if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
        /* No extension headers: next-header is already the L4 proto. */
        info->l4proto = info->ip6_hdr.ip6_nxt;
        info->has_ext_hdrs = false;
        return true;
    }

    info->has_ext_hdrs = true;

    do {
        if (input_size < ip6hdr_off + info->full_hdr_len) {
            return false;
        }

        bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
                                &ext_hdr, sizeof(ext_hdr));

        if (bytes_read < sizeof(ext_hdr)) {
            return false;
        }

        if (curr_ext_hdr_type == IP6_ROUTING) {
            /* A type-2 routing header may carry the real destination. */
            info->rss_ex_dst_valid =
                _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
                                         ip6hdr_off + info->full_hdr_len,
                                         &ext_hdr, &info->rss_ex_dst);
        } else if (curr_ext_hdr_type == IP6_DESTINATON) {
            /* A home-address option may carry the real source. */
            info->rss_ex_src_valid =
                _eth_get_rss_ex_src_addr(pkt, pkt_frags,
                                         ip6hdr_off + info->full_hdr_len,
                                         &ext_hdr, &info->rss_ex_src);
        } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
            info->fragment = true;
        }

        /* Extension header lengths are in 8-byte units, excluding the
         * first unit. */
        info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
        curr_ext_hdr_type = ext_hdr.ip6r_nxt;
    } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));

    info->l4proto = ext_hdr.ip6r_nxt;
    return true;
}