net_rx_pkt.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. /*
  2. * QEMU RX packets abstractions
  3. *
  4. * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
  5. *
  6. * Developed by Daynix Computing LTD (http://www.daynix.com)
  7. *
  8. * Authors:
  9. * Dmitry Fleytman <dmitry@daynix.com>
  10. * Tamir Shomer <tamirs@daynix.com>
  11. * Yan Vugenfirer <yan@daynix.com>
  12. *
  13. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  14. * See the COPYING file in the top-level directory.
  15. *
  16. */
  17. #include "qemu/osdep.h"
  18. #include "trace.h"
  19. #include "net_rx_pkt.h"
  20. #include "net/checksum.h"
  21. #include "net/tap.h"
  22. struct NetRxPkt {
  23. struct virtio_net_hdr virt_hdr;
  24. uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)];
  25. struct iovec *vec;
  26. uint16_t vec_len_total;
  27. uint16_t vec_len;
  28. uint32_t tot_len;
  29. uint16_t tci;
  30. size_t ehdr_buf_len;
  31. bool has_virt_hdr;
  32. eth_pkt_types_e packet_type;
  33. /* Analysis results */
  34. bool isip4;
  35. bool isip6;
  36. bool isudp;
  37. bool istcp;
  38. size_t l3hdr_off;
  39. size_t l4hdr_off;
  40. size_t l5hdr_off;
  41. eth_ip6_hdr_info ip6hdr_info;
  42. eth_ip4_hdr_info ip4hdr_info;
  43. eth_l4_hdr_info l4hdr_info;
  44. };
  45. void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
  46. {
  47. struct NetRxPkt *p = g_malloc0(sizeof *p);
  48. p->has_virt_hdr = has_virt_hdr;
  49. p->vec = NULL;
  50. p->vec_len_total = 0;
  51. *pkt = p;
  52. }
  53. void net_rx_pkt_uninit(struct NetRxPkt *pkt)
  54. {
  55. if (pkt->vec_len_total != 0) {
  56. g_free(pkt->vec);
  57. }
  58. g_free(pkt);
  59. }
  60. struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
  61. {
  62. assert(pkt);
  63. return &pkt->virt_hdr;
  64. }
  65. static inline void
  66. net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt,
  67. int new_iov_len)
  68. {
  69. if (pkt->vec_len_total < new_iov_len) {
  70. g_free(pkt->vec);
  71. pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len);
  72. pkt->vec_len_total = new_iov_len;
  73. }
  74. }
  75. static void
  76. net_rx_pkt_pull_data(struct NetRxPkt *pkt,
  77. const struct iovec *iov, int iovcnt,
  78. size_t ploff)
  79. {
  80. uint32_t pllen = iov_size(iov, iovcnt) - ploff;
  81. if (pkt->ehdr_buf_len) {
  82. net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
  83. pkt->vec[0].iov_base = pkt->ehdr_buf;
  84. pkt->vec[0].iov_len = pkt->ehdr_buf_len;
  85. pkt->tot_len = pllen + pkt->ehdr_buf_len;
  86. pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
  87. iov, iovcnt, ploff, pllen) + 1;
  88. } else {
  89. net_rx_pkt_iovec_realloc(pkt, iovcnt);
  90. pkt->tot_len = pllen;
  91. pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
  92. iov, iovcnt, ploff, pkt->tot_len);
  93. }
  94. eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6,
  95. &pkt->isudp, &pkt->istcp,
  96. &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
  97. &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
  98. trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp,
  99. pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
  100. }
  101. void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
  102. const struct iovec *iov, int iovcnt,
  103. size_t iovoff, bool strip_vlan)
  104. {
  105. uint16_t tci = 0;
  106. uint16_t ploff = iovoff;
  107. assert(pkt);
  108. if (strip_vlan) {
  109. pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
  110. &ploff, &tci);
  111. } else {
  112. pkt->ehdr_buf_len = 0;
  113. }
  114. pkt->tci = tci;
  115. net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
  116. }
  117. void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
  118. const struct iovec *iov, int iovcnt,
  119. size_t iovoff, bool strip_vlan,
  120. uint16_t vet)
  121. {
  122. uint16_t tci = 0;
  123. uint16_t ploff = iovoff;
  124. assert(pkt);
  125. if (strip_vlan) {
  126. pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
  127. pkt->ehdr_buf,
  128. &ploff, &tci);
  129. } else {
  130. pkt->ehdr_buf_len = 0;
  131. }
  132. pkt->tci = tci;
  133. net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
  134. }
  135. void net_rx_pkt_dump(struct NetRxPkt *pkt)
  136. {
  137. #ifdef NET_RX_PKT_DEBUG
  138. assert(pkt);
  139. printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n",
  140. pkt->tot_len, pkt->ehdr_buf_len, pkt->tci);
  141. #endif
  142. }
  143. void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
  144. eth_pkt_types_e packet_type)
  145. {
  146. assert(pkt);
  147. pkt->packet_type = packet_type;
  148. }
  149. eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt)
  150. {
  151. assert(pkt);
  152. return pkt->packet_type;
  153. }
  154. size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
  155. {
  156. assert(pkt);
  157. return pkt->tot_len;
  158. }
  159. void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
  160. size_t len)
  161. {
  162. const struct iovec iov = {
  163. .iov_base = (void *)data,
  164. .iov_len = len
  165. };
  166. assert(pkt);
  167. eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6,
  168. &pkt->isudp, &pkt->istcp,
  169. &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
  170. &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
  171. }
  172. void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
  173. bool *isip4, bool *isip6,
  174. bool *isudp, bool *istcp)
  175. {
  176. assert(pkt);
  177. *isip4 = pkt->isip4;
  178. *isip6 = pkt->isip6;
  179. *isudp = pkt->isudp;
  180. *istcp = pkt->istcp;
  181. }
  182. size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt)
  183. {
  184. assert(pkt);
  185. return pkt->l3hdr_off;
  186. }
  187. size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt)
  188. {
  189. assert(pkt);
  190. return pkt->l4hdr_off;
  191. }
  192. size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt)
  193. {
  194. assert(pkt);
  195. return pkt->l5hdr_off;
  196. }
  197. eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt)
  198. {
  199. return &pkt->ip6hdr_info;
  200. }
  201. eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt)
  202. {
  203. return &pkt->ip4hdr_info;
  204. }
  205. eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt)
  206. {
  207. return &pkt->l4hdr_info;
  208. }
  209. static inline void
  210. _net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written,
  211. void *ptr, size_t size)
  212. {
  213. memcpy(&rss_input[*bytes_written], ptr, size);
  214. trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written);
  215. *bytes_written += size;
  216. }
  217. static inline void
  218. _net_rx_rss_prepare_ip4(uint8_t *rss_input,
  219. struct NetRxPkt *pkt,
  220. size_t *bytes_written)
  221. {
  222. struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr;
  223. _net_rx_rss_add_chunk(rss_input, bytes_written,
  224. &ip4_hdr->ip_src, sizeof(uint32_t));
  225. _net_rx_rss_add_chunk(rss_input, bytes_written,
  226. &ip4_hdr->ip_dst, sizeof(uint32_t));
  227. }
  228. static inline void
  229. _net_rx_rss_prepare_ip6(uint8_t *rss_input,
  230. struct NetRxPkt *pkt,
  231. bool ipv6ex, size_t *bytes_written)
  232. {
  233. eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info;
  234. _net_rx_rss_add_chunk(rss_input, bytes_written,
  235. (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src
  236. : &ip6info->ip6_hdr.ip6_src,
  237. sizeof(struct in6_address));
  238. _net_rx_rss_add_chunk(rss_input, bytes_written,
  239. (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst
  240. : &ip6info->ip6_hdr.ip6_dst,
  241. sizeof(struct in6_address));
  242. }
  243. static inline void
  244. _net_rx_rss_prepare_tcp(uint8_t *rss_input,
  245. struct NetRxPkt *pkt,
  246. size_t *bytes_written)
  247. {
  248. struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp;
  249. _net_rx_rss_add_chunk(rss_input, bytes_written,
  250. &tcphdr->th_sport, sizeof(uint16_t));
  251. _net_rx_rss_add_chunk(rss_input, bytes_written,
  252. &tcphdr->th_dport, sizeof(uint16_t));
  253. }
  254. uint32_t
  255. net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
  256. NetRxPktRssType type,
  257. uint8_t *key)
  258. {
  259. uint8_t rss_input[36];
  260. size_t rss_length = 0;
  261. uint32_t rss_hash = 0;
  262. net_toeplitz_key key_data;
  263. switch (type) {
  264. case NetPktRssIpV4:
  265. assert(pkt->isip4);
  266. trace_net_rx_pkt_rss_ip4();
  267. _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
  268. break;
  269. case NetPktRssIpV4Tcp:
  270. assert(pkt->isip4);
  271. assert(pkt->istcp);
  272. trace_net_rx_pkt_rss_ip4_tcp();
  273. _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
  274. _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
  275. break;
  276. case NetPktRssIpV6Tcp:
  277. assert(pkt->isip6);
  278. assert(pkt->istcp);
  279. trace_net_rx_pkt_rss_ip6_tcp();
  280. _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
  281. _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
  282. break;
  283. case NetPktRssIpV6:
  284. assert(pkt->isip6);
  285. trace_net_rx_pkt_rss_ip6();
  286. _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
  287. break;
  288. case NetPktRssIpV6Ex:
  289. assert(pkt->isip6);
  290. trace_net_rx_pkt_rss_ip6_ex();
  291. _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
  292. break;
  293. default:
  294. assert(false);
  295. break;
  296. }
  297. net_toeplitz_key_init(&key_data, key);
  298. net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data);
  299. trace_net_rx_pkt_rss_hash(rss_length, rss_hash);
  300. return rss_hash;
  301. }
  302. uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt)
  303. {
  304. assert(pkt);
  305. if (pkt->isip4) {
  306. return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id);
  307. }
  308. return 0;
  309. }
  310. bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt)
  311. {
  312. assert(pkt);
  313. if (pkt->istcp) {
  314. return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK;
  315. }
  316. return false;
  317. }
  318. bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt)
  319. {
  320. assert(pkt);
  321. if (pkt->istcp) {
  322. return pkt->l4hdr_info.has_tcp_data;
  323. }
  324. return false;
  325. }
  326. struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
  327. {
  328. assert(pkt);
  329. return pkt->vec;
  330. }
  331. uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt)
  332. {
  333. assert(pkt);
  334. return pkt->vec_len;
  335. }
  336. void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
  337. struct virtio_net_hdr *vhdr)
  338. {
  339. assert(pkt);
  340. memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
  341. }
  342. void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
  343. const struct iovec *iov, int iovcnt)
  344. {
  345. assert(pkt);
  346. iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
  347. }
  348. bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
  349. {
  350. assert(pkt);
  351. return pkt->ehdr_buf_len ? true : false;
  352. }
  353. bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt)
  354. {
  355. assert(pkt);
  356. return pkt->has_virt_hdr;
  357. }
  358. uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
  359. {
  360. assert(pkt);
  361. return pkt->tci;
  362. }
  363. bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid)
  364. {
  365. uint32_t cntr;
  366. uint16_t csum;
  367. uint32_t csl;
  368. trace_net_rx_pkt_l3_csum_validate_entry();
  369. if (!pkt->isip4) {
  370. trace_net_rx_pkt_l3_csum_validate_not_ip4();
  371. return false;
  372. }
  373. csl = pkt->l4hdr_off - pkt->l3hdr_off;
  374. cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len,
  375. pkt->l3hdr_off,
  376. csl, 0);
  377. csum = net_checksum_finish(cntr);
  378. *csum_valid = (csum == 0);
  379. trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl,
  380. cntr, csum, *csum_valid);
  381. return true;
  382. }
  383. static uint16_t
  384. _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)
  385. {
  386. uint32_t cntr;
  387. uint16_t csum;
  388. uint16_t csl;
  389. uint32_t cso;
  390. trace_net_rx_pkt_l4_csum_calc_entry();
  391. if (pkt->isip4) {
  392. if (pkt->isudp) {
  393. csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
  394. trace_net_rx_pkt_l4_csum_calc_ip4_udp();
  395. } else {
  396. csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) -
  397. IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr);
  398. trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
  399. }
  400. cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr,
  401. csl, &cso);
  402. trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
  403. } else {
  404. if (pkt->isudp) {
  405. csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
  406. trace_net_rx_pkt_l4_csum_calc_ip6_udp();
  407. } else {
  408. struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr;
  409. size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off;
  410. size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
  411. csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) -
  412. ip6opts_len;
  413. trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
  414. }
  415. cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl,
  416. pkt->ip6hdr_info.l4proto, &cso);
  417. trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
  418. }
  419. cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len,
  420. pkt->l4hdr_off, csl, cso);
  421. csum = net_checksum_finish_nozero(cntr);
  422. trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum);
  423. return csum;
  424. }
  425. bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
  426. {
  427. uint16_t csum;
  428. trace_net_rx_pkt_l4_csum_validate_entry();
  429. if (!pkt->istcp && !pkt->isudp) {
  430. trace_net_rx_pkt_l4_csum_validate_not_xxp();
  431. return false;
  432. }
  433. if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) {
  434. trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
  435. return false;
  436. }
  437. if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
  438. trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
  439. return false;
  440. }
  441. csum = _net_rx_pkt_calc_l4_csum(pkt);
  442. *csum_valid = ((csum == 0) || (csum == 0xFFFF));
  443. trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid);
  444. return true;
  445. }
  446. bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt)
  447. {
  448. uint16_t csum = 0;
  449. uint32_t l4_cso;
  450. trace_net_rx_pkt_l4_csum_fix_entry();
  451. if (pkt->istcp) {
  452. l4_cso = offsetof(struct tcp_header, th_sum);
  453. trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso);
  454. } else if (pkt->isudp) {
  455. if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
  456. trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
  457. return false;
  458. }
  459. l4_cso = offsetof(struct udp_header, uh_sum);
  460. trace_net_rx_pkt_l4_csum_fix_udp(l4_cso);
  461. } else {
  462. trace_net_rx_pkt_l4_csum_fix_not_xxp();
  463. return false;
  464. }
  465. if (pkt->isip4 && pkt->ip4hdr_info.fragment) {
  466. trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
  467. return false;
  468. }
  469. /* Set zero to checksum word */
  470. iov_from_buf(pkt->vec, pkt->vec_len,
  471. pkt->l4hdr_off + l4_cso,
  472. &csum, sizeof(csum));
  473. /* Calculate L4 checksum */
  474. csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt));
  475. /* Set calculated checksum to checksum word */
  476. iov_from_buf(pkt->vec, pkt->vec_len,
  477. pkt->l4hdr_off + l4_cso,
  478. &csum, sizeof(csum));
  479. trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum);
  480. return true;
  481. }