2
0

colo.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. /*
  2. * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
  3. * (a.k.a. Fault Tolerance or Continuous Replication)
  4. *
  5. * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
  6. * Copyright (c) 2016 FUJITSU LIMITED
  7. * Copyright (c) 2016 Intel Corporation
  8. *
  9. * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
  10. *
  11. * This work is licensed under the terms of the GNU GPL, version 2 or
  12. * later. See the COPYING file in the top-level directory.
  13. */
  14. #include "qemu/osdep.h"
  15. #include "trace.h"
  16. #include "colo.h"
  17. #include "util.h"
  18. uint32_t connection_key_hash(const void *opaque)
  19. {
  20. const ConnectionKey *key = opaque;
  21. uint32_t a, b, c;
  22. /* Jenkins hash */
  23. a = b = c = JHASH_INITVAL + sizeof(*key);
  24. a += key->src.s_addr;
  25. b += key->dst.s_addr;
  26. c += (key->src_port | key->dst_port << 16);
  27. __jhash_mix(a, b, c);
  28. a += key->ip_proto;
  29. __jhash_final(a, b, c);
  30. return c;
  31. }
  32. int connection_key_equal(const void *key1, const void *key2)
  33. {
  34. return memcmp(key1, key2, sizeof(ConnectionKey)) == 0;
  35. }
  36. int parse_packet_early(Packet *pkt)
  37. {
  38. int network_length;
  39. static const uint8_t vlan[] = {0x81, 0x00};
  40. uint8_t *data = pkt->data;
  41. uint16_t l3_proto;
  42. ssize_t l2hdr_len;
  43. assert(data);
  44. /* Check the received vnet_hdr_len then add the offset */
  45. if ((pkt->vnet_hdr_len > sizeof(struct virtio_net_hdr_v1_hash)) ||
  46. (pkt->size < sizeof(struct eth_header) + sizeof(struct vlan_header) +
  47. pkt->vnet_hdr_len)) {
  48. /*
  49. * The received remote packet maybe misconfiguration here,
  50. * Please enable/disable filter module's the vnet_hdr flag at
  51. * the same time.
  52. */
  53. trace_colo_proxy_main_vnet_info("This received packet load wrong ",
  54. pkt->vnet_hdr_len, pkt->size);
  55. return 1;
  56. }
  57. data += pkt->vnet_hdr_len;
  58. l2hdr_len = eth_get_l2_hdr_length(data);
  59. /*
  60. * TODO: support vlan.
  61. */
  62. if (!memcmp(&data[12], vlan, sizeof(vlan))) {
  63. trace_colo_proxy_main("COLO-proxy don't support vlan");
  64. return 1;
  65. }
  66. pkt->network_header = data + l2hdr_len;
  67. const struct iovec l2vec = {
  68. .iov_base = (void *) data,
  69. .iov_len = l2hdr_len
  70. };
  71. l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len);
  72. if (l3_proto != ETH_P_IP) {
  73. return 1;
  74. }
  75. network_length = pkt->ip->ip_hl * 4;
  76. if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
  77. trace_colo_proxy_main("pkt->size < network_header + network_length");
  78. return 1;
  79. }
  80. pkt->transport_header = pkt->network_header + network_length;
  81. return 0;
  82. }
  83. void extract_ip_and_port(uint32_t tmp_ports, ConnectionKey *key,
  84. Packet *pkt, bool reverse)
  85. {
  86. if (reverse) {
  87. key->src = pkt->ip->ip_dst;
  88. key->dst = pkt->ip->ip_src;
  89. key->src_port = ntohs(tmp_ports & 0xffff);
  90. key->dst_port = ntohs(tmp_ports >> 16);
  91. } else {
  92. key->src = pkt->ip->ip_src;
  93. key->dst = pkt->ip->ip_dst;
  94. key->src_port = ntohs(tmp_ports >> 16);
  95. key->dst_port = ntohs(tmp_ports & 0xffff);
  96. }
  97. }
  98. void fill_connection_key(Packet *pkt, ConnectionKey *key, bool reverse)
  99. {
  100. uint32_t tmp_ports = 0;
  101. key->ip_proto = pkt->ip->ip_p;
  102. switch (key->ip_proto) {
  103. case IPPROTO_TCP:
  104. case IPPROTO_UDP:
  105. case IPPROTO_DCCP:
  106. case IPPROTO_ESP:
  107. case IPPROTO_SCTP:
  108. case IPPROTO_UDPLITE:
  109. tmp_ports = *(uint32_t *)(pkt->transport_header);
  110. break;
  111. case IPPROTO_AH:
  112. tmp_ports = *(uint32_t *)(pkt->transport_header + 4);
  113. break;
  114. default:
  115. break;
  116. }
  117. extract_ip_and_port(tmp_ports, key, pkt, reverse);
  118. }
  119. Connection *connection_new(ConnectionKey *key)
  120. {
  121. Connection *conn = g_slice_new0(Connection);
  122. conn->ip_proto = key->ip_proto;
  123. conn->processing = false;
  124. conn->tcp_state = TCPS_CLOSED;
  125. g_queue_init(&conn->primary_list);
  126. g_queue_init(&conn->secondary_list);
  127. return conn;
  128. }
  129. void connection_destroy(void *opaque)
  130. {
  131. Connection *conn = opaque;
  132. g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
  133. g_queue_clear(&conn->primary_list);
  134. g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
  135. g_queue_clear(&conn->secondary_list);
  136. g_slice_free(Connection, conn);
  137. }
  138. Packet *packet_new(const void *data, int size, int vnet_hdr_len)
  139. {
  140. Packet *pkt = g_slice_new0(Packet);
  141. pkt->data = g_memdup(data, size);
  142. pkt->size = size;
  143. pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
  144. pkt->vnet_hdr_len = vnet_hdr_len;
  145. return pkt;
  146. }
  147. /*
  148. * packet_new_nocopy will not copy data, so the caller can't release
  149. * the data. And it will be released in packet_destroy.
  150. */
  151. Packet *packet_new_nocopy(void *data, int size, int vnet_hdr_len)
  152. {
  153. Packet *pkt = g_slice_new0(Packet);
  154. pkt->data = data;
  155. pkt->size = size;
  156. pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
  157. pkt->vnet_hdr_len = vnet_hdr_len;
  158. return pkt;
  159. }
  160. void packet_destroy(void *opaque, void *user_data)
  161. {
  162. Packet *pkt = opaque;
  163. g_free(pkt->data);
  164. g_slice_free(Packet, pkt);
  165. }
  166. void packet_destroy_partial(void *opaque, void *user_data)
  167. {
  168. Packet *pkt = opaque;
  169. g_slice_free(Packet, pkt);
  170. }
  171. /*
  172. * Clear hashtable, stop this hash growing really huge
  173. */
  174. void connection_hashtable_reset(GHashTable *connection_track_table)
  175. {
  176. g_hash_table_remove_all(connection_track_table);
  177. }
  178. /* if not found, create a new connection and add to hash table */
  179. Connection *connection_get(GHashTable *connection_track_table,
  180. ConnectionKey *key,
  181. GQueue *conn_list)
  182. {
  183. Connection *conn = g_hash_table_lookup(connection_track_table, key);
  184. if (conn == NULL) {
  185. ConnectionKey *new_key = g_memdup(key, sizeof(*key));
  186. conn = connection_new(key);
  187. if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) {
  188. trace_colo_proxy_main("colo proxy connection hashtable full,"
  189. " clear it");
  190. connection_hashtable_reset(connection_track_table);
  191. /*
  192. * clear the conn_list
  193. */
  194. while (conn_list && !g_queue_is_empty(conn_list)) {
  195. connection_destroy(g_queue_pop_head(conn_list));
  196. }
  197. }
  198. g_hash_table_insert(connection_track_table, new_key, conn);
  199. }
  200. return conn;
  201. }
  202. bool connection_has_tracked(GHashTable *connection_track_table,
  203. ConnectionKey *key)
  204. {
  205. Connection *conn = g_hash_table_lookup(connection_track_table, key);
  206. return conn ? true : false;
  207. }