multifd.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269
  1. /*
  2. * Multifd common code
  3. *
  4. * Copyright (c) 2019-2020 Red Hat Inc
  5. *
  6. * Authors:
  7. * Juan Quintela <quintela@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "qemu/rcu.h"
  14. #include "exec/target_page.h"
  15. #include "sysemu/sysemu.h"
  16. #include "exec/ramblock.h"
  17. #include "qemu/error-report.h"
  18. #include "qapi/error.h"
  19. #include "ram.h"
  20. #include "migration.h"
  21. #include "socket.h"
  22. #include "tls.h"
  23. #include "qemu-file.h"
  24. #include "trace.h"
  25. #include "multifd.h"
  26. #include "threadinfo.h"
  27. #include "qemu/yank.h"
  28. #include "io/channel-socket.h"
  29. #include "yank_functions.h"
  30. /* Multiple fd's */
  31. #define MULTIFD_MAGIC 0x11223344U
  32. #define MULTIFD_VERSION 1
/*
 * Handshake message exchanged on every multifd channel right after it
 * connects, before any data packet.  Multi-byte fields are sent in big
 * endian (see multifd_send_initial_packet()).
 */
typedef struct {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16]; /* QemuUUID */
    uint8_t id;             /* channel number, 0..channels-1 */
    uint8_t unused1[7];     /* Reserved for future use */
    uint64_t unused2[4];    /* Reserved for future use */
} __attribute__((packed)) MultiFDInit_t;
  41. /* Multifd without compression */
  42. /**
  43. * nocomp_send_setup: setup send side
  44. *
  45. * For no compression this function does nothing.
  46. *
  47. * Returns 0 for success or -1 for error
  48. *
  49. * @p: Params for the channel that we are using
  50. * @errp: pointer to an error
  51. */
  52. static int nocomp_send_setup(MultiFDSendParams *p, Error **errp)
  53. {
  54. return 0;
  55. }
  56. /**
  57. * nocomp_send_cleanup: cleanup send side
  58. *
  59. * For no compression this function does nothing.
  60. *
  61. * @p: Params for the channel that we are using
  62. * @errp: pointer to an error
  63. */
  64. static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
  65. {
  66. return;
  67. }
  68. /**
  69. * nocomp_send_prepare: prepare date to be able to send
  70. *
  71. * For no compression we just have to calculate the size of the
  72. * packet.
  73. *
  74. * Returns 0 for success or -1 for error
  75. *
  76. * @p: Params for the channel that we are using
  77. * @errp: pointer to an error
  78. */
  79. static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
  80. {
  81. MultiFDPages_t *pages = p->pages;
  82. for (int i = 0; i < p->normal_num; i++) {
  83. p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
  84. p->iov[p->iovs_num].iov_len = p->page_size;
  85. p->iovs_num++;
  86. }
  87. p->next_packet_size = p->normal_num * p->page_size;
  88. p->flags |= MULTIFD_FLAG_NOCOMP;
  89. return 0;
  90. }
  91. /**
  92. * nocomp_recv_setup: setup receive side
  93. *
  94. * For no compression this function does nothing.
  95. *
  96. * Returns 0 for success or -1 for error
  97. *
  98. * @p: Params for the channel that we are using
  99. * @errp: pointer to an error
  100. */
  101. static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp)
  102. {
  103. return 0;
  104. }
  105. /**
  106. * nocomp_recv_cleanup: setup receive side
  107. *
  108. * For no compression this function does nothing.
  109. *
  110. * @p: Params for the channel that we are using
  111. */
  112. static void nocomp_recv_cleanup(MultiFDRecvParams *p)
  113. {
  114. }
  115. /**
  116. * nocomp_recv_pages: read the data from the channel into actual pages
  117. *
  118. * For no compression we just need to read things into the correct place.
  119. *
  120. * Returns 0 for success or -1 for error
  121. *
  122. * @p: Params for the channel that we are using
  123. * @errp: pointer to an error
  124. */
  125. static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
  126. {
  127. uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
  128. if (flags != MULTIFD_FLAG_NOCOMP) {
  129. error_setg(errp, "multifd %u: flags received %x flags expected %x",
  130. p->id, flags, MULTIFD_FLAG_NOCOMP);
  131. return -1;
  132. }
  133. for (int i = 0; i < p->normal_num; i++) {
  134. p->iov[i].iov_base = p->host + p->normal[i];
  135. p->iov[i].iov_len = p->page_size;
  136. }
  137. return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp);
  138. }
/* Dispatch table for the built-in "no compression" method */
static MultiFDMethods multifd_nocomp_ops = {
    .send_setup = nocomp_send_setup,
    .send_cleanup = nocomp_send_cleanup,
    .send_prepare = nocomp_send_prepare,
    .recv_setup = nocomp_recv_setup,
    .recv_cleanup = nocomp_recv_cleanup,
    .recv_pages = nocomp_recv_pages
};
/*
 * Per-compression-method ops.  Slot NONE is built in; other methods
 * register themselves through multifd_register_ops().
 */
static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = {
    [MULTIFD_COMPRESSION_NONE] = &multifd_nocomp_ops,
};
  150. void multifd_register_ops(int method, MultiFDMethods *ops)
  151. {
  152. assert(0 < method && method < MULTIFD_COMPRESSION__MAX);
  153. multifd_ops[method] = ops;
  154. }
  155. static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
  156. {
  157. MultiFDInit_t msg = {};
  158. int ret;
  159. msg.magic = cpu_to_be32(MULTIFD_MAGIC);
  160. msg.version = cpu_to_be32(MULTIFD_VERSION);
  161. msg.id = p->id;
  162. memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
  163. ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
  164. if (ret != 0) {
  165. return -1;
  166. }
  167. return 0;
  168. }
  169. static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
  170. {
  171. MultiFDInit_t msg;
  172. int ret;
  173. ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
  174. if (ret != 0) {
  175. return -1;
  176. }
  177. msg.magic = be32_to_cpu(msg.magic);
  178. msg.version = be32_to_cpu(msg.version);
  179. if (msg.magic != MULTIFD_MAGIC) {
  180. error_setg(errp, "multifd: received packet magic %x "
  181. "expected %x", msg.magic, MULTIFD_MAGIC);
  182. return -1;
  183. }
  184. if (msg.version != MULTIFD_VERSION) {
  185. error_setg(errp, "multifd: received packet version %u "
  186. "expected %u", msg.version, MULTIFD_VERSION);
  187. return -1;
  188. }
  189. if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
  190. char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
  191. char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
  192. error_setg(errp, "multifd: received uuid '%s' and expected "
  193. "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
  194. g_free(uuid);
  195. g_free(msg_uuid);
  196. return -1;
  197. }
  198. if (msg.id > migrate_multifd_channels()) {
  199. error_setg(errp, "multifd: received channel version %u "
  200. "expected %u", msg.version, MULTIFD_VERSION);
  201. return -1;
  202. }
  203. return msg.id;
  204. }
  205. static MultiFDPages_t *multifd_pages_init(size_t size)
  206. {
  207. MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
  208. pages->allocated = size;
  209. pages->offset = g_new0(ram_addr_t, size);
  210. return pages;
  211. }
  212. static void multifd_pages_clear(MultiFDPages_t *pages)
  213. {
  214. pages->num = 0;
  215. pages->allocated = 0;
  216. pages->packet_num = 0;
  217. pages->block = NULL;
  218. g_free(pages->offset);
  219. pages->offset = NULL;
  220. g_free(pages);
  221. }
/*
 * Fill the wire packet header from the channel state, converting every
 * multi-byte field to big endian.  Called from the send thread while it
 * holds p->mutex, so p->pages and the counters are stable.
 */
static void multifd_send_fill_packet(MultiFDSendParams *p)
{
    MultiFDPacket_t *packet = p->packet;
    int i;

    packet->flags = cpu_to_be32(p->flags);
    packet->pages_alloc = cpu_to_be32(p->pages->allocated);
    packet->normal_pages = cpu_to_be32(p->normal_num);
    packet->next_packet_size = cpu_to_be32(p->next_packet_size);
    packet->packet_num = cpu_to_be64(p->packet_num);

    if (p->pages->block) {
        /*
         * strncpy() zero-pads the remainder of the 256-byte field; it
         * would not NUL-terminate a 256-byte idstr, but the receiver
         * forces packet->ramblock[255] = 0 before using it.
         */
        strncpy(packet->ramblock, p->pages->block->idstr, 256);
    }

    for (i = 0; i < p->normal_num; i++) {
        /* there are architectures where ram_addr_t is 32 bit */
        uint64_t temp = p->normal[i];

        packet->offset[i] = cpu_to_be64(temp);
    }
}
/*
 * Parse and validate a received packet header: byte-swap the fields,
 * check magic/version/sizes, resolve the RAMBlock and convert the page
 * offsets into p->normal[].
 *
 * Returns 0 on success, -1 on error (setting @errp).
 */
static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
    MultiFDPacket_t *packet = p->packet;
    RAMBlock *block;
    int i;

    packet->magic = be32_to_cpu(packet->magic);
    if (packet->magic != MULTIFD_MAGIC) {
        error_setg(errp, "multifd: received packet "
                   "magic %x and expected magic %x",
                   packet->magic, MULTIFD_MAGIC);
        return -1;
    }

    packet->version = be32_to_cpu(packet->version);
    if (packet->version != MULTIFD_VERSION) {
        error_setg(errp, "multifd: received packet "
                   "version %u and expected version %u",
                   packet->version, MULTIFD_VERSION);
        return -1;
    }

    p->flags = be32_to_cpu(packet->flags);

    packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
    /*
     * A packet claiming more pages than this side allocates per packet
     * cannot be valid; stop the migration rather than overrun buffers.
     */
    if (packet->pages_alloc > p->page_count) {
        error_setg(errp, "multifd: received packet "
                   "with size %u and expected a size of %u",
                   packet->pages_alloc, p->page_count) ;
        return -1;
    }

    p->normal_num = be32_to_cpu(packet->normal_pages);
    if (p->normal_num > packet->pages_alloc) {
        error_setg(errp, "multifd: received packet "
                   "with %u pages and expected maximum pages are %u",
                   p->normal_num, packet->pages_alloc) ;
        return -1;
    }

    p->next_packet_size = be32_to_cpu(packet->next_packet_size);
    p->packet_num = be64_to_cpu(packet->packet_num);

    if (p->normal_num == 0) {
        return 0;
    }

    /* make sure that ramblock is 0 terminated */
    packet->ramblock[255] = 0;
    block = qemu_ram_block_by_name(packet->ramblock);
    if (!block) {
        error_setg(errp, "multifd: unknown ram block %s",
                   packet->ramblock);
        return -1;
    }

    p->host = block->host;
    for (i = 0; i < p->normal_num; i++) {
        uint64_t offset = be64_to_cpu(packet->offset[i]);

        /*
         * NOTE(review): if block->used_length < p->page_size this
         * subtraction wraps; presumably RAMBlocks are always at least
         * one page long — confirm.
         */
        if (offset > (block->used_length - p->page_size)) {
            error_setg(errp, "multifd: offset too long %" PRIu64
                       " (max " RAM_ADDR_FMT ")",
                       offset, block->used_length);
            return -1;
        }
        p->normal[i] = offset;
    }

    return 0;
}
/*
 * Global state shared by the migration thread and all send channel
 * threads.  Allocated in multifd_save_setup(), freed in
 * multifd_save_cleanup().
 */
struct {
    /* one entry per send channel thread */
    MultiFDSendParams *params;
    /* array of pages to be sent */
    MultiFDPages_t *pages;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* send channels ready */
    QemuSemaphore channels_ready;
    /*
     * Have we already run terminate threads.  There is a race when it
     * happens that we got one error while we are exiting.
     * We will use atomic operations.  Only valid values are 0 and 1.
     */
    int exiting;
    /* multifd ops */
    MultiFDMethods *ops;
} *multifd_send_state;

/*
 * How do we use multifd_send_state->pages and channel->pages?
 *
 * We create a "pages" struct for each channel, and a main one.  Each
 * time that we need to send a batch of pages we interchange the ones
 * between multifd_send_state and the channel that is sending it.  There
 * are two reasons for that:
 *    - to not have to do so many mallocs during migration
 *    - to make it easier to know what to free at the end of migration
 *
 * This way we always know who is the owner of each "pages" struct,
 * and we don't need any locking.  It belongs to the migration thread
 * or to the channel thread.  Switching is safe because the migration
 * thread is using the channel mutex when changing it, and the channel
 * has to have finished with its own, otherwise pending_job can't be
 * false.
 */
/*
 * Hand the current batch of queued pages to an idle send channel.
 *
 * Waits on channels_ready until some channel has finished a job, then
 * scans the channels round-robin for one with no pending_job.  The main
 * batch and the chosen channel's (drained) batch are swapped under the
 * channel mutex, so no page data is copied.
 *
 * Returns 1 on success, -1 if we are exiting or a channel has quit.
 */
static int multifd_send_pages(QEMUFile *f)
{
    int i;
    static int next_channel;
    MultiFDSendParams *p = NULL; /* make happy gcc */
    MultiFDPages_t *pages = multifd_send_state->pages;
    uint64_t transferred;

    if (qatomic_read(&multifd_send_state->exiting)) {
        return -1;
    }

    qemu_sem_wait(&multifd_send_state->channels_ready);
    /*
     * next_channel can remain from a previous migration that was
     * using more channels, so ensure it doesn't overflow if the
     * limit is lower now.
     */
    next_channel %= migrate_multifd_channels();
    for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
        p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        if (p->quit) {
            error_report("%s: channel %d has already quit!", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return -1;
        }
        if (!p->pending_job) {
            p->pending_job++;
            next_channel = (i + 1) % migrate_multifd_channels();
            break;
        }
        qemu_mutex_unlock(&p->mutex);
    }
    /* the channel must have drained its previous batch */
    assert(!p->pages->num);
    assert(!p->pages->block);

    p->packet_num = multifd_send_state->packet_num++;
    /* swap batches: the channel takes ours, we keep its empty one */
    multifd_send_state->pages = p->pages;
    p->pages = pages;
    /* account header + payload against the rate limit and counters */
    transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len;
    qemu_file_acct_rate_limit(f, transferred);
    ram_counters.multifd_bytes += transferred;
    stat64_add(&ram_atomic_counters.transferred, transferred);
    qemu_mutex_unlock(&p->mutex);
    qemu_sem_post(&p->sem);

    return 1;
}
/*
 * Queue one page (@block, @offset) for sending.
 *
 * Pages from the same RAMBlock are batched; a full batch is flushed to
 * a channel via multifd_send_pages().  A page from a different block
 * flushes the current batch first and then retries: the recursion runs
 * at most once more, because after the flush the (swapped-in) batch has
 * block == NULL and the page is accepted directly.
 *
 * Returns 1 on success, -1 on error.
 */
int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    MultiFDPages_t *pages = multifd_send_state->pages;
    bool changed = false;

    if (!pages->block) {
        pages->block = block;
    }

    if (pages->block == block) {
        pages->offset[pages->num] = offset;
        pages->num++;

        if (pages->num < pages->allocated) {
            return 1;
        }
    } else {
        changed = true;
    }

    if (multifd_send_pages(f) < 0) {
        return -1;
    }

    if (changed) {
        return multifd_queue_page(f, block, offset);
    }

    return 1;
}
/*
 * Ask every send channel thread to quit.
 *
 * If @err is set, record it in the migration state and move the
 * migration to FAILED when it is in a state where that transition is
 * legal.  The 'exiting' flag makes the teardown run at most once even
 * if several threads fail concurrently.
 */
static void multifd_send_terminate_threads(Error *err)
{
    int i;

    trace_multifd_send_terminate_threads(err != NULL);

    if (err) {
        MigrationState *s = migrate_get_current();
        migrate_set_error(s, err);
        if (s->state == MIGRATION_STATUS_SETUP ||
            s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
            s->state == MIGRATION_STATUS_DEVICE ||
            s->state == MIGRATION_STATUS_ACTIVE) {
            migrate_set_state(&s->state, s->state,
                              MIGRATION_STATUS_FAILED);
        }
    }

    /*
     * We don't want to exit each threads twice.  Depending on where
     * we get the error, or if there are two independent errors in two
     * threads at the same time, we can end calling this function
     * twice.
     */
    if (qatomic_xchg(&multifd_send_state->exiting, 1)) {
        return;
    }

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        p->quit = true;
        qemu_sem_post(&p->sem);
        /* shut the channel down so blocked I/O in the thread returns */
        if (p->c) {
            qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
        }
        qemu_mutex_unlock(&p->mutex);
    }
}
/*
 * Tear down the whole send side: stop and join every channel thread,
 * release all per-channel resources and finally the global send state.
 * No-op when multifd is disabled; otherwise assumes multifd_save_setup()
 * has run (multifd_send_state != NULL).
 */
void multifd_save_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    multifd_send_terminate_threads(NULL);
    /* join first so no thread is still touching its params below */
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        if (p->running) {
            qemu_thread_join(&p->thread);
        }
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];
        Error *local_err = NULL;

        if (p->registered_yank) {
            migration_ioc_unregister_yank(p->c);
        }
        socket_send_channel_destroy(p->c);
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        multifd_pages_clear(p->pages);
        p->pages = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
        g_free(p->iov);
        p->iov = NULL;
        g_free(p->normal);
        p->normal = NULL;
        /* give the compression method a chance to free its own state */
        multifd_send_state->ops->send_cleanup(p, &local_err);
        if (local_err) {
            migrate_set_error(migrate_get_current(), local_err);
            error_free(local_err);
        }
    }
    qemu_sem_destroy(&multifd_send_state->channels_ready);
    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    multifd_pages_clear(multifd_send_state->pages);
    multifd_send_state->pages = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
}
  491. static int multifd_zero_copy_flush(QIOChannel *c)
  492. {
  493. int ret;
  494. Error *err = NULL;
  495. ret = qio_channel_flush(c, &err);
  496. if (ret < 0) {
  497. error_report_err(err);
  498. return -1;
  499. }
  500. if (ret == 1) {
  501. dirty_sync_missed_zero_copy();
  502. }
  503. return ret;
  504. }
/*
 * Flush any queued pages and synchronize with all send channels.
 *
 * Posts a MULTIFD_FLAG_SYNC job to every channel and then waits on each
 * channel's sem_sync until that packet has been sent.  With zero-copy
 * enabled the channel is also flushed, so every page is on the wire
 * before it can be overwritten by a newer version.
 *
 * Returns 0 on success, -1 on error.
 */
int multifd_send_sync_main(QEMUFile *f)
{
    int i;
    bool flush_zero_copy;

    if (!migrate_use_multifd()) {
        return 0;
    }
    if (multifd_send_state->pages->num) {
        if (multifd_send_pages(f) < 0) {
            error_report("%s: multifd_send_pages fail", __func__);
            return -1;
        }
    }

    /*
     * When using zero-copy, it's necessary to flush the pages before any of
     * the pages can be sent again, so we'll make sure the new version of the
     * pages will always arrive _later_ than the old pages.
     *
     * Currently we achieve this by flushing the zero-page requested writes
     * per ram iteration, but in the future we could potentially optimize it
     * to be less frequent, e.g. only after we finished one whole scanning of
     * all the dirty bitmaps.
     */
    flush_zero_copy = migrate_use_zero_copy_send();

    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_signal(p->id);

        qemu_mutex_lock(&p->mutex);

        if (p->quit) {
            error_report("%s: channel %d has already quit", __func__, i);
            qemu_mutex_unlock(&p->mutex);
            return -1;
        }

        p->packet_num = multifd_send_state->packet_num++;
        p->flags |= MULTIFD_FLAG_SYNC;
        p->pending_job++;
        /* the sync packet itself also counts against the rate limit */
        qemu_file_acct_rate_limit(f, p->packet_len);
        ram_counters.multifd_bytes += p->packet_len;
        stat64_add(&ram_atomic_counters.transferred, p->packet_len);
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_post(&p->sem);
    }
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        trace_multifd_send_sync_main_wait(p->id);
        qemu_sem_wait(&p->sem_sync);

        if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
            return -1;
        }
    }
    trace_multifd_send_sync_main(multifd_send_state->packet_num);

    return 0;
}
/*
 * Worker thread of one send channel.
 *
 * Sends the initial handshake packet, then loops: wait on p->sem for a
 * job, build the packet header (plus iovecs via the method's
 * send_prepare hook) under p->mutex, and write header + pages to the
 * channel.  Exits on error, on p->quit, or when the global 'exiting'
 * flag is set.
 */
static void *multifd_send_thread(void *opaque)
{
    MultiFDSendParams *p = opaque;
    MigrationThread *thread = NULL;
    Error *local_err = NULL;
    int ret = 0;
    bool use_zero_copy_send = migrate_use_zero_copy_send();

    thread = MigrationThreadAdd(p->name, qemu_get_thread_id());

    trace_multifd_send_thread_start(p->id);
    rcu_register_thread();

    if (multifd_send_initial_packet(p, &local_err) < 0) {
        ret = -1;
        goto out;
    }
    /* initial packet */
    p->num_packets = 1;

    while (true) {
        qemu_sem_wait(&p->sem);

        if (qatomic_read(&multifd_send_state->exiting)) {
            break;
        }
        qemu_mutex_lock(&p->mutex);

        if (p->pending_job) {
            uint64_t packet_num = p->packet_num;
            uint32_t flags;
            p->normal_num = 0;

            /*
             * Without zero-copy the header shares the writev call, so
             * iovec slot 0 is reserved for it; with zero-copy the
             * header goes out separately below.
             */
            if (use_zero_copy_send) {
                p->iovs_num = 0;
            } else {
                p->iovs_num = 1;
            }

            for (int i = 0; i < p->pages->num; i++) {
                p->normal[p->normal_num] = p->pages->offset[i];
                p->normal_num++;
            }

            if (p->normal_num) {
                ret = multifd_send_state->ops->send_prepare(p, &local_err);
                if (ret != 0) {
                    qemu_mutex_unlock(&p->mutex);
                    break;
                }
            }
            multifd_send_fill_packet(p);
            flags = p->flags;
            p->flags = 0;
            p->num_packets++;
            p->total_normal_pages += p->normal_num;
            /* mark the batch drained so multifd_send_pages() can reuse it */
            p->pages->num = 0;
            p->pages->block = NULL;
            qemu_mutex_unlock(&p->mutex);

            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
                               p->next_packet_size);

            if (use_zero_copy_send) {
                /* Send header first, without zerocopy */
                ret = qio_channel_write_all(p->c, (void *)p->packet,
                                            p->packet_len, &local_err);
                if (ret != 0) {
                    break;
                }
            } else {
                /* Send header using the same writev call */
                p->iov[0].iov_len = p->packet_len;
                p->iov[0].iov_base = p->packet;
            }

            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
                                              0, p->write_flags, &local_err);
            if (ret != 0) {
                break;
            }

            qemu_mutex_lock(&p->mutex);
            p->pending_job--;
            qemu_mutex_unlock(&p->mutex);

            if (flags & MULTIFD_FLAG_SYNC) {
                qemu_sem_post(&p->sem_sync);
            }
            qemu_sem_post(&multifd_send_state->channels_ready);
        } else if (p->quit) {
            qemu_mutex_unlock(&p->mutex);
            break;
        } else {
            qemu_mutex_unlock(&p->mutex);
            /* sometimes there are spurious wakeups */
        }
    }

out:
    if (local_err) {
        trace_multifd_send_error(p->id);
        multifd_send_terminate_threads(local_err);
        error_free(local_err);
    }

    /*
     * On error we cannot just exit: post the semaphores so that
     * whoever is waiting on this channel does not block forever.
     */
    if (ret != 0) {
        qemu_sem_post(&p->sem_sync);
        qemu_sem_post(&multifd_send_state->channels_ready);
    }

    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    MigrationThreadDel(thread);
    trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);

    return NULL;
}
static bool multifd_channel_connect(MultiFDSendParams *p,
                                    QIOChannel *ioc,
                                    Error *error);

/*
 * Completion callback of the TLS handshake started by
 * multifd_tls_channel_connect().  On success hands the upgraded channel
 * to multifd_channel_connect(); on failure marks the channel as quit
 * and wakes up anybody waiting on it.
 */
static void multifd_tls_outgoing_handshake(QIOTask *task,
                                           gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *err = NULL;

    if (qio_task_propagate_error(task, &err)) {
        trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err));
    } else {
        trace_multifd_tls_outgoing_handshake_complete(ioc);
    }

    if (!multifd_channel_connect(p, ioc, err)) {
        /*
         * The send thread was never created for this channel; mark it
         * as quit and post the semaphores so waiters don't block.
         */
        p->quit = true;
        qemu_sem_post(&multifd_send_state->channels_ready);
        qemu_sem_post(&p->sem_sync);
    }
}
  688. static void *multifd_tls_handshake_thread(void *opaque)
  689. {
  690. MultiFDSendParams *p = opaque;
  691. QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c);
  692. qio_channel_tls_handshake(tioc,
  693. multifd_tls_outgoing_handshake,
  694. p,
  695. NULL,
  696. NULL);
  697. return NULL;
  698. }
  699. static void multifd_tls_channel_connect(MultiFDSendParams *p,
  700. QIOChannel *ioc,
  701. Error **errp)
  702. {
  703. MigrationState *s = migrate_get_current();
  704. const char *hostname = s->hostname;
  705. QIOChannelTLS *tioc;
  706. tioc = migration_tls_client_create(s, ioc, hostname, errp);
  707. if (!tioc) {
  708. return;
  709. }
  710. object_unref(OBJECT(ioc));
  711. trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);
  712. qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");
  713. p->c = QIO_CHANNEL(tioc);
  714. qemu_thread_create(&p->thread, "multifd-tls-handshake-worker",
  715. multifd_tls_handshake_thread, p,
  716. QEMU_THREAD_JOINABLE);
  717. }
/*
 * Attach a freshly connected channel to @p and start its send thread.
 *
 * If the channel still needs a TLS upgrade, start the asynchronous
 * handshake instead; this function is then re-entered from
 * multifd_tls_outgoing_handshake() once the handshake completes.
 *
 * Returns true on success (thread started or handshake in flight),
 * false when @error was already set or starting the TLS upgrade failed.
 */
static bool multifd_channel_connect(MultiFDSendParams *p,
                                    QIOChannel *ioc,
                                    Error *error)
{
    trace_multifd_set_outgoing_channel(
        ioc, object_get_typename(OBJECT(ioc)),
        migrate_get_current()->hostname, error);

    if (error) {
        return false;
    }
    if (migrate_channel_requires_tls_upgrade(ioc)) {
        multifd_tls_channel_connect(p, ioc, &error);
        if (!error) {
            /*
             * tls_channel_connect will call back to this
             * function after the TLS handshake,
             * so we mustn't call multifd_send_thread until then
             */
            return true;
        } else {
            return false;
        }
    } else {
        migration_ioc_register_yank(ioc);
        p->registered_yank = true;
        p->c = ioc;
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);
    }
    return true;
}
/*
 * Common failure path of multifd_new_send_channel_async(): record the
 * error, wake up anybody waiting on this channel and drop the channel
 * reference.
 */
static void multifd_new_send_channel_cleanup(MultiFDSendParams *p,
                                             QIOChannel *ioc, Error *err)
{
    migrate_set_error(migrate_get_current(), err);
    /* An error happened; tell everybody waiting on this channel */
    qemu_sem_post(&multifd_send_state->channels_ready);
    qemu_sem_post(&p->sem_sync);
    /*
     * Although multifd_send_thread was never created, the main
     * migration thread still needs to know whether this channel is
     * running, so mark its status accordingly.
     */
    p->quit = true;
    object_unref(OBJECT(ioc));
    error_free(err);
}
/*
 * Completion callback of socket_send_channel_create(): on success wire
 * the new socket into the channel params and start the send thread
 * (possibly via a TLS handshake first); on any failure clean up.
 */
static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
{
    MultiFDSendParams *p = opaque;
    QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
    Error *local_err = NULL;

    trace_multifd_new_send_channel_async(p->id);
    if (!qio_task_propagate_error(task, &local_err)) {
        p->c = QIO_CHANNEL(sioc);
        qio_channel_set_delay(p->c, false);
        /* set before the thread exists; cleared by the thread on exit */
        p->running = true;
        if (multifd_channel_connect(p, sioc, local_err)) {
            return;
        }
    }

    multifd_new_send_channel_cleanup(p, sioc, local_err);
}
/*
 * Allocate the global send state and one MultiFDSendParams per channel,
 * start the (asynchronous) connection of every channel, and run the
 * compression method's send_setup hook on each.
 *
 * Returns 0 on success (or when multifd is disabled), non-zero on
 * error (setting @errp).
 */
int multifd_save_setup(Error **errp)
{
    int thread_count;
    /* how many page offsets fit in one packet */
    uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
    uint8_t i;

    if (!migrate_use_multifd()) {
        return 0;
    }

    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    multifd_send_state->pages = multifd_pages_init(page_count);
    qemu_sem_init(&multifd_send_state->channels_ready, 0);
    qatomic_set(&multifd_send_state->exiting, 0);
    multifd_send_state->ops = multifd_ops[migrate_multifd_compression()];

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        qemu_sem_init(&p->sem_sync, 0);
        p->quit = false;
        p->pending_job = 0;
        p->id = i;
        p->pages = multifd_pages_init(page_count);
        p->packet_len = sizeof(MultiFDPacket_t)
                      + sizeof(uint64_t) * page_count;
        p->packet = g_malloc0(p->packet_len);
        p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
        p->packet->version = cpu_to_be32(MULTIFD_VERSION);
        p->name = g_strdup_printf("multifdsend_%d", i);
        /* We need one extra place for the packet header */
        p->iov = g_new0(struct iovec, page_count + 1);
        p->normal = g_new0(ram_addr_t, page_count);
        p->page_size = qemu_target_page_size();
        p->page_count = page_count;

        if (migrate_use_zero_copy_send()) {
            p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
        } else {
            p->write_flags = 0;
        }

        socket_send_channel_create(multifd_new_send_channel_async, p);
    }

    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];
        Error *local_err = NULL;
        int ret;

        ret = multifd_send_state->ops->send_setup(p, &local_err);
        if (ret) {
            error_propagate(errp, local_err);
            return ret;
        }
    }
    return 0;
}
/* Global state for the receive side of multifd migration */
struct {
    /* array of per-channel receive parameters, one per multifd channel */
    MultiFDRecvParams *params;
    /* number of created threads */
    int count;
    /* syncs main thread and channels */
    QemuSemaphore sem_sync;
    /* global number of generated multifd packets */
    uint64_t packet_num;
    /* multifd ops */
    MultiFDMethods *ops;
} *multifd_recv_state;
  846. static void multifd_recv_terminate_threads(Error *err)
  847. {
  848. int i;
  849. trace_multifd_recv_terminate_threads(err != NULL);
  850. if (err) {
  851. MigrationState *s = migrate_get_current();
  852. migrate_set_error(s, err);
  853. if (s->state == MIGRATION_STATUS_SETUP ||
  854. s->state == MIGRATION_STATUS_ACTIVE) {
  855. migrate_set_state(&s->state, s->state,
  856. MIGRATION_STATUS_FAILED);
  857. }
  858. }
  859. for (i = 0; i < migrate_multifd_channels(); i++) {
  860. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  861. qemu_mutex_lock(&p->mutex);
  862. p->quit = true;
  863. /*
  864. * We could arrive here for two reasons:
  865. * - normal quit, i.e. everything went fine, just finished
  866. * - error quit: We close the channels so the channel threads
  867. * finish the qio_channel_read_all_eof()
  868. */
  869. if (p->c) {
  870. qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
  871. }
  872. qemu_mutex_unlock(&p->mutex);
  873. }
  874. }
  875. void multifd_load_shutdown(void)
  876. {
  877. if (migrate_use_multifd()) {
  878. multifd_recv_terminate_threads(NULL);
  879. }
  880. }
/*
 * Tear down the receive side of multifd migration: stop and join every
 * recv thread, then release all per-channel and global resources.
 * Safe to call only after multifd_load_setup() has run; no-op when
 * multifd is disabled.
 */
void multifd_load_cleanup(void)
{
    int i;

    if (!migrate_use_multifd()) {
        return;
    }
    /* Ask every thread to quit before joining them below */
    multifd_recv_terminate_threads(NULL);
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        if (p->running) {
            /*
             * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code,
             * however try to wakeup it without harm in cleanup phase.
             */
            qemu_sem_post(&p->sem_sync);
        }

        qemu_thread_join(&p->thread);
    }
    /* All threads are joined: it is now safe to free their resources */
    for (i = 0; i < migrate_multifd_channels(); i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        migration_ioc_unregister_yank(p->c);
        object_unref(OBJECT(p->c));
        p->c = NULL;
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem_sync);
        g_free(p->name);
        p->name = NULL;
        p->packet_len = 0;
        g_free(p->packet);
        p->packet = NULL;
        g_free(p->iov);
        p->iov = NULL;
        g_free(p->normal);
        p->normal = NULL;
        /* Let the compression method release its per-channel state */
        multifd_recv_state->ops->recv_cleanup(p);
    }
    qemu_sem_destroy(&multifd_recv_state->sem_sync);
    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;
}
  923. void multifd_recv_sync_main(void)
  924. {
  925. int i;
  926. if (!migrate_use_multifd()) {
  927. return;
  928. }
  929. for (i = 0; i < migrate_multifd_channels(); i++) {
  930. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  931. trace_multifd_recv_sync_main_wait(p->id);
  932. qemu_sem_wait(&multifd_recv_state->sem_sync);
  933. }
  934. for (i = 0; i < migrate_multifd_channels(); i++) {
  935. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  936. WITH_QEMU_LOCK_GUARD(&p->mutex) {
  937. if (multifd_recv_state->packet_num < p->packet_num) {
  938. multifd_recv_state->packet_num = p->packet_num;
  939. }
  940. }
  941. trace_multifd_recv_sync_main_signal(p->id);
  942. qemu_sem_post(&p->sem_sync);
  943. }
  944. trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
  945. }
/*
 * Per-channel receive thread.  Loops reading one multifd packet at a
 * time from the channel, unfills it, and hands the page payload to the
 * active compression method's recv_pages().  Honors MULTIFD_FLAG_SYNC
 * by rendezvousing with multifd_recv_sync_main().  Exits on EOF, read
 * error, unfill error, or when p->quit is set.
 */
static void *multifd_recv_thread(void *opaque)
{
    MultiFDRecvParams *p = opaque;
    Error *local_err = NULL;
    int ret;

    trace_multifd_recv_thread_start(p->id);
    rcu_register_thread();

    while (true) {
        uint32_t flags;

        /* NOTE(review): p->quit is read here without p->mutex held */
        if (p->quit) {
            break;
        }

        ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
                                       p->packet_len, &local_err);
        if (ret == 0 || ret == -1) {   /* 0: EOF  -1: Error */
            break;
        }

        qemu_mutex_lock(&p->mutex);
        ret = multifd_recv_unfill_packet(p, &local_err);
        if (ret) {
            qemu_mutex_unlock(&p->mutex);
            break;
        }

        flags = p->flags;
        /* recv methods don't know how to handle the SYNC flag */
        p->flags &= ~MULTIFD_FLAG_SYNC;
        trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags,
                           p->next_packet_size);
        p->num_packets++;
        p->total_normal_pages += p->normal_num;
        qemu_mutex_unlock(&p->mutex);

        /* Only call into the method when the packet carried pages */
        if (p->normal_num) {
            ret = multifd_recv_state->ops->recv_pages(p, &local_err);
            if (ret != 0) {
                break;
            }
        }

        if (flags & MULTIFD_FLAG_SYNC) {
            /* Tell the main thread we reached the sync point, then wait */
            qemu_sem_post(&multifd_recv_state->sem_sync);
            qemu_sem_wait(&p->sem_sync);
        }
    }

    if (local_err) {
        /* Propagate the failure to all other channels before exiting */
        multifd_recv_terminate_threads(local_err);
        error_free(local_err);
    }
    qemu_mutex_lock(&p->mutex);
    p->running = false;
    qemu_mutex_unlock(&p->mutex);

    rcu_unregister_thread();
    trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages);

    return NULL;
}
  999. int multifd_load_setup(Error **errp)
  1000. {
  1001. int thread_count;
  1002. uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
  1003. uint8_t i;
  1004. /*
  1005. * Return successfully if multiFD recv state is already initialised
  1006. * or multiFD is not enabled.
  1007. */
  1008. if (multifd_recv_state || !migrate_use_multifd()) {
  1009. return 0;
  1010. }
  1011. thread_count = migrate_multifd_channels();
  1012. multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
  1013. multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
  1014. qatomic_set(&multifd_recv_state->count, 0);
  1015. qemu_sem_init(&multifd_recv_state->sem_sync, 0);
  1016. multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];
  1017. for (i = 0; i < thread_count; i++) {
  1018. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  1019. qemu_mutex_init(&p->mutex);
  1020. qemu_sem_init(&p->sem_sync, 0);
  1021. p->quit = false;
  1022. p->id = i;
  1023. p->packet_len = sizeof(MultiFDPacket_t)
  1024. + sizeof(uint64_t) * page_count;
  1025. p->packet = g_malloc0(p->packet_len);
  1026. p->name = g_strdup_printf("multifdrecv_%d", i);
  1027. p->iov = g_new0(struct iovec, page_count);
  1028. p->normal = g_new0(ram_addr_t, page_count);
  1029. p->page_count = page_count;
  1030. p->page_size = qemu_target_page_size();
  1031. }
  1032. for (i = 0; i < thread_count; i++) {
  1033. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  1034. Error *local_err = NULL;
  1035. int ret;
  1036. ret = multifd_recv_state->ops->recv_setup(p, &local_err);
  1037. if (ret) {
  1038. error_propagate(errp, local_err);
  1039. return ret;
  1040. }
  1041. }
  1042. return 0;
  1043. }
  1044. bool multifd_recv_all_channels_created(void)
  1045. {
  1046. int thread_count = migrate_multifd_channels();
  1047. if (!migrate_use_multifd()) {
  1048. return true;
  1049. }
  1050. if (!multifd_recv_state) {
  1051. /* Called before any connections created */
  1052. return false;
  1053. }
  1054. return thread_count == qatomic_read(&multifd_recv_state->count);
  1055. }
  1056. /*
  1057. * Try to receive all multifd channels to get ready for the migration.
  1058. * Sets @errp when failing to receive the current channel.
  1059. */
  1060. void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
  1061. {
  1062. MultiFDRecvParams *p;
  1063. Error *local_err = NULL;
  1064. int id;
  1065. id = multifd_recv_initial_packet(ioc, &local_err);
  1066. if (id < 0) {
  1067. multifd_recv_terminate_threads(local_err);
  1068. error_propagate_prepend(errp, local_err,
  1069. "failed to receive packet"
  1070. " via multifd channel %d: ",
  1071. qatomic_read(&multifd_recv_state->count));
  1072. return;
  1073. }
  1074. trace_multifd_recv_new_channel(id);
  1075. p = &multifd_recv_state->params[id];
  1076. if (p->c != NULL) {
  1077. error_setg(&local_err, "multifd: received id '%d' already setup'",
  1078. id);
  1079. multifd_recv_terminate_threads(local_err);
  1080. error_propagate(errp, local_err);
  1081. return;
  1082. }
  1083. p->c = ioc;
  1084. object_ref(OBJECT(ioc));
  1085. /* initial packet */
  1086. p->num_packets = 1;
  1087. p->running = true;
  1088. qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
  1089. QEMU_THREAD_JOINABLE);
  1090. qatomic_inc(&multifd_recv_state->count);
  1091. }